diff options
-rw-r--r-- | ChangeLog | 28 | ||||
-rw-r--r-- | INSTALL | 8 | ||||
-rw-r--r-- | Makefile.in | 28 | ||||
-rw-r--r-- | NEWS | 56 | ||||
-rw-r--r-- | README | 8 | ||||
-rw-r--r-- | archive_reader.cc | 101 | ||||
-rw-r--r-- | archive_reader.h | 17 | ||||
-rw-r--r-- | common_decode.cc | 67 | ||||
-rw-r--r-- | compress.cc | 54 | ||||
-rwxr-xr-x | configure | 2 | ||||
-rw-r--r-- | create.cc | 277 | ||||
-rw-r--r-- | create.h | 1 | ||||
-rw-r--r-- | create_lz.cc | 21 | ||||
-rw-r--r-- | decode.cc | 226 | ||||
-rw-r--r-- | decode.h | 32 | ||||
-rw-r--r-- | decode_lz.cc | 416 | ||||
-rw-r--r-- | delete.cc | 72 | ||||
-rw-r--r-- | delete_lz.cc | 36 | ||||
-rw-r--r-- | doc/tarlz.1 | 81 | ||||
-rw-r--r-- | doc/tarlz.info | 567 | ||||
-rw-r--r-- | doc/tarlz.texi | 621 | ||||
-rw-r--r-- | exclude.cc | 7 | ||||
-rw-r--r-- | extended.cc | 179 | ||||
-rw-r--r-- | lzip_index.cc | 4 | ||||
-rw-r--r-- | lzip_index.h | 2 | ||||
-rw-r--r-- | main.cc | 255 | ||||
-rw-r--r-- | tarlz.h | 136 | ||||
-rwxr-xr-x | testsuite/check.sh | 349 | ||||
-rw-r--r-- | testsuite/eoa_blocks.tar (renamed from testsuite/eof.tar) | bin | 1024 -> 1024 bytes | |||
-rw-r--r-- | testsuite/eoa_blocks.tar.lz (renamed from testsuite/eof.tar.lz) | bin | 44 -> 44 bytes | |||
-rw-r--r-- | testsuite/test3_eoa1.tar (renamed from testsuite/test3_eof1.tar) | bin | 3072 -> 3072 bytes | |||
-rw-r--r-- | testsuite/test3_eoa1.tar.lz (renamed from testsuite/test3_eof1.tar.lz) | bin | 312 -> 312 bytes | |||
-rw-r--r-- | testsuite/test3_eoa2.tar (renamed from testsuite/test3_eof2.tar) | bin | 3584 -> 3584 bytes | |||
-rw-r--r-- | testsuite/test3_eoa2.tar.lz (renamed from testsuite/test3_eof2.tar.lz) | bin | 352 -> 352 bytes | |||
-rw-r--r-- | testsuite/test3_eoa3.tar (renamed from testsuite/test3_eof3.tar) | bin | 4608 -> 4608 bytes | |||
-rw-r--r-- | testsuite/test3_eoa3.tar.lz (renamed from testsuite/test3_eof3.tar.lz) | bin | 396 -> 396 bytes | |||
-rw-r--r-- | testsuite/test3_eoa4.tar (renamed from testsuite/test3_eof4.tar) | bin | 4096 -> 4096 bytes | |||
-rw-r--r-- | testsuite/test3_eoa4.tar.lz (renamed from testsuite/test3_eof4.tar.lz) | bin | 535 -> 535 bytes | |||
-rw-r--r-- | testsuite/test3_eoa5.tar.lz (renamed from testsuite/test3_eof5.tar.lz) | bin | 535 -> 535 bytes |
39 files changed, 2210 insertions, 1441 deletions
@@ -1,3 +1,29 @@ +2022-09-23 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.23 released. + * Create and decode the extended records 'atime' and 'mtime'. + * Create and decode the extended records 'uid' and 'gid'. + * New option '--ignore-overflow'. + * Refuse to read/write archive data from/to a terminal. + (Reported by DustDFG). + * main.cc (parse_mtime): Make time of day 'HH:MM:SS' optional. + Accept both space and 'T' as separator between date and time. + (show_option_error): New function showing argument and option name. + * decode.cc (extract_member): Diagnose intermediate directory failure. + Failure to extract a member is no longer fatal. + * decode_lz.cc: Make diagnostics identical to serial decoder. + * common_decode.cc (format_member_name): Improve column alignment. + * create.cc (fill_headers): Improve diagnostic when stat reports a + wrong st_size for a symbolic link. (Reported by Jason Lenz). + Change diagnostic "File is the archive" to "Archive can't contain + itself" following a similar change made by Paul Eggert to GNU tar. + * Don't show "Removing leading '/' from member names." if excluded. + * tarlz.texi: Change GNU Texinfo category from 'Data Compression' + to 'Archiving' to match that of GNU tar. + Use 'end-of-archive' (EOA) instead of 'end-of-file' (EOF). + * main.cc (show_help), tarlz.texi: List operations before options. + * Many small improvements have been made to code and documentation. + 2022-01-05 Antonio Diaz Diaz <antonio@gnu.org> * Version 0.22 released. @@ -136,7 +162,7 @@ * Version 0.6 released. * New option '-A, --concatenate'. * Option '--ignore-crc' replaced with '--missing-crc'. - * create.cc (add_member): Test that uid, gid, mtime, devmajor + * create.cc (add_member): Verify that uid, gid, mtime, devmajor, and devminor are in ustar range. * configure: Accept appending to CXXFLAGS; 'CXXFLAGS+=OPTIONS'. * Makefile.in: Use tarlz in target 'dist'. 
@@ -1,7 +1,7 @@ Requirements ------------ -You will need a C++11 compiler and the compression library lzlib installed. -(gcc 3.3.6 or newer is recommended). +You will need a C++98 compiler with support for 'long long', and the +compression library lzlib installed. (gcc 3.3.6 or newer is recommended). I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards compliant compiler. @@ -10,6 +10,10 @@ Lzlib must be version 1.12 or newer. Gcc is available at http://gcc.gnu.org. Lzlib is available at http://www.nongnu.org/lzip/lzlib.html. +The operating system must allow signal handlers read access to objects with +static storage duration so that the cleanup handler for Control-C can delete +the partial output file in '-z, --compress' mode. + Procedure --------- diff --git a/Makefile.in b/Makefile.in index 7506a17..0fa5761 100644 --- a/Makefile.in +++ b/Makefile.in @@ -32,19 +32,19 @@ main.o : main.cc $(objs) : Makefile arg_parser.o : arg_parser.h archive_reader.o : tarlz.h lzip_index.h archive_reader.h -common.o : arg_parser.h tarlz.h -common_decode.o : arg_parser.h tarlz.h -compress.o : arg_parser.h tarlz.h -create.o : arg_parser.h tarlz.h create.h -create_lz.o : arg_parser.h tarlz.h create.h -decode.o : arg_parser.h tarlz.h lzip_index.h archive_reader.h -decode_lz.o : arg_parser.h tarlz.h lzip_index.h archive_reader.h -delete.o : arg_parser.h tarlz.h lzip_index.h archive_reader.h -delete_lz.o : arg_parser.h tarlz.h lzip_index.h archive_reader.h +common.o : tarlz.h arg_parser.h +common_decode.o : tarlz.h arg_parser.h +compress.o : tarlz.h arg_parser.h +create.o : tarlz.h arg_parser.h create.h +create_lz.o : tarlz.h arg_parser.h create.h +decode.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h decode.h +decode_lz.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h decode.h +delete.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h +delete_lz.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h exclude.o : tarlz.h extended.o : tarlz.h 
lzip_index.o : tarlz.h lzip_index.h -main.o : arg_parser.h tarlz.h +main.o : tarlz.h arg_parser.h doc : info man @@ -137,18 +137,18 @@ dist : doc $(DISTNAME)/testsuite/rbaz \ $(DISTNAME)/testsuite/test3.tar \ $(DISTNAME)/testsuite/test3_nn.tar \ - $(DISTNAME)/testsuite/test3_eof[1-4].tar \ + $(DISTNAME)/testsuite/test3_eoa[1-4].tar \ $(DISTNAME)/testsuite/test3_gh[1-4].tar \ $(DISTNAME)/testsuite/test3_bad[1-5].tar \ $(DISTNAME)/testsuite/test3_dir.tar \ $(DISTNAME)/testsuite/t155.tar \ $(DISTNAME)/testsuite/t155_fv[1-3].tar \ - $(DISTNAME)/testsuite/eof.tar \ + $(DISTNAME)/testsuite/eoa_blocks.tar \ $(DISTNAME)/testsuite/test.txt.lz \ $(DISTNAME)/testsuite/test.txt.tar.lz \ $(DISTNAME)/testsuite/test_bad[12].txt.tar.lz \ $(DISTNAME)/testsuite/test3.tar.lz \ - $(DISTNAME)/testsuite/test3_eof[1-5].tar.lz \ + $(DISTNAME)/testsuite/test3_eoa[1-5].tar.lz \ $(DISTNAME)/testsuite/test3_em[1-6].tar.lz \ $(DISTNAME)/testsuite/test3_gh[1-6].tar.lz \ $(DISTNAME)/testsuite/test3_nn.tar.lz \ @@ -164,7 +164,7 @@ dist : doc $(DISTNAME)/testsuite/dotdot[1-5].tar.lz \ $(DISTNAME)/testsuite/ug32767.tar.lz \ $(DISTNAME)/testsuite/ug32chars.tar.lz \ - $(DISTNAME)/testsuite/eof.tar.lz + $(DISTNAME)/testsuite/eoa_blocks.tar.lz rm -f $(DISTNAME) clean : @@ -1,11 +1,53 @@ -Changes in version 0.22: +Changes in version 0.23: -In case of error in a numerical argument to a command line option, tarlz -now shows the name of the option and the range of valid values. +Tarlz now can create and decode the extended records 'atime' and 'mtime', +allowing times beyond the ustar range (before 1970-01-01 00:00:00 UTC or +after 2242-03-16 12:56:31 UTC). -'--check-lib' now checks that LZ_API_VERSION and LZ_version_string match. +Tarlz now can create and decode the extended records 'uid' and 'gid', +allowing user and group IDs beyond the ustar limit of 2_097_151. -Tarlz now reports an error and exits with status 1 if '-o, --output' is used -with any operation other than '-z, --compress'. 
+The new option '--ignore-overflow', which makes '-d, --diff' ignore +differences in mtime caused by overflow on 32-bit systems, has been added. -The variable LIBS can now be set from configure. +Tarlz now refuses to read archive data from a terminal or write archive data +to a terminal. (Reported by DustDFG). + +In the date format of option '--mtime' the time of day 'HH:MM:SS' is now +optional and defaults to '00:00:00'. Both space and 'T' are now accepted as +separator between date and time. + +Diagnostics caused by invalid arguments to command line options now show the +argument and the name of the option. + +Tarlz now diagnoses separately the failure to create an intermediate +directory during extraction. + +Failure to extract a member due to environmental problems is no longer fatal +in serial extraction. (It was already non-fatal in parallel extraction). + +The diagnostics emitted by the parallel decoder should now be identical to +the corresponding diagnostics of the serial decoder. + +Column alignment has been improved in listings by printing "user/group size" +in a field of minimum width 19 with at least 8 characters for size. + +The diagnostic shown when the filesystem reports a wrong st_size for a +symbolic link has been improved. (Reported by Jason Lenz). + +The diagnostic "File is the archive" has been changed to "Archive can't +contain itself" following a similar change made by Paul Eggert to GNU tar. + +The warning "Removing leading '/' from member names." is now not shown when +compressing nor if the member causing it is excluded. + +The texinfo category of the manual has been changed from 'Data Compression' +to 'Archiving' to match that of GNU tar. + +'end-of-archive' (EOA) is now used consistently to refer to the blocks of +binary zeros used to mark the end of the archive. + +Operations are now listed before options in the --help output and in the +manual. + +Many small improvements have been made to the code and documentation. 
@@ -30,9 +30,9 @@ archive, but it has the following advantages: * The resulting multimember tar.lz archive can be decompressed in parallel, multiplying the decompression speed. - * New members can be appended to the archive (by removing the EOF - member), and unwanted members can be deleted from the archive. Just - like an uncompressed tar archive. + * New members can be appended to the archive (by removing the + end-of-archive member), and unwanted members can be deleted from the + archive. Just like an uncompressed tar archive. * It is a safe POSIX-style backup format. In case of corruption, tarlz can extract all the undamaged members from the tar.lz archive, @@ -78,7 +78,7 @@ compression is used: tar +========+======+=================+===============+========+======+========+ -| header | data | extended header | extended data | header | data | EOF | +| header | data | extended header | extended data | header | data | EOA | +========+======+=================+===============+========+======+========+ tar.lz diff --git a/archive_reader.cc b/archive_reader.cc index a146156..8ad315d 100644 --- a/archive_reader.cc +++ b/archive_reader.cc @@ -30,6 +30,8 @@ namespace { +const char * const rdaerr_msg = "Error reading archive"; + /* Return the number of bytes really read. If (value returned < size) and (errno == 0), means EOF was reached. */ @@ -49,51 +51,61 @@ int preadblock( const int fd, uint8_t * const buf, const int size, return sz; } +int non_tty_infd( const std::string & archive_name, const char * const namep ) + { + int infd = archive_name.empty() ? STDIN_FILENO : open_instream( archive_name ); + if( infd >= 0 && isatty( infd ) ) // for example /dev/tty + { show_file_error( namep, archive_name.empty() ? + "I won't read archive data from a terminal (missing -f option?)" : + "I won't read archive data from a terminal." ); + close( infd ); infd = -1; } + return infd; + } + -/* Return the number of bytes really written. 
- If (value returned < size), it is always an error. -*//* -int pwriteblock( const int fd, const uint8_t * const buf, const int size, - const long long pos ) +void xLZ_decompress_write( LZ_Decoder * const decoder, + const uint8_t * const buffer, const int size ) { - int sz = 0; - errno = 0; - while( sz < size ) - { - const int n = pwrite( fd, buf + sz, size - sz, pos + sz ); - if( n > 0 ) sz += n; - else if( n < 0 && errno != EINTR ) break; - errno = 0; - } - return sz; + if( LZ_decompress_write( decoder, buffer, size ) != size ) + internal_error( "library error (LZ_decompress_write)." ); } -*/ } // end namespace +Archive_descriptor::Archive_descriptor( const std::string & archive_name ) + : name( archive_name ), namep( name.empty() ? "(stdin)" : name.c_str() ), + infd( non_tty_infd( archive_name, namep ) ), + lzip_index( infd, true, false ), + seekable( lseek( infd, 0, SEEK_SET ) == 0 ), + indexed( seekable && lzip_index.retval() == 0 ) {} + + int Archive_reader_base::parse_records( Extended & extended, const Tar_header header, Resizable_buffer & rbuf, + const char * const default_msg, const bool permissive ) { const long long edsize = parse_octal( header + size_o, size_l ); const long long bufsize = round_up( edsize ); - if( edsize <= 0 || edsize >= 1LL << 33 || bufsize >= INT_MAX ) - return 1; // overflow or no extended data - if( !rbuf.resize( bufsize ) ) return 1; // extended records buffer - int retval = read( rbuf.u8(), bufsize ); + if( edsize <= 0 ) return err( 2, misrec_msg ); // no extended records + if( edsize >= 1LL << 33 || bufsize >= INT_MAX ) + return err( -2, longrec_msg ); // records too long + if( !rbuf.resize( bufsize ) ) return err( -1, mem_msg ); + e_msg_ = ""; e_code_ = 0; + int retval = read( rbuf.u8(), bufsize ); // extended records buffer if( retval == 0 && !extended.parse( rbuf(), edsize, permissive ) ) retval = 2; + if( retval && !*e_msg_ ) e_msg_ = default_msg; return retval; } /* Read 'size' uncompressed bytes, decompressing the input 
if needed. - Return value: 0 = OK, 1 = damaged member, 2 = fatal error. */ + Return value: 0 = OK, 1 = OOM or read error, 2 = EOF or invalid data. */ int Archive_reader::read( uint8_t * const buf, const int size ) { - if( fatal_ ) return err( 2 ); if( first_read ) // check format { first_read = false; @@ -102,32 +114,30 @@ int Archive_reader::read( uint8_t * const buf, const int size ) if( size != header_size ) internal_error( "size != header_size on first call." ); const int rd = readblock( ad.infd, buf, size ); - if( rd != size && errno ) - return err( 2, "Error reading archive", errno, rd ); + if( rd != size && errno ) return err( -1, rdaerr_msg, errno, rd ); const Lzip_header & header = (*(const Lzip_header *)buf); const bool islz = ( rd >= min_member_size && header.verify_magic() && header.verify_version() && isvalid_ds( header.dictionary_size() ) ); const bool istar = ( rd == size && verify_ustar_chksum( buf ) ); - const bool iseof = + const bool iseoa = ( !islz && !istar && rd == size && block_is_zero( buf, size ) ); bool maybe_lz = islz; // maybe corrupt tar.lz - if( !islz && !istar && !iseof ) // corrupt or invalid format + if( !islz && !istar && !iseoa ) // corrupt or invalid format { const bool lz_ext = has_lz_ext( ad.name ); show_file_error( ad.namep, lz_ext ? posix_lz_msg : posix_msg ); if( lz_ext && rd >= min_member_size ) maybe_lz = true; - else return err( 1 ); + else return err( 2 ); } if( !maybe_lz ) // uncompressed { if( rd == size ) return 0; - return err( 2, "EOF reading archive", 0, rd ); } + return err( -2, "EOF reading archive.", 0, rd ); } uncompressed_seekable = false; // compressed decoder = LZ_decompress_open(); if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) - return err( 2, mem_msg ); - if( LZ_decompress_write( decoder, buf, rd ) != rd ) - internal_error( "library error (LZ_decompress_write)." 
); + { LZ_decompress_close( decoder ); decoder = 0; return err( -1, mem_msg ); } + xLZ_decompress_write( decoder, buf, rd ); const int ret = read( buf, size ); if( ret != 0 ) return ret; if( verify_ustar_chksum( buf ) || block_is_zero( buf, size ) ) return 0; return err( 2, islz ? posix_lz_msg : "" ); @@ -136,7 +146,7 @@ int Archive_reader::read( uint8_t * const buf, const int size ) if( !decoder ) // uncompressed { const int rd = readblock( ad.infd, buf, size ); - if( rd == size ) return 0; else return err( 2, end_msg, 0, rd ); + if( rd == size ) return 0; else return err( -2, end_msg, 0, rd ); } const int ibuf_size = 16384; uint8_t ibuf[ibuf_size]; @@ -153,23 +163,22 @@ int Archive_reader::read( uint8_t * const buf, const int size ) const unsigned long long new_pos = LZ_decompress_total_in_size( decoder ); // lzlib < 1.8 does not update total_in_size when syncing to member if( new_pos >= old_pos && new_pos < LLONG_MAX ) - return err( 1, "", 0, sz, true ); - return err( 2, "Skipping to next header failed. " + return err( 2, "", 0, sz, true ); + return err( -1, "Skipping to next header failed. " "Lzlib 1.8 or newer required.", 0, sz ); } if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) - { return err( 2, end_msg, 0, sz ); } + { return err( -2, end_msg, 0, sz ); } sz += rd; if( sz < size && !at_eof && LZ_decompress_write_size( decoder ) > 0 ) { const int rsize = std::min( ibuf_size, LZ_decompress_write_size( decoder ) ); const int rd = readblock( ad.infd, ibuf, rsize ); - if( LZ_decompress_write( decoder, ibuf, rd ) != rd ) - internal_error( "library error (LZ_decompress_write)." 
); + xLZ_decompress_write( decoder, ibuf, rd ); if( rd < rsize ) { at_eof = true; LZ_decompress_finish( decoder ); - if( errno ) return err( 2, "Error reading archive.", errno, sz ); + if( errno ) return err( -1, rdaerr_msg, errno, sz ); } } } @@ -179,9 +188,9 @@ int Archive_reader::read( uint8_t * const buf, const int size ) int Archive_reader::skip_member( const Extended & extended ) { + if( extended.file_size() <= 0 ) return 0; long long rest = round_up( extended.file_size() ); // size + padding - if( uncompressed_seekable && lseek( ad.infd, rest, SEEK_CUR ) > 0 ) - return 0; + if( uncompressed_seekable && lseek( ad.infd, rest, SEEK_CUR ) > 0 ) return 0; const int bufsize = 32 * header_size; uint8_t buf[bufsize]; while( rest > 0 ) // skip tar member @@ -206,7 +215,7 @@ void Archive_reader_i::set_member( const long i ) /* Read 'size' decompressed bytes from the archive. - Return value: 0 = OK, 1 = damaged member, 2 = fatal error. */ + Return value: 0 = OK, 1 = OOM or read error, 2 = EOF or invalid data. */ int Archive_reader_i::read( uint8_t * const buf, const int size ) { int sz = 0; @@ -215,9 +224,9 @@ int Archive_reader_i::read( uint8_t * const buf, const int size ) { const int rd = LZ_decompress_read( decoder, buf + sz, size - sz ); if( rd < 0 ) - return err( 1, LZ_strerror( LZ_decompress_errno( decoder ) ), 0, sz ); + return err( 2, LZ_strerror( LZ_decompress_errno( decoder ) ), 0, sz ); if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) - return err( 2, end_msg, 0, sz ); + return err( -2, end_msg, 0, sz ); sz += rd; data_pos_ += rd; if( sz < size && LZ_decompress_write_size( decoder ) > 0 ) { @@ -232,13 +241,12 @@ int Archive_reader_i::read( uint8_t * const buf, const int size ) else { const int rd = preadblock( ad.infd, ibuf, rsize, archive_pos ); - if( LZ_decompress_write( decoder, ibuf, rd ) != rd ) - internal_error( "library error (LZ_decompress_write)." 
); + xLZ_decompress_write( decoder, ibuf, rd ); archive_pos += rd; if( rd < rsize ) { LZ_decompress_finish( decoder ); - if( errno ) return err( 2, "Error reading archive", 0, sz ); + if( errno ) return err( -1, rdaerr_msg, errno, sz ); } } } @@ -249,6 +257,7 @@ int Archive_reader_i::read( uint8_t * const buf, const int size ) int Archive_reader_i::skip_member( const Extended & extended ) { + if( extended.file_size() <= 0 ) return 0; long long rest = round_up( extended.file_size() ); // size + padding if( data_pos_ + rest == mdata_end_ ) { data_pos_ = mdata_end_; return 0; } const int bufsize = 32 * header_size; diff --git a/archive_reader.h b/archive_reader.h index 1b16f1c..47fa844 100644 --- a/archive_reader.h +++ b/archive_reader.h @@ -24,12 +24,7 @@ struct Archive_descriptor const bool seekable; const bool indexed; // archive is a compressed regular file - Archive_descriptor( const std::string & archive_name ) - : name( archive_name ), namep( name.empty() ? "(stdin)" : name.c_str() ), - infd( name.empty() ? STDIN_FILENO : open_instream( name ) ), - lzip_index( infd, true, false ), - seekable( lseek( infd, 0, SEEK_SET ) == 0 ), - indexed( seekable && lzip_index.retval() == 0 ) {} + Archive_descriptor( const std::string & archive_name ); }; @@ -48,7 +43,8 @@ protected: int err( const int retval, const char * const msg = "", const int code = 0, const int size = 0, const bool skip = false ) { e_msg_ = msg; e_code_ = code; e_size_ = size; e_skip_ = skip; - if( retval == 2 ) { fatal_ = true; } return retval; } + if( retval >= 0 ) return retval; + fatal_ = true; if( !*e_msg_ ) e_msg_ = "Fatal error"; return -retval; } Archive_reader_base( const Archive_descriptor & d ) : ad( d ), decoder( 0 ), e_msg_( "" ), e_code_( 0 ), e_size_( 0 ), @@ -65,12 +61,13 @@ public: bool fatal() const { return fatal_; } /* Read 'size' uncompressed bytes, decompressing the input if needed. - Return value: 0 = OK, 1 = damaged member, 2 = fatal error. 
+ Return value: 0 = OK, 1 = OOM or read error, 2 = EOF or invalid data. If !OK, fills all the e_* variables. */ virtual int read( uint8_t * const buf, const int size ) = 0; int parse_records( Extended & extended, const Tar_header header, - Resizable_buffer & rbuf, const bool permissive ); + Resizable_buffer & rbuf, const char * const default_msg, + const bool permissive ); }; @@ -96,7 +93,7 @@ public: */ class Archive_reader_i : public Archive_reader_base // indexed reader { - long long data_pos_; // current decompressed position + long long data_pos_; // current decompressed position in archive long long mdata_end_; // current member decompressed end long long archive_pos; // current position in archive for pread long member_id; // current member unless reading beyond diff --git a/common_decode.cc b/common_decode.cc index a030428..835687f 100644 --- a/common_decode.cc +++ b/common_decode.cc @@ -68,7 +68,8 @@ void format_mode_string( const Tar_header header, char buf[mode_string_size] ) } -int format_user_group_string( const Tar_header header, +int format_user_group_string( const Extended & extended, + const Tar_header header, char buf[group_string_size] ) { int len; @@ -76,11 +77,8 @@ int format_user_group_string( const Tar_header header, len = snprintf( buf, group_string_size, " %.32s/%.32s", header + uname_o, header + gname_o ); else - { - const unsigned uid = parse_octal( header + uid_o, uid_l ); - const unsigned gid = parse_octal( header + gid_o, gid_l ); - len = snprintf( buf, group_string_size, " %u/%u", uid, gid ); - } + len = snprintf( buf, group_string_size, " %llu/%llu", + extended.get_uid(), extended.get_gid() ); return len; } @@ -122,32 +120,41 @@ bool format_member_name( const Extended & extended, const Tar_header header, { format_mode_string( header, rbuf() ); const int group_string_len = - format_user_group_string( header, rbuf() + mode_string_size ); + format_user_group_string( extended, header, rbuf() + mode_string_size ); int offset = 
mode_string_size + group_string_len; - const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits - struct tm tms; - const struct tm * tm = localtime_r( &mtime, &tms ); - if( !tm ) - { time_t z = 0; tm = localtime_r( &z, &tms ); if( !tm ) tm = &tms; } + const time_t mtime = extended.mtime().sec(); + struct tm t; + if( !localtime_r( &mtime, &t ) ) // if local time fails + { time_t z = 0; if( !gmtime_r( &z, &t ) ) // use the UTC epoch + { t.tm_year = 70; t.tm_mon = t.tm_hour = t.tm_min = 0; t.tm_mday = 1; } } const Typeflag typeflag = (Typeflag)header[typeflag_o]; const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); const char * const link_string = !islink ? "" : ( ( typeflag == tf_link ) ? " link to " : " -> " ); + // print "user/group size" in a field of width 19 with 8 or more for size if( typeflag == tf_chardev || typeflag == tf_blockdev ) - offset += snprintf( rbuf() + offset, rbuf.size() - offset, " %5u,%u", - (unsigned)parse_octal( header + devmajor_o, devmajor_l ), - (unsigned)parse_octal( header + devminor_o, devminor_l ) ); + { + const unsigned devmajor = parse_octal( header + devmajor_o, devmajor_l ); + const unsigned devminor = parse_octal( header + devminor_o, devminor_l ); + const int width = std::max( 1, + std::max( 8, 19 - group_string_len ) - 1 - decimal_digits( devminor ) ); + offset += snprintf( rbuf() + offset, rbuf.size() - offset, " %*u,%u", + width, devmajor, devminor ); + } else - offset += snprintf( rbuf() + offset, rbuf.size() - offset, " %9llu", - extended.file_size() ); + { + const int width = std::max( 8, 19 - group_string_len ); + offset += snprintf( rbuf() + offset, rbuf.size() - offset, " %*llu", + width, extended.file_size() ); + } for( int i = 0; i < 2; ++i ) // resize rbuf if not large enough { const int len = snprintf( rbuf() + offset, rbuf.size() - offset, " %4d-%02u-%02u %02u:%02u %s%s%s\n", - 1900 + tm->tm_year, 1 + tm->tm_mon, tm->tm_mday, - tm->tm_hour, tm->tm_min, extended.path().c_str(), - 
link_string, islink ? extended.linkpath().c_str() : "" ); - if( (int)rbuf.size() > len + offset ) break; + 1900 + t.tm_year, 1 + t.tm_mon, t.tm_mday, t.tm_hour, + t.tm_min, extended.path().c_str(), link_string, + islink ? extended.linkpath().c_str() : "" ); + if( len + offset < (int)rbuf.size() ) break; if( !rbuf.resize( len + offset + 1 ) ) return false; } } @@ -180,16 +187,18 @@ bool check_skip_filename( const Cl_options & cl_opts, const char * const filename ) { if( Exclude::excluded( filename ) ) return true; // skip excluded files - bool skip = cl_opts.num_files > 0; - if( skip ) + bool skip = cl_opts.num_files > 0; // if no files specified, skip nothing + if( skip ) // else skip all but the files (or trees) specified for( int i = 0; i < cl_opts.parser.arguments(); ++i ) if( nonempty_arg( cl_opts.parser, i ) ) { - const char * const name = - remove_leading_dotslash( cl_opts.parser.argument( i ).c_str() ); + std::string removed_prefix; + const char * const name = remove_leading_dotslash( + cl_opts.parser.argument( i ).c_str(), &removed_prefix ); if( compare_prefix_dir( name, filename ) || compare_tslash( name, filename ) ) - { skip = false; name_pending[i] = false; break; } + { print_removed_prefix( removed_prefix ); + skip = false; name_pending[i] = false; break; } } return skip; } @@ -224,10 +233,10 @@ bool make_path( const std::string & name ) { const std::string partial( name, 0, index ); struct stat st; - if( stat( partial.c_str(), &st ) == 0 ) - { if( !S_ISDIR( st.st_mode ) ) return false; } + if( lstat( partial.c_str(), &st ) == 0 ) + { if( !S_ISDIR( st.st_mode ) ) { errno = ENOTDIR; return false; } } else if( mkdir( partial.c_str(), mode ) != 0 && errno != EEXIST ) - return false; + return false; // if EEXIST, another thread or process created the dir } } return true; diff --git a/compress.cc b/compress.cc index ad3f151..4e74efa 100644 --- a/compress.cc +++ b/compress.cc @@ -78,9 +78,18 @@ const char * ne_output_filename() // non-empty output file name 
} +bool check_tty_in( const char * const input_filename, const int infd ) + { + if( isatty( infd ) ) // for example /dev/tty + { show_file_error( input_filename, + "I won't read archive data from a terminal." ); + close( infd ); return false; } + return true; + } + bool check_tty_out() { - if( isatty( outfd ) ) + if( isatty( outfd ) ) // for example /dev/tty { show_file_error( ne_output_filename(), "I won't write compressed data to a terminal." ); return false; } @@ -145,8 +154,7 @@ bool archive_write( const uint8_t * const buf, const long long size, if( rd < 0 ) internal_error( "library error (LZ_compress_read)." ); if( rd == 0 && sz >= size ) break; if( writeblock( outfd, obuf, rd ) != rd ) - { show_file_error( ne_output_filename(), "Write error", errno ); - return false; } + { show_file_error( ne_output_filename(), werr_msg, errno ); return false; } } if( LZ_compress_finished( encoder ) == 1 && LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 ) @@ -160,9 +168,9 @@ bool tail_compress( const Cl_options & cl_opts, LZ_Encoder * const encoder ) { if( cl_opts.solidity != solid && !archive_write( 0, 0, encoder ) ) - return false; // flush encoder before EOF blocks + return false; // flush encoder before compressing EOA blocks int size = header_size; - bool zero = true; // true until non-zero data found after EOF blocks + bool zero = true; // true until non-zero data found after EOA blocks while( true ) { if( size > 0 && !archive_write( header, size, encoder ) ) @@ -171,7 +179,7 @@ bool tail_compress( const Cl_options & cl_opts, size = readblock( infd, header, header_size ); if( errno ) return false; if( zero && !block_is_zero( header, size ) ) - { zero = false; // flush encoder after EOF blocks + { zero = false; // flush encoder after compressing EOA blocks if( cl_opts.solidity != solid && !archive_write( 0, 0, encoder ) ) return false; } } @@ -188,7 +196,7 @@ int compress_archive( const Cl_options & cl_opts, const bool from_stdin = input_filename == "-"; const 
char * const filename = from_stdin ? "(stdin)" : input_filename.c_str(); const int infd = from_stdin ? STDIN_FILENO : open_instream( filename ); - if( infd < 0 ) return 1; + if( infd < 0 || !check_tty_in( filename, infd ) ) return 1; if( one_to_one ) { if( from_stdin ) { outfd = STDOUT_FILENO; output_filename.clear(); } @@ -206,14 +214,16 @@ int compress_archive( const Cl_options & cl_opts, unsigned long long partial_data_size = 0; // size of current block Extended extended; // metadata from extended records Resizable_buffer rbuf; // headers and extended records buffer + if( !rbuf.size() ) { show_error( mem_msg ); return 1; } + const char * const rderr_msg = "Read error"; + while( true ) // process one tar member per iteration { int total_header_size = header_size; // size of header(s) read const int rd = readblock( infd, rbuf.u8(), header_size ); - if( rd == 0 && errno == 0 ) break; // missing EOF blocks + if( rd == 0 && errno == 0 ) break; // missing EOA blocks if( rd != header_size ) - { show_file_error( filename, "Read error", errno ); - close( infd ); return 1; } + { show_file_error( filename, rderr_msg, errno ); close( infd ); return 1; } if( to_file && outfd < 0 ) // open outfd after verifying infd { @@ -223,9 +233,9 @@ int compress_archive( const Cl_options & cl_opts, delete_output_on_interrupt = true; } - if( !verify_ustar_chksum( rbuf.u8() ) ) // maybe EOF + if( !verify_ustar_chksum( rbuf.u8() ) ) // maybe EOA block { - if( block_is_zero( rbuf.u8(), header_size ) ) // first EOF block + if( block_is_zero( rbuf.u8(), header_size ) ) // first EOA block { tail_compress( cl_opts, infd, rbuf.u8(), encoder ); break; } show_file_error( filename, bad_hdr_msg ); close( infd ); return 2; } @@ -241,8 +251,7 @@ int compress_archive( const Cl_options & cl_opts, if( !rbuf.resize( total_header_size + bufsize ) ) { show_file_error( filename, mem_msg ); close( infd ); return 1; } if( readblock( infd, rbuf.u8() + total_header_size, bufsize ) != bufsize ) - { 
show_file_error( filename, "Read error", errno ); - close( infd ); return 1; } + { show_file_error( filename, rderr_msg, errno ); close( infd ); return 1; } total_header_size += bufsize; if( typeflag == tf_extended ) // do not parse global headers { @@ -252,7 +261,7 @@ int compress_archive( const Cl_options & cl_opts, if( !rbuf.resize( total_header_size + header_size ) ) { show_file_error( filename, mem_msg ); close( infd ); return 1; } if( readblock( infd, rbuf.u8() + total_header_size, header_size ) != header_size ) - { show_file_error( filename, errno ? "Read error" : end_msg, errno ); + { show_file_error( filename, errno ? rderr_msg : end_msg, errno ); close( infd ); return errno ? 1 : 2; } if( !verify_ustar_chksum( rbuf.u8() ) ) { show_file_error( filename, bad_hdr_msg ); close( infd ); return 2; } @@ -304,7 +313,7 @@ int compress_archive( const Cl_options & cl_opts, const struct stat * const in_statsp = ( need_close && fstat( infd, &in_stats ) == 0 ) ? &in_stats : 0; if( close( infd ) != 0 ) - { show_file_error( filename, "Error closing file", errno ); return 1; } + { show_file_error( filename, eclosf_msg, errno ); return 1; } if( need_close ) close_and_set_permissions( in_statsp ); return 0; } @@ -312,7 +321,7 @@ int compress_archive( const Cl_options & cl_opts, } // end namespace -int compress( Cl_options & cl_opts ) +int compress( const Cl_options & cl_opts ) { if( cl_opts.num_files > 1 && cl_opts.output_filename.size() ) { show_file_error( cl_opts.output_filename.c_str(), @@ -326,14 +335,9 @@ int compress( Cl_options & cl_opts ) if( !to_stdout && ( cl_opts.filenames_given || to_file ) ) set_signals( signal_handler ); - const int dictionary_size = option_mapping[cl_opts.level].dictionary_size; - if( cl_opts.data_size <= 0 ) - { - if( cl_opts.level == 0 ) cl_opts.data_size = 1 << 20; - else cl_opts.data_size = 2 * dictionary_size; - } - LZ_Encoder * encoder = LZ_compress_open( dictionary_size, - option_mapping[cl_opts.level].match_len_limit, LLONG_MAX ); + 
LZ_Encoder * encoder = LZ_compress_open( + option_mapping[cl_opts.level].dictionary_size, + option_mapping[cl_opts.level].match_len_limit, LLONG_MAX ); if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) { if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error ) @@ -6,7 +6,7 @@ # to copy, distribute, and modify it. pkgname=tarlz -pkgversion=0.22 +pkgversion=0.23 progname=tarlz srctrigger=doc/${pkgname}.texi @@ -63,11 +63,11 @@ bool option_C_after_relative_filename( const Arg_parser & parser ) } -/* Check archive type. Return position of EOF blocks or -1 if failure. - If remove_eof, leave fd file pos at beginning of the EOF blocks. +/* Check archive type. Return position of EOA blocks or -1 if failure. + If remove_eoa, leave fd file pos at beginning of the EOA blocks. Else, leave fd file pos at 0. */ -long long check_appendable( const int fd, const bool remove_eof ) +long long check_compressed_appendable( const int fd, const bool remove_eoa ) { struct stat st; // fd must be regular if( fstat( fd, &st ) != 0 || !S_ISREG( st.st_mode ) ) return -1; @@ -85,24 +85,25 @@ long long check_appendable( const int fd, const bool remove_eof ) LZ_decompress_read( decoder, buf, header_size ) != header_size ) { LZ_decompress_close( decoder ); return -1; } LZ_decompress_close( decoder ); - const bool maybe_eof = block_is_zero( buf, header_size ); - if( !verify_ustar_chksum( buf ) && !maybe_eof ) return -1; + const bool maybe_eoa = block_is_zero( buf, header_size ); + if( !verify_ustar_chksum( buf ) && !maybe_eoa ) return -1; const long long end = lseek( fd, 0, SEEK_END ); if( end < min_member_size ) return -1; - Lzip_trailer trailer; + Lzip_trailer trailer; // read last trailer if( seek_read( fd, trailer.data, Lzip_trailer::size, end - Lzip_trailer::size ) != Lzip_trailer::size ) return -1; const long long member_size = trailer.member_size(); if( member_size < min_member_size || member_size > end || - ( maybe_eof && member_size != end ) ) return -1; + ( maybe_eoa && 
member_size != end ) ) return -1; // garbage after EOA? - Lzip_header header; + Lzip_header header; // read last header if( seek_read( fd, header.data, Lzip_header::size, end - member_size ) != Lzip_header::size ) return -1; if( !header.verify_magic() || !header.verify_version() || !isvalid_ds( header.dictionary_size() ) ) return -1; + // EOA marker in last member must contain between 512 and 32256 zeros alone const unsigned long long data_size = trailer.data_size(); if( data_size < header_size || data_size > 32256 ) return -1; const unsigned data_crc = trailer.data_crc(); @@ -112,33 +113,36 @@ long long check_appendable( const int fd, const bool remove_eof ) crc ^= 0xFFFFFFFFU; if( crc != data_crc ) return -1; - const long long pos = remove_eof ? end - member_size : 0; + const long long pos = remove_eoa ? end - member_size : 0; if( lseek( fd, pos, SEEK_SET ) != pos ) return -1; return end - member_size; } -/* Skip all tar headers. Return position of EOF blocks or -1 if failure. - If remove_eof, leave fd file pos at beginning of the EOF blocks. +/* Skip all tar headers. + Return position of EOA blocks, -1 if failure, -2 if out of memory. + If remove_eoa, leave fd file pos at beginning of the EOA blocks. Else, leave fd file pos at 0. 
*/ -long long check_uncompressed_appendable( const int fd, const bool remove_eof ) +long long check_uncompressed_appendable( const int fd, const bool remove_eoa ) { struct stat st; // fd must be regular if( fstat( fd, &st ) != 0 || !S_ISREG( st.st_mode ) ) return -1; if( lseek( fd, 0, SEEK_SET ) != 0 ) return -1; if( st.st_size <= 0 ) return 0; // append to empty archive - long long eof_pos = 0; + long long eoa_pos = 0; // pos of EOA blocks Extended extended; // metadata from extended records Resizable_buffer rbuf; // extended records buffer bool prev_extended = false; // prev header was extended + if( !rbuf.size() ) return -2; + while( true ) // process one tar header per iteration { Tar_header header; const int rd = readblock( fd, header, header_size ); - if( rd == 0 && errno == 0 ) break; // missing EOF blocks + if( rd == 0 && errno == 0 ) break; // missing EOA blocks if( rd != header_size ) return -1; - if( !verify_ustar_chksum( header ) ) // maybe EOF + if( !verify_ustar_chksum( header ) ) // maybe EOA block { if( block_is_zero( header, header_size ) ) break; else return -1; } const Typeflag typeflag = (Typeflag)header[typeflag_o]; if( typeflag == tf_extended || typeflag == tf_global ) @@ -148,7 +152,7 @@ long long check_uncompressed_appendable( const int fd, const bool remove_eof ) const long long bufsize = round_up( edsize ); if( edsize <= 0 || edsize >= 1LL << 33 || bufsize >= INT_MAX ) return -1; // overflow or no extended data - if( !rbuf.resize( bufsize ) ) return -1; + if( !rbuf.resize( bufsize ) ) return -2; if( readblock( fd, rbuf.u8(), bufsize ) != bufsize ) return -1; if( typeflag == tf_extended ) @@ -158,15 +162,15 @@ long long check_uncompressed_appendable( const int fd, const bool remove_eof ) } prev_extended = false; - eof_pos = lseek( fd, round_up( extended.get_file_size_and_reset( header ) ), + eoa_pos = lseek( fd, round_up( extended.get_file_size_and_reset( header ) ), SEEK_CUR ); - if( eof_pos <= 0 ) return -1; + if( eoa_pos <= 0 ) return 
-1; } if( prev_extended ) return -1; - const long long pos = remove_eof ? eof_pos : 0; + const long long pos = remove_eoa ? eoa_pos : 0; if( lseek( fd, pos, SEEK_SET ) != pos ) return -1; - return eof_pos; + return eoa_pos; } @@ -202,8 +206,9 @@ bool archive_write( const uint8_t * const buf, const int size ) bool write_extended( const Extended & extended ) { - const long long ebsize = extended.format_block( grbuf ); - if( ebsize < 0 ) return false; + const long long ebsize = extended.format_block( grbuf ); // may be 0 + if( ebsize < 0 ) + { show_error( ( ebsize == -2 ) ? mem_msg2 : eferec_msg ); return false; } for( long long pos = 0; pos < ebsize; ) // write extended block to archive { int size = std::min( ebsize - pos, 1LL << 20 ); @@ -218,7 +223,8 @@ bool write_extended( const Extended & extended ) bool store_name( const char * const filename, Extended & extended, Tar_header header, const bool force_extended_name ) { - const char * const stored_name = remove_leading_dotslash( filename, true ); + const char * const stored_name = + remove_leading_dotslash( filename, &extended.removed_prefix, true ); if( !force_extended_name ) // try storing filename in the ustar header { @@ -250,6 +256,7 @@ int add_member( const char * const filename, const struct stat *, Extended extended; // metadata for extended records Tar_header header; if( !fill_headers( filename, extended, header, file_size, flag ) ) return 0; + print_removed_prefix( extended.removed_prefix ); const int infd = file_size ? 
open_instream( filename ) : -1; if( file_size && infd < 0 ) { set_error_status( 1 ); return 0; } @@ -286,7 +293,7 @@ int add_member( const char * const filename, const struct stat *, if( !archive_write( buf, size ) ) { close( infd ); return 1; } } if( close( infd ) != 0 ) - { show_file_error( filename, "Error closing file", errno ); return 1; } + { show_file_error( filename, eclosf_msg, errno ); return 1; } } if( encoder && gcl_opts->solidity == no_solid && !archive_write( 0, 0 ) ) return 1; @@ -297,6 +304,18 @@ int add_member( const char * const filename, const struct stat *, return 0; } + +bool check_tty_out( const char * const archive_namep, const int outfd, + const bool to_stdout ) + { + if( isatty( outfd ) ) // for example /dev/tty + { show_file_error( archive_namep, to_stdout ? + "I won't write archive data to a terminal (missing -f option?)" : + "I won't write archive data to a terminal." ); + return false; } + return true; + } + } // end namespace @@ -335,23 +354,23 @@ bool writeblock_wrapper( const int outfd, const uint8_t * const buffer, const int size ) { if( writeblock( outfd, buffer, size ) != size ) - { show_file_error( archive_namep, "Write error", errno ); return false; } + { show_file_error( archive_namep, werr_msg, errno ); return false; } return true; } // write End-Of-Archive records -bool write_eof_records( const int outfd, const bool compressed ) +bool write_eoa_records( const int outfd, const bool compressed ) { if( compressed ) { - enum { eof_member_size = 44 }; - const uint8_t eof_member[eof_member_size] = { + enum { eoa_member_size = 44 }; + const uint8_t eoa_member[eoa_member_size] = { 0x4C, 0x5A, 0x49, 0x50, 0x01, 0x0C, 0x00, 0x00, 0x6F, 0xFD, 0xFF, 0xFF, 0xA3, 0xB7, 0x80, 0x0C, 0x82, 0xDB, 0xFF, 0xFF, 0x9F, 0xF0, 0x00, 0x00, 0x2E, 0xAF, 0xB5, 0xEF, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; - return writeblock_wrapper( outfd, eof_member, eof_member_size ); + return 
writeblock_wrapper( outfd, eoa_member, eoa_member_size ); } enum { bufsize = 2 * header_size }; uint8_t buf[bufsize]; @@ -360,40 +379,52 @@ bool write_eof_records( const int outfd, const bool compressed ) } -/* Removes any amount of leading "./" and '/' strings from filename. - Optionally also removes prefixes containing a ".." component. +/* Remove any amount of leading "./" and '/' strings from filename. + Optionally also remove prefixes containing a ".." component. + Return the removed prefix in *removed_prefixp. */ const char * remove_leading_dotslash( const char * const filename, + std::string * const removed_prefixp, const bool dotdot ) { - // prevent two threads from modifying the list of prefixes at the same time - static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - static std::vector< std::string > prefixes; // list of prefixes const char * p = filename; if( dotdot ) for( int i = 0; filename[i]; ++i ) if( dotdot_at_i( filename, i ) ) p = filename + i + 2; while( *p == '/' || ( *p == '.' && p[1] == '/' ) ) ++p; - if( p != filename ) - { - std::string msg( filename, p - filename ); - unsigned i = 0; - xlock( &mutex ); - while( i < prefixes.size() && prefixes[i] != msg ) ++i; - if( i >= prefixes.size() ) - { - prefixes.push_back( msg ); - msg.insert( 0, "Removing leading '" ); msg += "' from member names."; - show_error( msg.c_str() ); - } - xunlock( &mutex ); - } + if( p != filename ) removed_prefixp->assign( filename, p - filename ); + else removed_prefixp->clear(); // no prefix was removed if( *p == 0 && *filename != 0 ) p = "."; return p; } +/* If msgp is null, print the message, else return the message in *msgp. + If prefix is already in the list, print nothing or return empty *msgp. + Return true if a message is printed or returned in *msgp. 
*/ +bool print_removed_prefix( const std::string & prefix, + std::string * const msgp ) + { + // prevent two threads from modifying the list of prefixes at the same time + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + static std::vector< std::string > prefixes; // list of prefixes + + if( verbosity < 0 || prefix.empty() ) + { if( msgp ) msgp->clear(); return false; } + xlock( &mutex ); + for( unsigned i = 0; i < prefixes.size(); ++i ) + if( prefixes[i] == prefix ) + { xunlock( &mutex ); if( msgp ) msgp->clear(); return false; } + prefixes.push_back( prefix ); + std::string msg( "Removing leading '" ); msg += prefix; + msg += "' from member names."; + if( msgp ) *msgp = msg; else show_error( msg.c_str() ); + xunlock( &mutex ); // put here to prevent mixing calls to show_error + return true; + } + + // set file_size != 0 only for regular files bool fill_headers( const char * const filename, Extended & extended, Tar_header header, long long & file_size, const int flag ) @@ -403,7 +434,7 @@ bool fill_headers( const char * const filename, Extended & extended, { show_file_error( filename, cant_stat, errno ); set_error_status( 1 ); return false; } if( archive_attrs.is_the_archive( st ) ) - { show_file_error( archive_namep, "File is the archive; not dumped." ); + { show_file_error( archive_namep, "Archive can't contain itself; not dumped." ); return false; } init_tar_header( header ); bool force_extended_name = false; @@ -412,19 +443,17 @@ bool fill_headers( const char * const filename, Extended & extended, print_octal( header + mode_o, mode_l - 1, mode & ( S_ISUID | S_ISGID | S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO ) ); - const uid_t uid = (gcl_opts->owner >= 0) ? (uid_t)gcl_opts->owner : st.st_uid; - const gid_t gid = (gcl_opts->group >= 0) ? (gid_t)gcl_opts->group : st.st_gid; - if( uid >= 2 << 20 || gid >= 2 << 20 ) - { show_file_error( filename, "uid or gid is larger than 2_097_151." 
); - set_error_status( 1 ); return false; } - print_octal( header + uid_o, uid_l - 1, uid ); - print_octal( header + gid_o, gid_l - 1, gid ); - const unsigned long long mtime = ( gcl_opts->mtime >= 0 ) ? gcl_opts->mtime : - ( ( st.st_mtime >= 0 ) ? st.st_mtime : 0 ); - if( mtime >= 1ULL << 33 ) - { show_file_error( filename, "mtime is out of ustar range [0, 8_589_934_591]." ); - set_error_status( 1 ); return false; } - print_octal( header + mtime_o, mtime_l - 1, mtime ); + const long long uid = ( gcl_opts->uid >= 0 ) ? gcl_opts->uid : st.st_uid; + const long long gid = ( gcl_opts->gid >= 0 ) ? gcl_opts->gid : st.st_gid; + if( uid_in_ustar_range( uid ) ) print_octal( header + uid_o, uid_l - 1, uid ); + else if( extended.set_uid( uid ) ) force_extended_name = true; + if( uid_in_ustar_range( gid ) ) print_octal( header + gid_o, gid_l - 1, gid ); + else if( extended.set_gid( gid ) ) force_extended_name = true; + const long long mtime = gcl_opts->mtime_set ? gcl_opts->mtime : st.st_mtime; + if( time_in_ustar_range( mtime ) ) + print_octal( header + mtime_o, mtime_l - 1, mtime ); + else { extended.set_atime( gcl_opts->mtime_set ? mtime : st.st_atime ); + extended.set_mtime( mtime ); force_extended_name = true; } Typeflag typeflag; if( S_ISREG( mode ) ) typeflag = tf_regular; else if( S_ISDIR( mode ) ) @@ -458,8 +487,16 @@ bool fill_headers( const char * const filename, Extended & extended, delete[] buf; } if( sz != st.st_size ) - { show_file_error( filename, "Error reading link", (sz < 0) ? 
errno : 0 ); - set_error_status( 1 ); return false; } + { + if( sz < 0 ) + show_file_error( filename, "Error reading symbolic link", errno ); + else + show_file_error( filename, "Wrong size reading symbolic link.\n" + "Please, send a bug report to the maintainers of your filesystem, " + "mentioning\n'wrong st_size of symbolic link'.\nSee " + "http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/sys_stat.h.html" ); + set_error_status( 1 ); return false; + } } else if( S_ISCHR( mode ) || S_ISBLK( mode ) ) { @@ -475,18 +512,17 @@ bool fill_headers( const char * const filename, Extended & extended, else { show_file_error( filename, "Unknown file type." ); set_error_status( 2 ); return false; } header[typeflag_o] = typeflag; -// errno = 0; - const struct passwd * const pw = getpwuid( uid ); - if( pw && pw->pw_name ) - std::strncpy( (char *)header + uname_o, pw->pw_name, uname_l - 1 ); -/* else { show_file_error( filename, "Can't read user name from database", errno ); - set_error_status( 1 ); } */ // numerical only -// errno = 0; - const struct group * const gr = getgrgid( gid ); - if( gr && gr->gr_name ) - std::strncpy( (char *)header + gname_o, gr->gr_name, gname_l - 1 ); -/* else { show_file_error( filename, "Can't read group name from database", errno ); - set_error_status( 1 ); } */ // numerical only + + if( uid == (long long)( (uid_t)uid ) ) // get name if uid is in range + { const struct passwd * const pw = getpwuid( uid ); + if( pw && pw->pw_name ) + std::strncpy( (char *)header + uname_o, pw->pw_name, uname_l - 1 ); } + + if( gid == (long long)( (gid_t)gid ) ) // get name if gid is in range + { const struct group * const gr = getgrgid( gid ); + if( gr && gr->gr_name ) + std::strncpy( (char *)header + gname_o, gr->gr_name, gname_l - 1 ); } + file_size = ( typeflag == tf_regular && st.st_size > 0 && st.st_size <= max_file_size ) ? 
st.st_size : 0; if( file_size >= 1LL << 33 ) @@ -525,7 +561,7 @@ void set_error_status( const int retval ) int final_exit_status( int retval, const bool show_msg ) { - if( !retval && error_status ) + if( retval == 0 && error_status ) { if( show_msg ) show_error( "Exiting with failure status due to previous errors." ); retval = error_status; } @@ -564,6 +600,8 @@ int concatenate( const Cl_options & cl_opts ) const int outfd = to_stdout ? STDOUT_FILENO : open_outstream( cl_opts.archive_name, false ); if( outfd < 0 ) return 1; + if( !check_tty_out( archive_namep, outfd, to_stdout ) ) + { close( outfd ); return 1; } if( !to_stdout && !archive_attrs.init( outfd ) ) { show_file_error( archive_namep, "Can't stat", errno ); return 1; } int compressed; // tri-state bool @@ -571,22 +609,23 @@ int concatenate( const Cl_options & cl_opts ) else { compressed = has_lz_ext( cl_opts.archive_name ); // default value - long long pos = check_appendable( outfd, true ); + long long pos = check_compressed_appendable( outfd, true ); if( pos > 0 ) compressed = true; else if( pos < 0 ) { pos = check_uncompressed_appendable( outfd, true ); if( pos > 0 ) compressed = false; + else if( pos == -2 ) { show_error( mem_msg ); close( outfd ); return 1; } else if( pos < 0 ) { show_file_error( archive_namep, compressed ? "This does not look like an appendable tar.lz archive." : "This does not look like an appendable tar archive." ); - return 2; } + close( outfd ); return 2; } } } int retval = 0; - bool eof_pending = false; + bool eoa_pending = false; for( int i = 0; i < cl_opts.parser.arguments(); ++i ) // copy archives { if( !nonempty_arg( cl_opts.parser, i ) ) continue; // skip opts, empty names @@ -596,18 +635,21 @@ int concatenate( const Cl_options & cl_opts ) if( infd < 0 ) { retval = 1; break; } struct stat st; if( !to_stdout && fstat( infd, &st ) == 0 && archive_attrs.is_the_archive( st ) ) - { show_file_error( filename, "File is the archive; not concatenated." 
); + { show_file_error( filename, "Archive can't contain itself; not concatenated." ); close( infd ); continue; } long long size; if( compressed < 0 ) // not initialized yet { - if( ( size = check_appendable( infd, false ) ) > 0 ) compressed = true; + if( ( size = check_compressed_appendable( infd, false ) ) > 0 ) + compressed = true; else if( ( size = check_uncompressed_appendable( infd, false ) ) > 0 ) compressed = false; - else { size = -1 ; compressed = has_lz_ext( filename ); } + else if( size != -2 ) { size = -1 ; compressed = has_lz_ext( filename ); } } - else size = compressed ? check_appendable( infd, false ) : + else size = compressed ? check_compressed_appendable( infd, false ) : check_uncompressed_appendable( infd, false ); + if( size == -2 ) + { show_error( mem_msg ); close( infd ); retval = 1; break; } if( size < 0 ) { show_file_error( filename, compressed ? "Not an appendable tar.lz archive." : @@ -615,22 +657,22 @@ int concatenate( const Cl_options & cl_opts ) close( infd ); retval = 2; break; } if( !copy_file( infd, outfd, size ) || close( infd ) != 0 ) { show_file_error( filename, "Error copying archive", errno ); - eof_pending = false; retval = 1; break; } - eof_pending = true; + eoa_pending = false; retval = 1; break; } + eoa_pending = true; if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename ); } - if( eof_pending && !write_eof_records( outfd, compressed ) && !retval ) + if( eoa_pending && !write_eoa_records( outfd, compressed ) && retval == 0 ) retval = 1; - if( close( outfd ) != 0 && !retval ) - { show_file_error( archive_namep, "Error closing archive", errno ); - retval = 1; } + if( close( outfd ) != 0 && retval == 0 ) + { show_file_error( archive_namep, eclosa_msg, errno ); retval = 1; } return retval; } -int encode( Cl_options & cl_opts ) +int encode( const Cl_options & cl_opts ) { + if( !grbuf.size() ) { show_error( mem_msg ); return 1; } const bool compressed = ( cl_opts.level >= 0 && cl_opts.level <= 9 ); const bool to_stdout = 
cl_opts.archive_name.empty(); archive_namep = to_stdout ? "(stdout)" : cl_opts.archive_name.c_str(); @@ -652,31 +694,33 @@ int encode( Cl_options & cl_opts ) if( to_stdout ) // create/append to stdout goutfd = STDOUT_FILENO; - else if( !append ) // create archive - { if( ( goutfd = open_outstream( cl_opts.archive_name ) ) < 0 ) return 1; } - else // append to archive - { - if( ( goutfd = open_outstream( cl_opts.archive_name, false ) ) < 0 ) + else // create/append to archive + if( ( goutfd = open_outstream( cl_opts.archive_name, !append ) ) < 0 ) return 1; - if( compressed && check_appendable( goutfd, true ) < 0 ) - { show_file_error( archive_namep, - "This does not look like an appendable tar.lz archive." ); return 2; } - if( !compressed && check_uncompressed_appendable( goutfd, true ) < 0 ) + if( !check_tty_out( archive_namep, goutfd, to_stdout ) ) + { close( goutfd ); return 1; } + if( append && !to_stdout ) + { + if( compressed && check_compressed_appendable( goutfd, true ) < 0 ) { show_file_error( archive_namep, - "This does not look like an appendable tar archive." ); return 2; } + "This does not look like an appendable tar.lz archive." ); + close( goutfd ); return 2; } + if( !compressed ) + { + const long long pos = check_uncompressed_appendable( goutfd, true ); + if( pos == -2 ) { show_error( mem_msg ); close( goutfd ); return 1; } + if( pos < 0 ) { show_file_error( archive_namep, + "This does not look like an appendable tar archive." 
); + close( goutfd ); return 2; } + } } if( !archive_attrs.init( goutfd ) ) - { show_file_error( archive_namep, "Can't stat", errno ); return 1; } + { show_file_error( archive_namep, "Can't stat", errno ); + close( goutfd ); return 1; } if( compressed ) { - const int dictionary_size = option_mapping[cl_opts.level].dictionary_size; - if( cl_opts.data_size <= 0 ) - { - if( cl_opts.level == 0 ) cl_opts.data_size = 1 << 20; - else cl_opts.data_size = 2 * dictionary_size; - } /* CWD is not per-thread; multi-threaded --create can't be used if a -C option appears after a relative filename in the command line. */ if( cl_opts.solidity != asolid && cl_opts.solidity != solid && @@ -684,10 +728,9 @@ int encode( Cl_options & cl_opts ) !option_C_after_relative_filename( cl_opts.parser ) ) { // show_file_error( archive_namep, "Multi-threaded --create" ); - return encode_lz( cl_opts, archive_namep, dictionary_size, - option_mapping[cl_opts.level].match_len_limit, goutfd ); + return encode_lz( cl_opts, archive_namep, goutfd ); } - encoder = LZ_compress_open( dictionary_size, + encoder = LZ_compress_open( option_mapping[cl_opts.level].dictionary_size, option_mapping[cl_opts.level].match_len_limit, LLONG_MAX ); if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) { @@ -695,7 +738,7 @@ int encode( Cl_options & cl_opts ) show_error( mem_msg2 ); else internal_error( "invalid argument to encoder." 
); - return 1; + close( goutfd ); return 1; } } @@ -706,8 +749,7 @@ int encode( Cl_options & cl_opts ) const std::string & arg = cl_opts.parser.argument( i ); const char * filename = arg.c_str(); if( code == 'C' && chdir( filename ) != 0 ) - { show_file_error( filename, "Error changing working directory", errno ); - retval = 1; break; } + { show_file_error( filename, chdir_msg, errno ); retval = 1; break; } if( code ) continue; // skip options if( cl_opts.parser.argument( i ).empty() ) continue; // skip empty names std::string deslashed; // arg without trailing slashes @@ -726,7 +768,7 @@ int encode( Cl_options & cl_opts ) { retval = 1; break; } } - if( !retval ) // write End-Of-Archive records + if( retval == 0 ) // write End-Of-Archive records { enum { bufsize = 2 * header_size }; uint8_t buf[bufsize]; @@ -740,8 +782,7 @@ int encode( Cl_options & cl_opts ) } if( encoder && LZ_compress_close( encoder ) < 0 ) { show_error( "LZ_compress_close failed." ); retval = 1; } - if( close( goutfd ) != 0 && !retval ) - { show_file_error( archive_namep, "Error closing archive", errno ); - retval = 1; } + if( close( goutfd ) != 0 && retval == 0 ) + { show_file_error( archive_namep, eclosa_msg, errno ); retval = 1; } return final_exit_status( retval ); } @@ -45,3 +45,4 @@ public: extern Archive_attrs archive_attrs; const char * const cant_stat = "Can't stat input file"; +const char * const eferec_msg = "Error formatting extended records."; diff --git a/create_lz.cc b/create_lz.cc index 67a6f7a..1acaf23 100644 --- a/create_lz.cc +++ b/create_lz.cc @@ -267,6 +267,7 @@ int add_member_lz( const char * const filename, const struct stat *, { show_error( mem_msg ); if( extended ) delete extended; return 1; } if( !fill_headers( filename, *extended, header, file_size, flag ) ) { delete[] header; delete extended; return 0; } + print_removed_prefix( extended->removed_prefix ); if( gcl_opts->solidity == bsolid && block_is_full( extended->full_size(), file_size, gcl_opts->data_size, @@ 
-304,8 +305,7 @@ extern "C" void * grouper( void * arg ) const std::string & arg = cl_opts.parser.argument( i ); const char * filename = arg.c_str(); if( code == 'C' && chdir( filename ) != 0 ) - { show_file_error( filename, "Error changing working directory", errno ); - exit_fail_mt(); } + { show_file_error( filename, chdir_msg, errno ); exit_fail_mt(); } if( code ) continue; // skip options if( cl_opts.parser.argument( i ).empty() ) continue; // skip empty names std::string deslashed; // arg without trailing slashes @@ -448,7 +448,7 @@ extern "C" void * cworker( void * arg ) { const long long ebsize = ipacket->extended->format_block( rbuf ); if( ebsize < 0 ) - { show_error( "Error formatting extended records." ); exit_fail_mt(); } + { show_error( ( ebsize == -2 ) ? mem_msg2 : eferec_msg ); exit_fail_mt(); } /* Limit the size of the extended block to INT_MAX - 1 so that it can be fed to lzlib as one buffer. */ if( ebsize >= INT_MAX ) @@ -488,8 +488,7 @@ extern "C" void * cworker( void * arg ) loop_encode( buf, size, data, opos, courier, encoder, worker_id ); } if( close( infd ) != 0 ) - { show_file_error( filename, "Error closing file", errno ); - exit_fail_mt(); } + { show_file_error( filename, eclosf_msg, errno ); exit_fail_mt(); } } if( gcl_opts->warn_newer && archive_attrs.is_newer( filename ) ) { show_file_error( filename, "File is newer than the archive." ); @@ -525,13 +524,14 @@ void muxer( Packet_courier & courier, const int outfd ) // init the courier, then start the grouper and the workers and call the muxer int encode_lz( const Cl_options & cl_opts, const char * const archive_namep, - const int dictionary_size, const int match_len_limit, const int outfd ) { const int in_slots = 65536; // max small files (<=512B) in 64 MiB const int num_workers = cl_opts.num_workers; const int total_in_slots = ( INT_MAX / num_workers >= in_slots ) ? 
num_workers * in_slots : INT_MAX; + const int dictionary_size = option_mapping[cl_opts.level].dictionary_size; + const int match_len_limit = option_mapping[cl_opts.level].match_len_limit; gcl_opts = &cl_opts; /* If an error happens after any threads have been started, exit must be @@ -546,7 +546,7 @@ int encode_lz( const Cl_options & cl_opts, const char * const archive_namep, pthread_t grouper_thread; int errcode = pthread_create( &grouper_thread, 0, grouper, &grouper_arg ); if( errcode ) - { show_error( "Can't create grouper thread", errcode ); exit_fail_mt(); } + { show_error( "Can't create grouper thread", errcode ); return 1; } Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers]; pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers]; @@ -579,11 +579,10 @@ int encode_lz( const Cl_options & cl_opts, const char * const archive_namep, { show_error( "Can't join grouper thread", errcode ); exit_fail_mt(); } // write End-Of-Archive records - int retval = !write_eof_records( outfd, true ); + int retval = !write_eoa_records( outfd, true ); - if( close( outfd ) != 0 && !retval ) - { show_file_error( archive_namep, "Error closing archive", errno ); - retval = 1; } + if( close( outfd ) != 0 && retval == 0 ) + { show_file_error( archive_namep, eclosa_msg, errno ); retval = 1; } if( cl_opts.debug_level & 1 ) std::fprintf( stderr, @@ -36,6 +36,7 @@ #include "arg_parser.h" #include "lzip_index.h" #include "archive_reader.h" +#include "decode.h" namespace { @@ -60,10 +61,12 @@ void read_error( const Archive_reader & ar ) } -int skip_member( Archive_reader & ar, const Extended & extended ) +int skip_member( Archive_reader & ar, const Extended & extended, + const Typeflag typeflag ) { - const int ret = ar.skip_member( extended ); - if( ret != 0 ) { read_error( ar ); if( ret == 2 ) return 2; } + if( data_may_follow( typeflag ) ) + { const int ret = ar.skip_member( extended ); + if( ret != 0 ) { read_error( ar ); if( ar.fatal() ) return ret; } } 
return 0; } @@ -80,15 +83,15 @@ int compare_member( const Cl_options & cl_opts, Archive_reader & ar, if( extended.file_size() <= 0 ) return 0; const Typeflag typeflag = (Typeflag)header[typeflag_o]; if( ( typeflag != tf_regular && typeflag != tf_hiperf ) || stat_differs ) - return skip_member( ar, extended ); + return skip_member( ar, extended, typeflag ); // else compare file contents const char * const filename = extended.path().c_str(); const int infd2 = open_instream( filename ); if( infd2 < 0 ) - { set_error_status( 1 ); return skip_member( ar, extended ); } + { set_error_status( 1 ); return skip_member( ar, extended, typeflag ); } int retval = compare_file_contents( estr, ostr, ar, extended.file_size(), filename, infd2 ); - if( retval ) { read_error( ar ); if( retval != 2 ) retval = 0; } + if( retval ) { read_error( ar ); if( !ar.fatal() ) retval = 0; } else { if( estr.size() ) std::fputs( estr.c_str(), stderr ); if( ostr.size() ) { std::fputs( ostr.c_str(), stdout ); std::fflush( stdout ); } } @@ -100,7 +103,7 @@ int list_member( Archive_reader & ar, const Extended & extended, const Tar_header header ) { if( !show_member_name( extended, header, 0, grbuf ) ) return 1; - return skip_member( ar, extended ); + return skip_member( ar, extended, (Typeflag)header[typeflag_o] ); } @@ -108,27 +111,33 @@ int extract_member( const Cl_options & cl_opts, Archive_reader & ar, const Extended & extended, const Tar_header header ) { const char * const filename = extended.path().c_str(); + const Typeflag typeflag = (Typeflag)header[typeflag_o]; if( contains_dotdot( filename ) ) { - show_file_error( filename, "Contains a '..' component, skipping." 
); - return skip_member( ar, extended ); + show_file_error( filename, dotdot_msg ); + return skip_member( ar, extended, typeflag ); } mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits if( geteuid() != 0 && !cl_opts.preserve_permissions ) mode &= ~get_umask(); - const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits - const Typeflag typeflag = (Typeflag)header[typeflag_o]; - const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); int outfd = -1; if( !show_member_name( extended, header, 1, grbuf ) ) return 1; + // remove file (or empty dir) before extraction to prevent following links std::remove( filename ); - make_path( filename ); + if( !make_path( filename ) ) + { + show_file_error( filename, intdir_msg, errno ); + set_error_status( 1 ); + return skip_member( ar, extended, typeflag ); + } + switch( typeflag ) { case tf_regular: case tf_hiperf: outfd = open_outstream( filename ); - if( outfd < 0 ) return 2; + if( outfd < 0 ) + { set_error_status( 1 ); return skip_member( ar, extended, typeflag ); } break; case tf_link: case tf_symlink: @@ -138,18 +147,15 @@ int extract_member( const Cl_options & cl_opts, Archive_reader & ar, if( ( hard && link( linkname, filename ) != 0 ) || ( !hard && symlink( linkname, filename ) != 0 ) ) { - if( verbosity >= 0 ) - std::fprintf( stderr, "Can't %slink '%s' to '%s': %s.\n", - hard ? "" : "sym", linkname, filename, - std::strerror( errno ) ); - return 2; + print_error( errno, cantln_msg, hard ? "" : "sym", linkname, filename ); + set_error_status( 1 ); } } break; case tf_directory: if( mkdir( filename, mode ) != 0 && errno != EEXIST ) { - show_file_error( filename, "Can't create directory", errno ); - return 2; + show_file_error( filename, mkdir_msg, errno ); + set_error_status( 1 ); } break; case tf_chardev: @@ -161,70 +167,74 @@ int extract_member( const Cl_options & cl_opts, Archive_reader & ar, const int dmode = ( typeflag == tf_chardev ? 
S_IFCHR : S_IFBLK ) | mode; if( mknod( filename, dmode, dev ) != 0 ) { - show_file_error( filename, "Can't create device node", errno ); - return 2; + show_file_error( filename, mknod_msg, errno ); + set_error_status( 1 ); } break; } case tf_fifo: - if( mkfifo( filename, mode ) != 0 && errno != EEXIST ) + if( mkfifo( filename, mode ) != 0 ) { - show_file_error( filename, "Can't create FIFO file", errno ); - return 2; + show_file_error( filename, mkfifo_msg, errno ); + set_error_status( 1 ); } break; default: - if( verbosity >= 0 ) - std::fprintf( stderr, "File type '%c' not supported for file '%s'.\n", - typeflag, filename ); - return 2; + print_error( 0, uftype_msg, filename, typeflag ); + set_error_status( 2 ); + return skip_member( ar, extended, typeflag ); } - const uid_t uid = (uid_t)parse_octal( header + uid_o, uid_l ); - const gid_t gid = (gid_t)parse_octal( header + gid_o, gid_l ); - if( !islink && chown( filename, uid, gid ) != 0 && - errno != EPERM && errno != EINVAL ) + const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); + errno = 0; + if( !islink && + ( !uid_gid_in_range( extended.get_uid(), extended.get_gid() ) || + chown( filename, extended.get_uid(), extended.get_gid() ) != 0 ) ) { - show_file_error( filename, "Can't change file owner", errno ); - return 2; + if( outfd >= 0 ) mode &= ~( S_ISUID | S_ISGID | S_ISVTX ); + // chown will in many cases return with EPERM, which can be safely ignored. + if( errno != EPERM && errno != EINVAL ) + { show_file_error( filename, chown_msg, errno ); set_error_status( 1 ); } } - if( typeflag == tf_regular || typeflag == tf_hiperf ) - fchmod( outfd, mode ); // ignore errors + if( outfd >= 0 ) fchmod( outfd, mode ); // ignore errors - const int bufsize = 32 * header_size; - uint8_t buf[bufsize]; - long long rest = extended.file_size(); - const int rem = rest % header_size; - const int padding = rem ? 
header_size - rem : 0; - while( rest > 0 ) + if( data_may_follow( typeflag ) ) { - const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; - const int ret = ar.read( buf, rsize ); - if( ret != 0 ) + const int bufsize = 32 * header_size; + uint8_t buf[bufsize]; + long long rest = extended.file_size(); + const int rem = rest % header_size; + const int padding = rem ? header_size - rem : 0; + while( rest > 0 ) { - read_error( ar ); - if( outfd >= 0 ) + const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; + const int ret = ar.read( buf, rsize ); + if( ret != 0 ) { - if( cl_opts.keep_damaged ) - { writeblock( outfd, buf, std::min( rest, (long long)ar.e_size() ) ); - close( outfd ); } - else { close( outfd ); std::remove( filename ); } + read_error( ar ); + if( outfd >= 0 ) + { + if( cl_opts.keep_damaged ) + { writeblock( outfd, buf, std::min( rest, (long long)ar.e_size() ) ); + close( outfd ); } + else { close( outfd ); std::remove( filename ); } + } + if( ar.fatal() ) return ret; else return 0; } - if( ret == 2 ) return 2; else return 0; + const int wsize = ( rest >= bufsize ) ? bufsize : rest; + if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize ) + { show_file_error( filename, werr_msg, errno ); return 1; } + rest -= wsize; } - const int wsize = ( rest >= bufsize ) ? 
bufsize : rest; - if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize ) - { show_file_error( filename, "Error writing file", errno ); return 2; } - rest -= wsize; } if( outfd >= 0 && close( outfd ) != 0 ) - { show_file_error( filename, "Error closing file", errno ); return 2; } + { show_file_error( filename, eclosf_msg, errno ); return 1; } if( !islink ) { struct utimbuf t; - t.actime = mtime; - t.modtime = mtime; + t.actime = extended.atime().sec(); + t.modtime = extended.mtime().sec(); utime( filename, &t ); // ignore errors } return 0; @@ -233,10 +243,8 @@ int extract_member( const Cl_options & cl_opts, Archive_reader & ar, void format_file_diff( std::string & ostr, const char * const filename, const char * const msg ) - { - if( verbosity < 0 ) return; - { ostr += filename; ostr += ": "; ostr += msg; ostr += '\n'; } - } + { if( verbosity >= 0 ) + { ostr += filename; ostr += ": "; ostr += msg; ostr += '\n'; } } } // end namespace @@ -276,18 +284,21 @@ bool compare_file_type( std::string & estr, std::string & ostr, } if( !cl_opts.ignore_ids ) { - if( (uid_t)parse_octal( header + uid_o, uid_l ) != st.st_uid ) + if( extended.get_uid() != (long long)st.st_uid ) { format_file_diff( ostr, filename, "Uid differs" ); diff = true; } - if( (gid_t)parse_octal( header + gid_o, gid_l ) != st.st_gid ) + if( extended.get_gid() != (long long)st.st_gid ) { format_file_diff( ostr, filename, "Gid differs" ); diff = true; } } if( typeflag != tf_symlink ) { - if( typeflag != tf_directory ) + if( typeflag != tf_directory && + extended.mtime().sec() != (long long)st.st_mtime ) { - const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits - if( mtime != st.st_mtime ) - { format_file_diff( ostr, filename, "Mod time differs" ); diff = true; } + if( (time_t)extended.mtime().sec() == st.st_mtime ) + { if( !cl_opts.ignore_overflow ) { diff = true; + format_file_diff( ostr, filename, "Mod time overflow" ); } } + else { diff = true; + format_file_diff( ostr, filename, 
"Mod time differs" ); } } if( ( typeflag == tf_regular || typeflag == tf_hiperf ) && extended.file_size() != st.st_size ) // don't compare contents @@ -364,6 +375,7 @@ bool compare_file_contents( std::string & estr, std::string & ostr, int decode( const Cl_options & cl_opts ) { + if( !grbuf.size() ) { show_error( mem_msg ); return 1; } // open archive before changing working directory const Archive_descriptor ad( cl_opts.archive_name ); if( ad.infd < 0 ) return 1; @@ -378,8 +390,7 @@ int decode( const Cl_options & cl_opts ) { const char * const dir = cl_opts.parser.argument( i ).c_str(); if( chdir( dir ) != 0 ) - { show_file_error( dir, "Error changing working directory", errno ); - return 1; } + { show_file_error( dir, chdir_msg, errno ); return 1; } } if( !code && cl_opts.parser.argument( i ).size() && !Exclude::excluded( cl_opts.parser.argument( i ).c_str() ) ) @@ -389,16 +400,14 @@ int decode( const Cl_options & cl_opts ) // multi-threaded --list is faster even with 1 thread and 1 file in archive // but multi-threaded --diff and --extract probably need at least 2 of each if( ( cl_opts.program_mode == m_diff || cl_opts.program_mode == m_list || - cl_opts.program_mode == m_extract ) && cl_opts.num_workers > 0 ) + cl_opts.program_mode == m_extract ) && cl_opts.num_workers > 0 && + ad.indexed && ad.lzip_index.members() >= 2 ) // one file + EOA { - if( ad.indexed && ad.lzip_index.members() >= 2 ) // one file + eof - { - // show_file_error( ad.namep, "Is compressed seekable" ); - return decode_lz( cl_opts, ad, name_pending ); - } + // show_file_error( ad.namep, "Is compressed seekable" ); + return decode_lz( cl_opts, ad, name_pending ); } - Archive_reader ar( ad ); + Archive_reader ar( ad ); // serial reader Extended extended; // metadata from extended records int retval = 0; bool prev_extended = false; // prev header was extended @@ -406,12 +415,12 @@ int decode( const Cl_options & cl_opts ) { Tar_header header; const int ret = ar.read( header, header_size ); - if( 
ret != 0 ) { read_error( ar ); if( ret == 2 ) { retval = 2; break; } } - if( ret != 0 || !verify_ustar_chksum( header ) ) + if( ret != 0 ) { read_error( ar ); if( ar.fatal() ) { retval = ret; break; } } + if( ret != 0 || !verify_ustar_chksum( header ) ) // error or EOA { - if( ret == 0 && block_is_zero( header, header_size ) ) + if( ret == 0 && block_is_zero( header, header_size ) ) // EOA { - if( !prev_extended || cl_opts.permissive ) break; // EOF + if( !prev_extended || cl_opts.permissive ) break; show_file_error( ad.namep, fv_msg1 ); retval = 2; break; } @@ -427,23 +436,26 @@ int decode( const Cl_options & cl_opts ) if( prev_extended && !cl_opts.permissive ) { show_file_error( ad.namep, fv_msg2 ); retval = 2; break; } Extended dummy; // global headers are parsed and ignored - const int ret = ar.parse_records( dummy, header, grbuf, true ); - if( ret != 0 ) { show_file_error( ad.namep, gblrec_msg ); skip_warn(); - set_error_status( ret ); } + const int ret = ar.parse_records( dummy, header, grbuf, gblrec_msg, true ); + if( ret != 0 ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); + if( ar.fatal() ) { retval = ret; break; } + skip_warn(); set_error_status( ret ); } continue; } if( typeflag == tf_extended ) { if( prev_extended && !cl_opts.permissive ) { show_file_error( ad.namep, fv_msg3 ); retval = 2; break; } - const int ret = ar.parse_records( extended, header, grbuf, + const int ret = ar.parse_records( extended, header, grbuf, extrec_msg, cl_opts.permissive ); - if( ret != 0 ) { show_file_error( ad.namep, extrec_msg ); skip_warn(); - extended.reset(); set_error_status( ret ); } + if( ret != 0 ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); + if( ar.fatal() ) { retval = ret; break; } + skip_warn(); extended.reset(); set_error_status( ret ); } else if( !extended.crc_present() && cl_opts.missing_crc ) - { show_file_error( ad.namep, mcrc_msg ); retval = 2; break; } - prev_extended = true; - continue; + { show_file_error( ad.namep, miscrc_msg 
); retval = 2; break; } + prev_extended = true; continue; } prev_extended = false; @@ -451,26 +463,30 @@ int decode( const Cl_options & cl_opts ) // members without name are skipped except when listing if( check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) ) - retval = skip_member( ar, extended ); - else if( cl_opts.program_mode == m_list ) - retval = list_member( ar, extended, header ); - else if( extended.path().empty() ) retval = skip_member( ar, extended ); - else if( cl_opts.program_mode == m_diff ) - retval = compare_member( cl_opts, ar, extended, header ); - else retval = extract_member( cl_opts, ar, extended, header ); + retval = skip_member( ar, extended, typeflag ); + else + { + print_removed_prefix( extended.removed_prefix ); + if( cl_opts.program_mode == m_list ) + retval = list_member( ar, extended, header ); + else if( extended.path().empty() ) + retval = skip_member( ar, extended, typeflag ); + else if( cl_opts.program_mode == m_diff ) + retval = compare_member( cl_opts, ar, extended, header ); + else retval = extract_member( cl_opts, ar, extended, header ); + } extended.reset(); if( retval ) { show_error( "Error is not recoverable: exiting now." ); break; } } - if( close( ad.infd ) != 0 && !retval ) - { show_file_error( ad.namep, "Error closing archive", errno ); - retval = 1; } + if( close( ad.infd ) != 0 && retval == 0 ) + { show_file_error( ad.namep, eclosa_msg, errno ); retval = 1; } if( retval == 0 ) for( int i = 0; i < cl_opts.parser.arguments(); ++i ) if( nonempty_arg( cl_opts.parser, i ) && name_pending[i] ) - { show_file_error( cl_opts.parser.argument( i ).c_str(), - "Not found in archive." 
); retval = 1; } + { show_file_error( cl_opts.parser.argument( i ).c_str(), nfound_msg ); + retval = 1; } return final_exit_status( retval, cl_opts.program_mode != m_diff ); } diff --git a/decode.h b/decode.h new file mode 100644 index 0000000..45143fd --- /dev/null +++ b/decode.h @@ -0,0 +1,32 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +inline bool data_may_follow( const Typeflag typeflag ) + { return typeflag <= 0 || typeflag >= 7; } + +inline bool uid_gid_in_range( const long long uid, const long long gid ) + { return uid == (long long)( (uid_t)uid ) && + gid == (long long)( (gid_t)gid ); } + +const char * const dotdot_msg = "Contains a '..' 
component, skipping."; +const char * const intdir_msg = "Failed to create intermediate directory"; +const char * const cantln_msg = "Can't %slink '%s' to '%s'"; +const char * const mkdir_msg = "Can't create directory"; +const char * const mknod_msg = "Can't create device node"; +const char * const mkfifo_msg = "Can't create FIFO file"; +const char * const uftype_msg = "%s: Unknown file type '%c', skipping."; +const char * const chown_msg = "Can't change file owner"; diff --git a/decode_lz.cc b/decode_lz.cc index a941ace..8780eab 100644 --- a/decode_lz.cc +++ b/decode_lz.cc @@ -37,6 +37,7 @@ #include "arg_parser.h" #include "lzip_index.h" #include "archive_reader.h" +#include "decode.h" /* When a problem is detected by any worker: - the worker requests mastership and returns. @@ -48,14 +49,18 @@ namespace { const char * const other_msg = "Other worker found an error."; +/* line is preformatted and newline terminated except for prefix, error. + ok with an empty line is a no-op. */ struct Packet // member name and metadata or error message { - enum Status { ok, member_done, diag, error }; + enum Status { ok, member_done, diag, prefix, error1, error2 }; + long member_id; // lzip member containing the header of this tar member std::string line; // member name and metadata ready to print, if any Status status; // diagnostics and errors go to stderr - Packet( const long i, const char * const msg, const Status s = ok ) - : member_id( i ), line( msg ), status( s ) {} + int errcode; // for error + Packet( const long i, const char * const msg, const Status s, const int e ) + : member_id( i ), line( msg ), status( s ), errcode( e ) {} }; @@ -65,9 +70,9 @@ public: unsigned ocheck_counter; unsigned owait_counter; private: - long error_member_id; // first lzip member with error/misalign/eof + long error_member_id; // first lzip member with error/misalign/eoa/eof int deliver_worker_id; // worker queue currently delivering packets - int master_worker_id; // worker in charge if 
error/misalignment/eof + int master_worker_id; // worker in charge if error/misalign/eoa/eof std::vector< std::queue< const Packet * > > opacket_queues; int num_working; // number of workers still running const int num_workers; // number of workers @@ -76,7 +81,7 @@ private: pthread_cond_t oav_or_exit; // output packet available or all workers exited std::vector< pthread_cond_t > slot_av; // output slot available pthread_cond_t check_master; - bool eof_found_; + bool eoa_found_; // EOA blocks found Packet_courier( const Packet_courier & ); // declared as private void operator=( const Packet_courier & ); // declared as private @@ -87,7 +92,7 @@ public: error_member_id( -1 ), deliver_worker_id( 0 ), master_worker_id( -1 ), opacket_queues( workers ), num_working( workers ), num_workers( workers ), out_slots( slots ), slot_av( workers ), - eof_found_( false ) + eoa_found_( false ) { xinit_mutex( &omutex ); xinit_cond( &oav_or_exit ); for( unsigned i = 0; i < slot_av.size(); ++i ) xinit_cond( &slot_av[i] ); @@ -101,8 +106,8 @@ public: xdestroy_cond( &oav_or_exit ); xdestroy_mutex( &omutex ); } - bool eof_found() const { return eof_found_; } - void report_eof() { eof_found_ = true; } + bool eoa_found() const { return eoa_found_; } + void report_eoa() { eoa_found_ = true; } bool mastership_granted() const { return master_worker_id >= 0; } @@ -142,10 +147,10 @@ public: /* Collect a packet from a worker. If a packet is rejected, the worker must terminate. 
*/ bool collect_packet( const long member_id, const int worker_id, - const char * const msg, - const Packet::Status status = Packet::ok ) + const char * const msg, const Packet::Status status, + const int errcode = 0 ) { - const Packet * const opacket = new Packet( member_id, msg, status ); + const Packet * const opacket = new Packet( member_id, msg, status, errcode ); xlock( &omutex ); if( ( mastership_granted() && master_worker_id != worker_id ) || ( error_member_id >= 0 && error_member_id < opacket->member_id ) ) @@ -228,19 +233,31 @@ public: }; -const char * skip_member_lz( Archive_reader_i & ar, Packet_courier & courier, - const Extended & extended, const long member_id, - const int worker_id ) +struct Trival // triple result value { - if( ar.skip_member( extended ) != 0 ) return ar.e_msg(); - if( !courier.collect_packet( member_id, worker_id, "", - ar.at_member_end() ? Packet::member_done : Packet::ok ) ) - return other_msg; - return 0; + const char * msg; + int errcode; + int retval; + explicit Trival( const char * const s = 0, const int e = 0, const int r = 0 ) + : msg( s ), errcode( e ), retval( r ) {} + }; + + +Trival skip_member_lz( Archive_reader_i & ar, Packet_courier & courier, + const Extended & extended, const long member_id, + const int worker_id, const Typeflag typeflag ) + { + if( data_may_follow( typeflag ) ) + { const int ret = ar.skip_member( extended ); + if( ret != 0 ) return Trival( ar.e_msg(), ar.e_code(), ret ); } + if( ar.at_member_end() && + !courier.collect_packet( member_id, worker_id, "", Packet::member_done ) ) + return Trival( other_msg, 0, 1); + return Trival(); } -const char * compare_member_lz( const Cl_options & cl_opts, +Trival compare_member_lz( const Cl_options & cl_opts, Archive_reader_i & ar, Packet_courier & courier, const Extended & extended, const Tar_header header, Resizable_buffer & rbuf, const long member_id, @@ -248,7 +265,7 @@ const char * compare_member_lz( const Cl_options & cl_opts, { if( verbosity < 1 ) 
rbuf()[0] = 0; else if( !format_member_name( extended, header, rbuf, verbosity > 1 ) ) - return mem_msg; + return Trival( mem_msg, 0, 1 ); std::string estr, ostr; const bool stat_differs = !compare_file_type( estr, ostr, cl_opts, extended, header ); @@ -260,84 +277,95 @@ const char * compare_member_lz( const Cl_options & cl_opts, ostr.c_str(), Packet::ok ) ) || ( extended.file_size() <= 0 && ar.at_member_end() && !courier.collect_packet( member_id, worker_id, "", Packet::member_done ) ) ) - return other_msg; - if( extended.file_size() <= 0 ) return 0; + return Trival( other_msg, 0, 1 ); + if( extended.file_size() <= 0 ) return Trival(); const Typeflag typeflag = (Typeflag)header[typeflag_o]; if( ( typeflag != tf_regular && typeflag != tf_hiperf ) || stat_differs ) - return skip_member_lz( ar, courier, extended, member_id, worker_id ); + return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag ); // else compare file contents const char * const filename = extended.path().c_str(); const int infd2 = open_instream( filename ); if( infd2 < 0 ) { set_error_status( 1 ); - return skip_member_lz( ar, courier, extended, member_id, worker_id ); } - int retval = compare_file_contents( estr, ostr, ar, extended.file_size(), - filename, infd2 ); - if( retval ) return ar.e_msg(); + return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag ); } + const int ret = compare_file_contents( estr, ostr, ar, extended.file_size(), + filename, infd2 ); + if( ret != 0 ) return Trival( ar.e_msg(), ar.e_code(), ret ); if( ( estr.size() && !courier.collect_packet( member_id, worker_id, estr.c_str(), Packet::diag ) ) || ( ostr.size() && !courier.collect_packet( member_id, worker_id, ostr.c_str(), Packet::ok ) ) || ( ar.at_member_end() && !courier.collect_packet( member_id, worker_id, "", Packet::member_done ) ) ) - return other_msg; - return 0; + return Trival( other_msg, 0, 1 ); + return Trival(); } -const char * list_member_lz( Archive_reader_i & ar, 
Packet_courier & courier, - const Extended & extended, const Tar_header header, - Resizable_buffer & rbuf, const long member_id, - const int worker_id ) +Trival list_member_lz( Archive_reader_i & ar, Packet_courier & courier, + const Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const long member_id, + const int worker_id ) { if( verbosity < 0 ) rbuf()[0] = 0; else if( !format_member_name( extended, header, rbuf, verbosity > 0 ) ) - return mem_msg; - const int ret = ar.skip_member( extended ); // print name even on error + return Trival( mem_msg, 0, 1 ); + const int ret = data_may_follow( (Typeflag)header[typeflag_o] ) ? + ar.skip_member( extended ) : 0; // print name even on read error if( !courier.collect_packet( member_id, worker_id, rbuf(), ar.at_member_end() ? Packet::member_done : Packet::ok ) ) - return other_msg; - if( ret != 0 ) return ar.e_msg(); - return 0; + return Trival( other_msg, 0, 1 ); + if( ret != 0 ) return Trival( ar.e_msg(), ar.e_code(), ret ); + return Trival(); } -const char * extract_member_lz( const Cl_options & cl_opts, - Archive_reader_i & ar, Packet_courier & courier, - const Extended & extended, const Tar_header header, - Resizable_buffer & rbuf, const long member_id, - const int worker_id, Name_monitor & name_monitor ) +Trival extract_member_lz( const Cl_options & cl_opts, + Archive_reader_i & ar, Packet_courier & courier, + const Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const long member_id, + const int worker_id, Name_monitor & name_monitor ) { - // skip member if another copy is already being extracted by another thread - if( !name_monitor.reserve_name( worker_id, extended.path() ) ) - return skip_member_lz( ar, courier, extended, member_id, worker_id ); const char * const filename = extended.path().c_str(); + const Typeflag typeflag = (Typeflag)header[typeflag_o]; if( contains_dotdot( filename ) ) { - if( verbosity >= 0 ) - { std::string estr( extended.path() ); - estr += 
": Contains a '..' component, skipping."; - if( !courier.collect_packet( member_id, worker_id, estr.c_str(), - Packet::diag ) ) return other_msg; } - return skip_member_lz( ar, courier, extended, member_id, worker_id ); + if( format_file_error( rbuf, filename, dotdot_msg ) && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag ); + } + // skip member if another copy is already being extracted by another thread + if( !name_monitor.reserve_name( worker_id, extended.path() ) ) + { + if( verbosity >= 3 && format_file_error( rbuf, filename, + "Is being extracted by another thread, skipping." ) && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag ); } mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits if( geteuid() != 0 && !cl_opts.preserve_permissions ) mode &= ~get_umask(); - const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits - const Typeflag typeflag = (Typeflag)header[typeflag_o]; - const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); int outfd = -1; - if( verbosity < 1 ) rbuf()[0] = 0; - else if( !format_member_name( extended, header, rbuf, verbosity > 1 ) ) - return mem_msg; - if( rbuf()[0] && !courier.collect_packet( member_id, worker_id, rbuf(), - Packet::ok ) ) return other_msg; + if( verbosity >= 1 ) + { + if( !format_member_name( extended, header, rbuf, verbosity > 1 ) ) + return Trival( mem_msg, 0, 1 ); + if( !courier.collect_packet( member_id, worker_id, rbuf(), Packet::ok ) ) + return Trival( other_msg, 0, 1 ); + } + /* Remove file before extraction to prevent following links. + Don't remove an empty dir because other thread may need it. 
*/ if( typeflag != tf_directory ) std::remove( filename ); - if( !make_path( filename ) && verbosity >= 0 ) - { std::string estr( extended.path() ); - estr += ": warning: Failed to create intermediate directory."; - if( !courier.collect_packet( member_id, worker_id, estr.c_str(), - Packet::diag ) ) return other_msg; } + if( !make_path( filename ) ) + { + if( format_file_error( rbuf, filename, intdir_msg, errno ) && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + set_error_status( 1 ); + return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag ); + } + switch( typeflag ) { case tf_regular: @@ -345,10 +373,12 @@ const char * extract_member_lz( const Cl_options & cl_opts, outfd = open_outstream( filename, true, &rbuf ); if( outfd < 0 ) { - if( verbosity >= 0 && !courier.collect_packet( member_id, worker_id, - rbuf(), Packet::diag ) ) return other_msg; - set_error_status( 2 ); - return skip_member_lz( ar, courier, extended, member_id, worker_id ); + if( verbosity >= 0 && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + set_error_status( 1 ); + return skip_member_lz( ar, courier, extended, member_id, worker_id, + typeflag ); } break; case tf_link: @@ -359,21 +389,11 @@ const char * extract_member_lz( const Cl_options & cl_opts, if( ( hard && link( linkname, filename ) != 0 ) || ( !hard && symlink( linkname, filename ) != 0 ) ) { - if( verbosity >= 0 ) - { - const int saved_errno = errno; - const int size = - snprintf( rbuf(), rbuf.size(), "Can't %slink '%s' to '%s': %s.\n", - hard ? "" : "sym", linkname, filename, - std::strerror( saved_errno ) ); - if( size > 0 && (unsigned)size > rbuf.size() && rbuf.resize( size ) ) - snprintf( rbuf(), rbuf.size(), "Can't %slink '%s' to '%s': %s.\n", - hard ? 
"" : "sym", linkname, filename, - std::strerror( saved_errno ) ); - if( verbosity >= 0 && !courier.collect_packet( member_id, worker_id, - rbuf(), Packet::diag ) ) return other_msg; - } - set_error_status( 2 ); + if( format_error( rbuf, errno, cantln_msg, hard ? "" : "sym", + linkname, filename ) && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + set_error_status( 1 ); } } break; case tf_directory: @@ -384,12 +404,10 @@ const char * extract_member_lz( const Cl_options & cl_opts, { exists = false; std::remove( filename ); } if( !exists && mkdir( filename, mode ) != 0 && errno != EEXIST ) { - if( verbosity >= 0 ) - { snprintf( rbuf(), rbuf.size(), "%s: Can't create directory: %s\n", - filename, std::strerror( errno ) ); - if( !courier.collect_packet( member_id, worker_id, rbuf(), - Packet::diag ) ) return other_msg; } - set_error_status( 2 ); + if( format_file_error( rbuf, filename, mkdir_msg, errno ) && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + set_error_status( 1 ); } } break; case tf_chardev: @@ -401,92 +419,93 @@ const char * extract_member_lz( const Cl_options & cl_opts, const int dmode = ( typeflag == tf_chardev ? 
S_IFCHR : S_IFBLK ) | mode; if( mknod( filename, dmode, dev ) != 0 ) { - if( verbosity >= 0 ) - { snprintf( rbuf(), rbuf.size(), "%s: Can't create device node: %s\n", - filename, std::strerror( errno ) ); - if( !courier.collect_packet( member_id, worker_id, rbuf(), - Packet::diag ) ) return other_msg; } - set_error_status( 2 ); + if( format_file_error( rbuf, filename, mknod_msg, errno ) && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + set_error_status( 1 ); } break; } case tf_fifo: - if( mkfifo( filename, mode ) != 0 && errno != EEXIST ) + if( mkfifo( filename, mode ) != 0 ) { - if( verbosity >= 0 ) - { snprintf( rbuf(), rbuf.size(), "%s: Can't create FIFO file: %s\n", - filename, std::strerror( errno ) ); - if( !courier.collect_packet( member_id, worker_id, rbuf(), - Packet::diag ) ) return other_msg; } - set_error_status( 2 ); + if( format_file_error( rbuf, filename, mkfifo_msg, errno ) && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + set_error_status( 1 ); } break; default: - if( verbosity >= 0 ) - { snprintf( rbuf(), rbuf.size(), - "File type '%c' not supported for file '%s'.\n", - typeflag, filename ); - if( !courier.collect_packet( member_id, worker_id, rbuf(), - Packet::diag ) ) return other_msg; } + if( format_error( rbuf, 0, uftype_msg, filename, typeflag ) && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); set_error_status( 2 ); + return skip_member_lz( ar, courier, extended, member_id, worker_id, + typeflag ); } - const uid_t uid = (uid_t)parse_octal( header + uid_o, uid_l ); - const gid_t gid = (gid_t)parse_octal( header + gid_o, gid_l ); - if( !islink && chown( filename, uid, gid ) != 0 && - errno != EPERM && errno != EINVAL ) + const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); + errno = 0; + if( !islink && + ( !uid_gid_in_range( 
extended.get_uid(), extended.get_gid() ) || + chown( filename, extended.get_uid(), extended.get_gid() ) != 0 ) ) { - if( verbosity >= 0 ) - { snprintf( rbuf(), rbuf.size(), "%s: Can't change file owner: %s\n", - filename, std::strerror( errno ) ); - if( !courier.collect_packet( member_id, worker_id, rbuf(), - Packet::diag ) ) return other_msg; } - set_error_status( 2 ); + if( outfd >= 0 ) mode &= ~( S_ISUID | S_ISGID | S_ISVTX ); + // chown will in many cases return with EPERM, which can be safely ignored. + if( errno != EPERM && errno != EINVAL ) + { + if( format_file_error( rbuf, filename, chown_msg, errno ) && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + set_error_status( 1 ); + } } - if( typeflag == tf_regular || typeflag == tf_directory || - typeflag == tf_hiperf ) fchmod( outfd, mode ); // ignore errors + if( outfd >= 0 ) fchmod( outfd, mode ); // ignore errors - const int bufsize = 32 * header_size; - uint8_t buf[bufsize]; - long long rest = extended.file_size(); - const int rem = rest % header_size; - const int padding = rem ? header_size - rem : 0; - while( rest > 0 ) + if( data_may_follow( typeflag ) ) { - const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; - const int ret = ar.read( buf, rsize ); - if( ret != 0 ) + const int bufsize = 32 * header_size; + uint8_t buf[bufsize]; + long long rest = extended.file_size(); + const int rem = rest % header_size; + const int padding = rem ? header_size - rem : 0; + while( rest > 0 ) { - if( outfd >= 0 ) + const int rsize = ( rest >= bufsize ) ? 
bufsize : rest + padding; + const int ret = ar.read( buf, rsize ); + if( ret != 0 ) { - if( cl_opts.keep_damaged ) - { writeblock( outfd, buf, std::min( rest, (long long)ar.e_size() ) ); - close( outfd ); } - else { close( outfd ); std::remove( filename ); } + if( outfd >= 0 ) + { + if( cl_opts.keep_damaged ) + { writeblock( outfd, buf, std::min( rest, (long long)ar.e_size() ) ); + close( outfd ); } + else { close( outfd ); std::remove( filename ); } + } + return Trival( ar.e_msg(), ar.e_code(), ret ); } - return ar.e_msg(); + const int wsize = ( rest >= bufsize ) ? bufsize : rest; + if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize ) + { format_file_error( rbuf, filename, werr_msg, errno ); + return Trival( rbuf(), 0, 1 ); } + rest -= wsize; } - const int wsize = ( rest >= bufsize ) ? bufsize : rest; - if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize ) - { snprintf( rbuf(), rbuf.size(), "%s: Error writing file: %s\n", - filename, std::strerror( errno ) ); return rbuf(); } - rest -= wsize; } if( outfd >= 0 && close( outfd ) != 0 ) - { snprintf( rbuf(), rbuf.size(), "%s: Error closing file: %s\n", - filename, std::strerror( errno ) ); return rbuf(); } + { format_file_error( rbuf, filename, eclosf_msg, errno ); + return Trival( rbuf(), 0, 1 ); } if( !islink ) { struct utimbuf t; - t.actime = mtime; - t.modtime = mtime; + t.actime = extended.atime().sec(); + t.modtime = extended.mtime().sec(); utime( filename, &t ); // ignore errors } if( ar.at_member_end() && !courier.collect_packet( member_id, worker_id, "", Packet::member_done ) ) - return other_msg; - return 0; + return Trival( other_msg, 0, 1 ); + return Trival(); } @@ -521,7 +540,7 @@ extern "C" void * dworker( void * arg ) Archive_reader_i ar( ad ); // 1 of N parallel readers if( !rbuf.size() || ar.fatal() ) { if( courier.request_mastership( worker_id, worker_id ) ) - courier.collect_packet( worker_id, worker_id, mem_msg, Packet::error ); + courier.collect_packet( worker_id, worker_id, 
mem_msg, Packet::error1 ); goto done; } for( long i = worker_id; !master && i < ad.lzip_index.members(); i += num_workers ) @@ -545,7 +564,7 @@ extern "C" void * dworker( void * arg ) if( !courier.request_mastership( i, worker_id ) ) goto done; master = true; if( data_end >= ad.lzip_index.udata_size() ) - { courier.collect_packet( i, worker_id, end_msg, Packet::error ); + { courier.collect_packet( i, worker_id, end_msg, Packet::error2 ); goto done; } data_end = ad.lzip_index.udata_size(); if( ar.data_pos() == data_end && !prev_extended ) break; @@ -554,30 +573,31 @@ extern "C" void * dworker( void * arg ) const int ret = ar.read( header, header_size ); if( ret != 0 ) { if( courier.request_mastership( i, worker_id ) ) - courier.collect_packet( i, worker_id, ar.e_msg(), Packet::error ); + courier.collect_packet( i, worker_id, ar.e_msg(), + ( ret == 1 ) ? Packet::error1 : Packet::error2, ar.e_code() ); goto done; } - if( !verify_ustar_chksum( header ) ) + if( !verify_ustar_chksum( header ) ) // error or EOA { if( !courier.request_mastership( i, worker_id ) ) goto done; - if( block_is_zero( header, header_size ) ) // EOF + if( block_is_zero( header, header_size ) ) // EOA { - if( !prev_extended || cl_opts.permissive ) courier.report_eof(); - else courier.collect_packet( i, worker_id, fv_msg1, Packet::error ); + if( !prev_extended || cl_opts.permissive ) courier.report_eoa(); + else courier.collect_packet( i, worker_id, fv_msg1, Packet::error2 ); goto done; } courier.collect_packet( i, worker_id, ( ar.data_pos() > header_size ) ? 
- bad_hdr_msg : posix_lz_msg, Packet::error ); + bad_hdr_msg : posix_lz_msg, Packet::error2 ); goto done; } const Typeflag typeflag = (Typeflag)header[typeflag_o]; if( typeflag == tf_global ) { - const char * msg = 0; + const char * msg = 0; int ret = 2; Extended dummy; // global headers are parsed and ignored if( prev_extended && !cl_opts.permissive ) msg = fv_msg2; - else if( ar.parse_records( dummy, header, rbuf, true ) != 0 ) - msg = gblrec_msg; + else if( ( ret = ar.parse_records( dummy, header, rbuf, gblrec_msg, + true ) ) != 0 ) msg = ar.e_msg(); else { if( ar.data_pos() == data_end && // end of lzip member or EOF @@ -586,20 +606,22 @@ extern "C" void * dworker( void * arg ) continue; } if( courier.request_mastership( i, worker_id ) ) - courier.collect_packet( i, worker_id, msg, Packet::error ); + courier.collect_packet( i, worker_id, msg, ( ret == 1 ) ? + Packet::error1 : Packet::error2 ); goto done; } if( typeflag == tf_extended ) { - const char * msg = 0; + const char * msg = 0; int ret = 2; if( prev_extended && !cl_opts.permissive ) msg = fv_msg3; - else if( ar.parse_records( extended, header, rbuf, - cl_opts.permissive ) != 0 ) msg = extrec_msg; + else if( ( ret = ar.parse_records( extended, header, rbuf, extrec_msg, + cl_opts.permissive ) ) != 0 ) msg = ar.e_msg(); else if( !extended.crc_present() && cl_opts.missing_crc ) - msg = mcrc_msg; + { msg = miscrc_msg; ret = 2; } else { prev_extended = true; continue; } if( courier.request_mastership( i, worker_id ) ) - courier.collect_packet( i, worker_id, msg, Packet::error ); + courier.collect_packet( i, worker_id, msg, ( ret == 1 ) ? + Packet::error1 : Packet::error2 ); goto done; } prev_extended = false; @@ -609,21 +631,30 @@ extern "C" void * dworker( void * arg ) /* Skip members with an empty name in the ustar header. If there is an extended header in a previous lzip member, its worker will request mastership. Else the ustar-only unnamed member will be ignored. 
*/ - const char * msg; + Trival trival; if( check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) ) - msg = skip_member_lz( ar, courier, extended, i, worker_id ); - else if( cl_opts.program_mode == m_list ) - msg = list_member_lz( ar, courier, extended, header, rbuf, i, worker_id ); - else if( extended.path().empty() ) - msg = skip_member_lz( ar, courier, extended, i, worker_id ); - else if( cl_opts.program_mode == m_diff ) - msg = compare_member_lz( cl_opts, ar, courier, extended, header, - rbuf, i, worker_id ); - else msg = extract_member_lz( cl_opts, ar, courier, extended, header, - rbuf, i, worker_id, name_monitor ); - if( msg ) // fatal error - { if( courier.request_mastership( i, worker_id ) ) - courier.collect_packet( i, worker_id, msg, Packet::error ); + trival = skip_member_lz( ar, courier, extended, i, worker_id, typeflag ); + else + { + std::string rpmsg; + if( print_removed_prefix( extended.removed_prefix, &rpmsg ) && + !courier.collect_packet( i, worker_id, rpmsg.c_str(), Packet::prefix ) ) + { trival = Trival( other_msg, 0, 1 ); goto fatal; } + if( cl_opts.program_mode == m_list ) + trival = list_member_lz( ar, courier, extended, header, rbuf, i, worker_id ); + else if( extended.path().empty() ) + trival = skip_member_lz( ar, courier, extended, i, worker_id, typeflag ); + else if( cl_opts.program_mode == m_diff ) + trival = compare_member_lz( cl_opts, ar, courier, extended, header, + rbuf, i, worker_id ); + else trival = extract_member_lz( cl_opts, ar, courier, extended, header, + rbuf, i, worker_id, name_monitor ); + } + if( trival.retval ) // fatal error +fatal: { if( courier.request_mastership( i, worker_id ) ) + courier.collect_packet( i, worker_id, trival.msg, + ( trival.retval == 1 ) ? 
Packet::error1 : Packet::error2, + trival.errcode ); goto done; } extended.reset(); } @@ -639,26 +670,28 @@ done: */ void muxer( const char * const archive_namep, Packet_courier & courier ) { - bool error = false; - while( !error ) + int retval = 0; + while( retval == 0 ) { const Packet * const opacket = courier.deliver_packet(); if( !opacket ) break; // queue is empty. all workers exited switch( opacket->status ) { - case Packet::error: - show_file_error( archive_namep, opacket->line.c_str() ); - error = true; break; + case Packet::error1: + case Packet::error2: + show_file_error( archive_namep, opacket->line.c_str(), opacket->errcode ); + retval = ( opacket->status == Packet::error1 ) ? 1 : 2; break; + case Packet::prefix: show_error( opacket->line.c_str() ); break; case Packet::diag: std::fputs( opacket->line.c_str(), stderr ); break; default: if( opacket->line.size() ) { std::fputs( opacket->line.c_str(), stdout ); std::fflush( stdout ); } } delete opacket; } - if( !error && !courier.eof_found() ) // no worker found EOF blocks - { show_file_error( archive_namep, end_msg ); error = true; } - if( error ) exit_fail_mt( 2 ); + if( retval == 0 && !courier.eoa_found() ) // no worker found EOA blocks + { show_file_error( archive_namep, end_msg ); retval = 2; } + if( retval ) exit_fail_mt( retval ); } } // end namespace @@ -710,13 +743,13 @@ int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad, int retval = 0; if( close( ad.infd ) != 0 ) - { show_file_error( ad.namep, "Error closing archive", errno ); retval = 1; } + { show_file_error( ad.namep, eclosa_msg, errno ); retval = 1; } if( retval == 0 ) for( int i = 0; i < cl_opts.parser.arguments(); ++i ) if( nonempty_arg( cl_opts.parser, i ) && name_pending[i] ) - { show_file_error( cl_opts.parser.argument( i ).c_str(), - "Not found in archive." 
); retval = 1; } + { show_file_error( cl_opts.parser.argument( i ).c_str(), nfound_msg ); + retval = 1; } if( cl_opts.debug_level & 1 ) std::fprintf( stderr, @@ -725,7 +758,6 @@ int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad, courier.ocheck_counter, courier.owait_counter ); - Exclude::clear(); // avoid error with gcc 3.3.6 if( !courier.finished() ) internal_error( "courier not finished." ); return final_exit_status( retval, cl_opts.program_mode != m_diff ); } @@ -34,7 +34,7 @@ bool safe_seek( const int fd, const long long pos ) { if( lseek( fd, pos, SEEK_SET ) == pos ) return true; - show_error( "Seek error", errno ); return false; + show_error( seek_msg, errno ); return false; } @@ -49,7 +49,8 @@ int tail_copy( const Arg_parser & parser, const Archive_descriptor & ad, { show_file_error( ad.namep, "Error during tail copy." ); return retval ? retval : 1; } const long long ostream_pos = lseek( outfd, 0, SEEK_CUR ); - if( ostream_pos < 0 ) { show_error( "Seek error", errno ); retval = 1; } + if( ostream_pos < 0 ) + { show_file_error( ad.namep, seek_msg, errno ); retval = 1; } else if( ostream_pos > 0 && ostream_pos < ad.lzip_index.file_size() ) { int ret; @@ -62,15 +63,14 @@ int tail_copy( const Arg_parser & parser, const Archive_descriptor & ad, } } - if( ( close( outfd ) != 0 || close( ad.infd ) != 0 ) && !retval ) - { show_file_error( ad.namep, "Error closing archive", errno ); retval = 1; } + if( ( close( outfd ) | close( ad.infd ) ) != 0 && retval == 0 ) + { show_file_error( ad.namep, eclosa_msg, errno ); retval = 1; } - if( retval == 0 ) for( int i = 0; i < parser.arguments(); ++i ) - if( nonempty_arg( parser, i ) && name_pending[i] ) - { - show_file_error( parser.argument( i ).c_str(), "Not found in archive." 
); - retval = 1; - } + if( retval == 0 ) + for( int i = 0; i < parser.arguments(); ++i ) + if( nonempty_arg( parser, i ) && name_pending[i] ) + { show_file_error( parser.argument( i ).c_str(), nfound_msg ); + retval = 1; } return retval; } @@ -101,33 +101,36 @@ int delete_members( const Cl_options & cl_opts ) if( !ad.seekable ) { show_file_error( ad.namep, "Archive is not seekable." ); return 1; } if( ad.lzip_index.file_size() < 3 * header_size ) - { show_file_error( ad.namep, posix_msg ); return 2; } + { show_file_error( ad.namep, has_lz_ext( ad.name ) ? posix_lz_msg : posix_msg ); + return 2; } // archive is uncompressed seekable, unless compressed corrupt - Archive_reader ar( ad ); + Archive_reader ar( ad ); // serial reader Resizable_buffer rbuf; long long istream_pos = 0; // source of next data move long long member_begin = 0; // first pos of current tar member Extended extended; // metadata from extended records int retval = 0; bool prev_extended = false; // prev header was extended + if( !rbuf.size() ) { show_error( mem_msg ); return 1; } + while( true ) // process one tar header per iteration { if( !prev_extended && ( member_begin = lseek( ad.infd, 0, SEEK_CUR ) ) < 0 ) - { show_error( "Seek error", errno ); retval = 1; break; } + { show_file_error( ad.namep, seek_msg, errno ); retval = 1; break; } Tar_header header; - const int ret = ar.read( header, header_size ); - if( ret != 0 ) { show_file_error( ar.ad.namep, ar.e_msg(), ar.e_code() ); - retval = ret; break; } - if( !verify_ustar_chksum( header ) ) + if( ( retval = ar.read( header, header_size ) ) != 0 ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); break; } + if( !verify_ustar_chksum( header ) ) // error or EOA { - if( block_is_zero( header, header_size ) ) // EOF + if( block_is_zero( header, header_size ) ) // EOA { if( prev_extended && !cl_opts.permissive ) { show_file_error( ad.namep, fv_msg1 ); retval = 2; } break; } - show_file_error( ad.namep, "Corrupt header in archive." 
); + // posix format already verified by archive reader + show_file_error( ad.namep, bad_hdr_msg ); retval = 2; break; } @@ -137,43 +140,38 @@ int delete_members( const Cl_options & cl_opts ) if( prev_extended && !cl_opts.permissive ) { show_file_error( ad.namep, fv_msg2 ); retval = 2; break; } Extended dummy; // global headers are parsed and ignored - const int ret = ar.parse_records( dummy, header, rbuf, true ); - if( ret != 0 ) - { show_file_error( ad.namep, gblrec_msg ); retval = ret; break; } + retval = ar.parse_records( dummy, header, rbuf, gblrec_msg, true ); + if( retval ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); break; } continue; } if( typeflag == tf_extended ) { if( prev_extended && !cl_opts.permissive ) { show_file_error( ad.namep, fv_msg3 ); retval = 2; break; } - const int ret = ar.parse_records( extended, header, rbuf, - cl_opts.permissive ); - if( ret != 0 ) - { show_file_error( ad.namep, extrec_msg ); retval = ret; break; } - else if( !extended.crc_present() && cl_opts.missing_crc ) - { show_file_error( ad.namep, mcrc_msg ); retval = 2; break; } - prev_extended = true; - continue; + if( ( retval = ar.parse_records( extended, header, rbuf, extrec_msg, + cl_opts.permissive ) ) != 0 ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); break; } + if( !extended.crc_present() && cl_opts.missing_crc ) + { show_file_error( ad.namep, miscrc_msg ); retval = 2; break; } + prev_extended = true; continue; } prev_extended = false; extended.fill_from_ustar( header ); // copy metadata from header - { - const int ret = ar.skip_member( extended ); - if( ret != 0 ) - { show_file_error( ad.namep, "Seek error", errno ); retval = ret; break; } - } + if( ( retval = ar.skip_member( extended ) ) != 0 ) + { show_file_error( ad.namep, seek_msg, errno ); break; } // delete tar member if( !check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) ) { + print_removed_prefix( extended.removed_prefix ); if( !show_member_name( extended, header, 
1, rbuf ) ) { retval = 1; break; } const long long pos = lseek( ad.infd, 0, SEEK_CUR ); if( pos <= 0 || pos <= member_begin || member_begin < istream_pos ) - { show_file_error( ad.namep, "Seek error", errno ); - retval = 1; break; } + { show_file_error( ad.namep, seek_msg, errno ); retval = 1; break; } const long long size = member_begin - istream_pos; if( size > 0 ) // move pending data each time a member is deleted { diff --git a/delete_lz.cc b/delete_lz.cc index 9b2b34f..2e536e3 100644 --- a/delete_lz.cc +++ b/delete_lz.cc @@ -39,8 +39,8 @@ int delete_members_lz( const Cl_options & cl_opts, std::vector< char > & name_pending, const int outfd ) { + Archive_reader_i ar( ad ); // indexed reader Resizable_buffer rbuf; - Archive_reader_i ar( ad ); // indexed reader if( !rbuf.size() || ar.fatal() ) { show_error( mem_msg ); return 1; } long long istream_pos = 0; // source of next data move @@ -63,16 +63,17 @@ int delete_members_lz( const Cl_options & cl_opts, } if( !prev_extended ) member_begin = ar.data_pos(); Tar_header header; - retval = ar.read( header, header_size ); - if( retval != 0 ) { show_file_error( ad.namep, ar.e_msg() ); goto done; } - if( !verify_ustar_chksum( header ) ) + if( ( retval = ar.read( header, header_size ) ) != 0 ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); goto done; } + if( !verify_ustar_chksum( header ) ) // error or EOA { - if( block_is_zero( header, header_size ) ) // EOF + if( block_is_zero( header, header_size ) ) // EOA { if( prev_extended && !cl_opts.permissive ) { show_file_error( ad.namep, fv_msg1 ); retval = 2; } goto done; } + // indexed archive reader does not verify posix format show_file_error( ad.namep, ( ar.data_pos() > header_size ) ? 
bad_hdr_msg : posix_lz_msg ); retval = 2; @@ -85,23 +86,21 @@ int delete_members_lz( const Cl_options & cl_opts, if( prev_extended && !cl_opts.permissive ) { show_file_error( ad.namep, fv_msg2 ); retval = 2; goto done; } Extended dummy; // global headers are parsed and ignored - retval = ar.parse_records( dummy, header, rbuf, true ); - if( retval == 0 ) continue; - show_file_error( ad.namep, gblrec_msg ); - goto done; + retval = ar.parse_records( dummy, header, rbuf, gblrec_msg, true ); + if( retval ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); goto done; } + continue; } if( typeflag == tf_extended ) { - const char * msg = 0; if( prev_extended && !cl_opts.permissive ) - { msg = fv_msg3; retval = 2; } - else retval = ar.parse_records( extended, header, rbuf, - cl_opts.permissive ); - if( retval == 0 && !extended.crc_present() && cl_opts.missing_crc ) - { msg = mcrc_msg; retval = 2; } - if( retval == 0 ) { prev_extended = true; continue; } - show_file_error( ad.namep, msg ? msg : extrec_msg ); - goto done; + { show_file_error( ad.namep, fv_msg3 ); retval = 2; goto done; } + if( ( retval = ar.parse_records( extended, header, rbuf, extrec_msg, + cl_opts.permissive ) ) != 0 ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); goto done; } + if( !extended.crc_present() && cl_opts.missing_crc ) + { show_file_error( ad.namep, miscrc_msg ); retval = 2; goto done; } + prev_extended = true; continue; } prev_extended = false; @@ -112,6 +111,7 @@ int delete_members_lz( const Cl_options & cl_opts, // delete tar member if( !check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) ) { + print_removed_prefix( extended.removed_prefix ); // verify that members match if( member_begin != ad.lzip_index.dblock( i ).pos() || !ar.at_member_end() ) { show_file_error( extended.path().c_str(), diff --git a/doc/tarlz.1 b/doc/tarlz.1 index d7cc093..d23b164 100644 --- a/doc/tarlz.1 +++ b/doc/tarlz.1 @@ -1,10 +1,10 @@ .\" DO NOT MODIFY THIS FILE! 
It was generated by help2man 1.47.16. -.TH TARLZ "1" "January 2022" "tarlz 0.22" "User Commands" +.TH TARLZ "1" "September 2022" "tarlz 0.23" "User Commands" .SH NAME tarlz \- creates tar archives with multimember lzip compression .SH SYNOPSIS .B tarlz -[\fI\,options\/\fR] [\fI\,files\/\fR] +\fI\,operation \/\fR[\fI\,options\/\fR] [\fI\,files\/\fR] .SH DESCRIPTION Tarlz is a massively parallel (multi\-threaded) combined implementation of the tar archiver and the lzip compressor. Tarlz uses the compression library @@ -28,7 +28,7 @@ archive, skipping over the damaged members, just like the standard (uncompressed) tar. Moreover, the option '\-\-keep\-damaged' can be used to recover as much data as possible from each damaged member, and lziprecover can be used to recover some of the damaged members. -.SH OPTIONS +.SS "Operations:" .TP \fB\-\-help\fR display this help and exit @@ -39,26 +39,36 @@ output version information and exit \fB\-A\fR, \fB\-\-concatenate\fR append archives to the end of an archive .TP -\fB\-B\fR, \fB\-\-data\-size=\fR<bytes> -set target size of input data blocks [2x8=16 MiB] -.TP \fB\-c\fR, \fB\-\-create\fR create a new archive .TP -\fB\-C\fR, \fB\-\-directory=\fR<dir> -change to directory <dir> -.TP \fB\-d\fR, \fB\-\-diff\fR find differences between archive and file system .TP -\fB\-\-ignore\-ids\fR -ignore differences in owner and group IDs -.TP \fB\-\-delete\fR delete files/directories from an archive .TP -\fB\-\-exclude=\fR<pattern> -exclude files matching a shell pattern +\fB\-r\fR, \fB\-\-append\fR +append files to the end of an archive +.TP +\fB\-t\fR, \fB\-\-list\fR +list the contents of an archive +.TP +\fB\-x\fR, \fB\-\-extract\fR +extract files/directories from an archive +.TP +\fB\-z\fR, \fB\-\-compress\fR +compress existing POSIX tar archives +.TP +\fB\-\-check\-lib\fR +check version of lzlib and exit +.SH OPTIONS +.TP +\fB\-B\fR, \fB\-\-data\-size=\fR<bytes> +set target size of input data blocks [2x8=16 MiB] +.TP +\fB\-C\fR, 
\fB\-\-directory=\fR<dir> +change to directory <dir> .TP \fB\-f\fR, \fB\-\-file=\fR<archive> use archive file <archive> @@ -66,9 +76,6 @@ use archive file <archive> \fB\-h\fR, \fB\-\-dereference\fR follow symlinks; archive the files they point to .TP -\fB\-\-mtime=\fR<date> -use <date> as mtime for files added to archive -.TP \fB\-n\fR, \fB\-\-threads=\fR<n> set number of (de)compression threads [2] .TP @@ -81,21 +88,9 @@ don't subtract the umask on extraction \fB\-q\fR, \fB\-\-quiet\fR suppress all messages .TP -\fB\-r\fR, \fB\-\-append\fR -append files to the end of an archive -.TP -\fB\-t\fR, \fB\-\-list\fR -list the contents of an archive -.TP \fB\-v\fR, \fB\-\-verbose\fR verbosely list files processed .TP -\fB\-x\fR, \fB\-\-extract\fR -extract files/directories from an archive -.TP -\fB\-z\fR, \fB\-\-compress\fR -compress existing POSIX tar archives -.TP \fB\-0\fR .. \fB\-9\fR set compression level [default 6] .TP @@ -126,32 +121,44 @@ use <owner> name/ID for files added to archive \fB\-\-group=\fR<group> use <group> name/ID for files added to archive .TP +\fB\-\-exclude=\fR<pattern> +exclude files matching a shell pattern +.TP +\fB\-\-ignore\-ids\fR +ignore differences in owner and group IDs +.TP +\fB\-\-ignore\-overflow\fR +ignore mtime overflow differences on 32\-bit +.TP \fB\-\-keep\-damaged\fR don't delete partially extracted files .TP \fB\-\-missing\-crc\fR exit with error status if missing extended CRC .TP +\fB\-\-mtime=\fR<date> +use <date> as mtime for files added to archive +.TP \fB\-\-out\-slots=\fR<n> number of 1 MiB output packets buffered [64] .TP -\fB\-\-check\-lib\fR -compare version of lzlib.h with liblz.{a,so} -.TP \fB\-\-warn\-newer\fR warn if any file is newer than the archive .PP -Exit status: 0 for a normal exit, 1 for environmental problems (file not -found, files differ, invalid flags, I/O errors, etc), 2 to indicate a -corrupt or invalid input file, 3 for an internal consistency error (e.g. bug) -which caused tarlz to panic. 
+If no archive is specified, tarlz tries to read it from standard input or +write it to standard output. +.PP +Exit status: 0 for a normal exit, 1 for environmental problems +(file not found, files differ, invalid command line options, I/O errors, +etc), 2 to indicate a corrupt or invalid input file, 3 for an internal +consistency error (e.g., bug) which caused tarlz to panic. .SH "REPORTING BUGS" Report bugs to lzip\-bug@nongnu.org .br Tarlz home page: http://www.nongnu.org/lzip/tarlz.html .SH COPYRIGHT Copyright \(co 2022 Antonio Diaz Diaz. -Using lzlib 1.13\-rc1 +Using lzlib 1.13 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. diff --git a/doc/tarlz.info b/doc/tarlz.info index 79661cc..d71c0a3 100644 --- a/doc/tarlz.info +++ b/doc/tarlz.info @@ -1,6 +1,6 @@ This is tarlz.info, produced by makeinfo version 4.13+ from tarlz.texi. -INFO-DIR-SECTION Data Compression +INFO-DIR-SECTION Archiving START-INFO-DIR-ENTRY * Tarlz: (tarlz). Archiver with multimember lzip compression END-INFO-DIR-ENTRY @@ -11,7 +11,7 @@ File: tarlz.info, Node: Top, Next: Introduction, Up: (dir) Tarlz Manual ************ -This manual is for Tarlz (version 0.22, 5 January 2022). +This manual is for Tarlz (version 0.23, 23 September 2022). * Menu: @@ -69,9 +69,9 @@ archive, but it has the following advantages: * The resulting multimember tar.lz archive can be decompressed in parallel, multiplying the decompression speed. - * New members can be appended to the archive (by removing the EOF - member), and unwanted members can be deleted from the archive. Just - like an uncompressed tar archive. + * New members can be appended to the archive (by removing the + end-of-archive member), and unwanted members can be deleted from the + archive. Just like an uncompressed tar archive. * It is a safe POSIX-style backup format. 
In case of corruption, tarlz can extract all the undamaged members from the tar.lz archive, @@ -99,19 +99,24 @@ File: tarlz.info, Node: Invoking tarlz, Next: Portable character set, Prev: I The format for running tarlz is: - tarlz [OPTIONS] [FILES] + tarlz OPERATION [OPTIONS] [FILES] All operations except '--concatenate' and '--compress' operate on whole trees if any FILE is a directory. All operations except '--compress' -overwrite output files without warning. - - On archive creation or appending tarlz archives the files specified, but -removes from member names any leading and trailing slashes and any file name -prefixes containing a '..' component. On extraction, leading and trailing -slashes are also removed from member names, and archive members containing -a '..' component in the file name are skipped. Tarlz detects when the -archive being created or enlarged is among the files to be dumped, appended -or concatenated, and skips it. +overwrite output files without warning. If no archive is specified, tarlz +tries to read it from standard input or write it to standard output. Tarlz +refuses to read archive data from a terminal or write archive data to a +terminal. Tarlz detects when the archive being created or enlarged is among +the files to be archived, appended, or concatenated, and skips it. + + Tarlz does not use absolute file names nor file names above the current +working directory (perhaps changed by option '-C'). On archive creation or +appending tarlz archives the files specified, but removes from member names +any leading and trailing slashes and any file name prefixes containing a +'..' component. On extraction, leading and trailing slashes are also +removed from member names, and archive members containing a '..' component +in the file name are skipped. Tarlz does not follow symbolic links during +extraction; not even symbolic links replacing intermediate directories. 
On extraction and listing, tarlz removes leading './' strings from member names in the archive or given in the command line, so that @@ -122,8 +127,7 @@ member names in the archive or given in the command line, so that setting is used. For example '-9 --solid --uncompressed -1' is equivalent to '-1 --solid'. - tarlz supports the following options: *Note Argument syntax: -(arg_parser)Argument syntax. + tarlz supports the following operations: '--help' Print an informative help message describing the options and exit. @@ -140,39 +144,22 @@ to '-1 --solid'. standard output. All the archives involved must be regular (seekable) files, and must be either all compressed or all uncompressed. Compressed and uncompressed archives can't be mixed. Compressed - archives must be multimember lzip files with the two end-of-file + archives must be multimember lzip files with the two end-of-archive blocks plus any zero padding contained in the last lzip member of each - archive. The intermediate end-of-file blocks are removed as each new - archive is concatenated. If the archive is uncompressed, tarlz parses - and skips tar headers until it finds the end-of-file blocks. Exit with + archive. The intermediate end-of-archive blocks are removed as each + new archive is concatenated. If the archive is uncompressed, tarlz + parses tar headers until it finds the end-of-archive blocks. Exit with status 0 without modifying the archive if no FILES have been specified. -'-B BYTES' -'--data-size=BYTES' - Set target size of input data blocks for the option '--bsolid'. *Note - --bsolid::. Valid values range from 8 KiB to 1 GiB. Default value is - two times the dictionary size, except for option '-0' where it - defaults to 1 MiB. *Note Minimum archive sizes::. + Concatenating archives containing files in common results in two or + more tar members with the same name in the resulting archive, which + may produce nondeterministic behavior during multi-threaded extraction. + *Note mt-extraction::. 
'-c' '--create' Create a new archive from FILES. -'-C DIR' -'--directory=DIR' - Change to directory DIR. When creating or appending, the position of - each '-C' option in the command line is significant; it will change the - current working directory for the following FILES until a new '-C' - option appears in the command line. When extracting or comparing, all - the '-C' options are executed in sequence before reading the archive. - Listing ignores any '-C' options specified. DIR is relative to the - then current working directory, perhaps changed by a previous '-C' - option. - - Note that a process can only have one current working directory (CWD). - Therefore multi-threading can't be used to create an archive if a '-C' - option appears after a relative file name in the command line. - '-d' '--diff' Compare and report differences between archive and file system. For @@ -188,10 +175,6 @@ to '-1 --solid'. on archive creation: 'tarlz -C / -d'. Alternatively, tarlz may be run from the root directory to perform the comparison. -'--ignore-ids' - Make '--diff' ignore differences in owner and group IDs. This option is - useful when comparing an '--anonymous' archive. - '--delete' Delete files and directories from an archive in place. It currently can delete only from uncompressed archives and from archives with files @@ -210,12 +193,102 @@ to '-1 --solid'. be dangerous. A corrupt archive, a power cut, or an I/O error may cause data loss. -'--exclude=PATTERN' - Exclude files matching a shell pattern like '*.o'. A file is considered - to match if any component of the file name matches. For example, '*.o' - matches 'foo.o', 'foo.o/bar' and 'foo/bar.o'. If PATTERN contains a - '/', it matches a corresponding '/' in the file name. For example, - 'foo/*.o' matches 'foo/bar.o'. +'-r' +'--append' + Append files to the end of an archive. The archive must be a regular + (seekable) file either compressed or uncompressed. 
Compressed members + can't be appended to an uncompressed archive, nor vice versa. If the + archive is compressed, it must be a multimember lzip file with the two + end-of-archive blocks plus any zero padding contained in the last lzip + member of the archive. It is possible to append files to an archive + with a different compression granularity. Appending works as follows; + first the end-of-archive blocks are removed, then the new members are + appended, and finally two new end-of-archive blocks are appended to + the archive. If the archive is uncompressed, tarlz parses and skips + tar headers until it finds the end-of-archive blocks. Exit with status + 0 without modifying the archive if no FILES have been specified. + + Appending files already present in the archive results in two or more + tar members with the same name, which may produce nondeterministic + behavior during multi-threaded extraction. *Note mt-extraction::. + +'-t' +'--list' + List the contents of an archive. If FILES are given, list only the + FILES given. + +'-x' +'--extract' + Extract files from an archive. If FILES are given, extract only the + FILES given. Else extract all the files in the archive. To extract a + directory without extracting the files under it, use + 'tarlz -xf foo --exclude='dir/*' dir'. Tarlz removes files and empty + directories unconditionally before extracting over them. Other than + that, it will not make any special effort to extract a file over an + incompatible type of file. For example, extracting a file over a + non-empty directory will usually fail. + +'-z' +'--compress' + Compress existing POSIX tar archives aligning the lzip members to the + tar members with choice of granularity (--bsolid by default, --dsolid + works like --asolid). The input archives are kept unchanged. Existing + compressed archives are not overwritten. 
A hyphen '-' used as the name + of an input archive reads from standard input and writes to standard + output (unless the option '--output' is used). Tarlz can be used as + compressor for GNU tar using a command like + 'tar -c -Hustar foo | tarlz -z -o foo.tar.lz'. Note that tarlz only + works reliably on archives without global headers, or with global + headers whose content can be ignored. + + The compression is reversible, including any garbage present after the + end-of-archive blocks. Tarlz stops parsing after the first + end-of-archive block is found, and then compresses the rest of the + archive. Unless solid compression is requested, the end-of-archive + blocks are compressed in a lzip member separated from the preceding + members and from any non-zero garbage following the end-of-archive + blocks. '--compress' implies plzip argument style, not tar style. Each + input archive is compressed to a file with the extension '.lz' added + unless the option '--output' is used. When '--output' is used, only + one input archive can be specified. '-f' can't be used with + '--compress'. + +'--check-lib' + Compare the version of lzlib used to compile tarlz with the version + actually being used at run time and exit. Report any differences + found. Exit with error status 1 if differences are found. A mismatch + may indicate that lzlib is not correctly installed or that a different + version of lzlib has been installed after compiling tarlz. Exit with + error status 2 if LZ_API_VERSION and LZ_version_string don't match. + 'tarlz -v --check-lib' shows the version of lzlib being used and the + value of LZ_API_VERSION (if defined). *Note Library version: + (lzlib)Library version. + + + tarlz supports the following options: *Note Argument syntax: +(arg_parser)Argument syntax. + +'-B BYTES' +'--data-size=BYTES' + Set target size of input data blocks for the option '--bsolid'. *Note + --bsolid::. Valid values range from 8 KiB to 1 GiB. 
Default value is + two times the dictionary size, except for option '-0' where it + defaults to 1 MiB. *Note Minimum archive sizes::. + +'-C DIR' +'--directory=DIR' + Change to directory DIR. When creating or appending, the position of + each '-C' option in the command line is significant; it will change the + current working directory for the following FILES until a new '-C' + option appears in the command line. When extracting or comparing, all + the '-C' options are executed in sequence before reading the archive. + Listing ignores any '-C' options specified. DIR is relative to the + then current working directory, perhaps changed by a previous '-C' + option. + + Note that a process can only have one current working directory (CWD). + Therefore multi-threading can't be used to create an archive if a '-C' + option appears after a relative file name in the command line. '-f ARCHIVE' '--file=ARCHIVE' @@ -228,14 +301,6 @@ to '-1 --solid'. Archive or compare the files they point to instead of the links themselves. -'--mtime=DATE' - When creating or appending, use DATE as the modification time for - files added to the archive instead of their actual modification times. - The value of DATE may be either '@' followed by the number of seconds - since the epoch, or a date in format 'YYYY-MM-DD HH:MM:SS', or the - name of an existing file starting with '.' or '/'. In the latter case, - the modification time of that file is used. - '-n N' '--threads=N' Set the number of (de)compression threads, overriding the system's @@ -268,65 +333,11 @@ to '-1 --solid'. '--quiet' Quiet operation. Suppress all messages. -'-r' -'--append' - Append files to the end of an archive. The archive must be a regular - (seekable) file either compressed or uncompressed. Compressed members - can't be appended to an uncompressed archive, nor vice versa. 
If the - archive is compressed, it must be a multimember lzip file with the two - end-of-file blocks plus any zero padding contained in the last lzip - member of the archive. It is possible to append files to an archive - with a different compression granularity. Appending works as follows; - first the end-of-file blocks are removed, then the new members are - appended, and finally two new end-of-file blocks are appended to the - archive. If the archive is uncompressed, tarlz parses and skips tar - headers until it finds the end-of-file blocks. Exit with status 0 - without modifying the archive if no FILES have been specified. - -'-t' -'--list' - List the contents of an archive. If FILES are given, list only the - FILES given. - '-v' '--verbose' Verbosely list files processed. Further -v's (up to 4) increase the verbosity level. -'-x' -'--extract' - Extract files from an archive. If FILES are given, extract only the - FILES given. Else extract all the files in the archive. To extract a - directory without extracting the files under it, use - 'tarlz -xf foo --exclude='dir/*' dir'. Tarlz will not make any special - effort to extract a file over an incompatible type of file. For - example, extracting a link over a directory will usually fail. - (Principle of least surprise). - -'-z' -'--compress' - Compress existing POSIX tar archives aligning the lzip members to the - tar members with choice of granularity (--bsolid by default, --dsolid - works like --asolid). The input archives are kept unchanged. Existing - compressed archives are not overwritten. A hyphen '-' used as the name - of an input archive reads from standard input and writes to standard - output (unless the option '--output' is used). Tarlz can be used as - compressor for GNU tar using a command like - 'tar -c -Hustar foo | tarlz -z -o foo.tar.lz'. Note that tarlz only - works reliably on archives without global headers, or with global - headers whose content can be ignored. 
- - The compression is reversible, including any garbage present after the - EOF blocks. Tarlz stops parsing after the first EOF block is found, - and then compresses the rest of the archive. Unless solid compression - is requested, the EOF blocks are compressed in a lzip member separated - from the preceding members and from any non-zero garbage following the - EOF blocks. '--compress' implies plzip argument style, not tar style. - Each input archive is compressed to a file with the extension '.lz' - added unless the option '--output' is used. When '--output' is used, - only one input archive can be specified. '-f' can't be used with - '--compress'. - '-0 .. -9' Set the compression level for '--create', '--append', and '--compress'. The default compression level is '-6'. Like lzip, tarlz @@ -354,8 +365,8 @@ to '-1 --solid'. '--asolid' When creating or appending to a compressed archive, use appendable solid compression. All the files being added to the archive are - compressed into a single lzip member, but the end-of-file blocks are - compressed into a separate lzip member. This creates a solidly + compressed into a single lzip member, but the end-of-archive blocks + are compressed into a separate lzip member. This creates a solidly compressed appendable archive. Solid archives can't be created nor decoded in parallel. @@ -375,20 +386,20 @@ to '-1 --solid'. When creating or appending to a compressed archive, compress each file specified in the command line separately in its own lzip member, and use solid compression for each directory specified in the command - line. The end-of-file blocks are compressed into a separate lzip + line. The end-of-archive blocks are compressed into a separate lzip member. This creates a compressed appendable archive with a separate lzip member for each file or top-level directory specified. '--no-solid' When creating or appending to a compressed archive, compress each file - separately in its own lzip member. 
The end-of-file blocks are + separately in its own lzip member. The end-of-archive blocks are compressed into a separate lzip member. This creates a compressed appendable archive with a lzip member for each file. '--solid' When creating or appending to a compressed archive, use solid compression. The files being added to the archive, along with the - end-of-file blocks, are compressed into a single lzip member. The + end-of-archive blocks, are compressed into a single lzip member. The resulting archive is not appendable. No more files can be later appended to the archive. Solid archives can't be created nor decoded in parallel. @@ -406,22 +417,50 @@ to '-1 --solid'. If GROUP is not a valid group name, it is decoded as a decimal numeric group ID. +'--exclude=PATTERN' + Exclude files matching a shell pattern like '*.o'. A file is considered + to match if any component of the file name matches. For example, '*.o' + matches 'foo.o', 'foo.o/bar' and 'foo/bar.o'. If PATTERN contains a + '/', it matches a corresponding '/' in the file name. For example, + 'foo/*.o' matches 'foo/bar.o'. Multiple '--exclude' options can be + specified. + +'--ignore-ids' + Make '--diff' ignore differences in owner and group IDs. This option is + useful when comparing an '--anonymous' archive. + +'--ignore-overflow' + Make '--diff' ignore differences in mtime caused by overflow on 32-bit + systems with a 32-bit time_t. + '--keep-damaged' Don't delete partially extracted files. If a decompression error happens while extracting a file, keep the partial data extracted. Use this option to recover as much data as possible from each damaged member. It is recommended to run tarlz in single-threaded mode - (-threads=0) when using this option. + (--threads=0) when using this option. '--missing-crc' - Exit with error status 2 if the CRC of the extended records is missing. - When this option is used, tarlz detects any corruption in the extended - records (only limited by CRC collisions). 
But note that a corrupt - 'GNU.crc32' keyword, for example 'GNU.crc33', is reported as a missing - CRC instead of as a corrupt record. This misleading 'Missing CRC' - message is the consequence of a flaw in the POSIX pax format; i.e., - the lack of a mandatory check sequence in the extended records. *Note - crc32::. + Exit with error status 2 if the CRC of the extended records is + missing. When this option is used, tarlz detects any corruption in the + extended records (only limited by CRC collisions). But note that a + corrupt 'GNU.crc32' keyword, for example 'GNU.crc33', is reported as a + missing CRC instead of as a corrupt record. This misleading + 'Missing CRC' message is the consequence of a flaw in the POSIX pax + format; i.e., the lack of a mandatory check sequence of the extended + records. *Note crc32::. + +'--mtime=DATE' + When creating or appending, use DATE as the modification time for + files added to the archive instead of their actual modification times. + The value of DATE may be either '@' followed by the number of seconds + since (or before) the epoch, or a date in format + '[-]YYYY-MM-DD HH:MM:SS' or '[-]YYYY-MM-DDTHH:MM:SS', or the name of + an existing reference file starting with '.' or '/' whose modification + time is used. The time of day 'HH:MM:SS' in the date format is + optional and defaults to '00:00:00'. The epoch is + '1970-01-01 00:00:00 UTC'. Negative seconds or years define a + modification time before the epoch. '--out-slots=N' Number of 1 MiB output packets buffered per worker thread during @@ -431,17 +470,6 @@ to '-1 --solid'. more memory. Valid values range from 1 to 1024. The default value is 64. -'--check-lib' - Compare the version of lzlib used to compile tarlz with the version - actually being used at run time and exit. Report any differences - found. Exit with error status 1 if differences are found. 
A mismatch - may indicate that lzlib is not correctly installed or that a different - version of lzlib has been installed after compiling tarlz. Exit with - error status 2 if LZ_API_VERSION and LZ_version_string don't match. - 'tarlz -v --check-lib' shows the version of lzlib being used and the - value of LZ_API_VERSION (if defined). *Note Library version: - (lzlib)Library version. - '--warn-newer' During archive creation, warn if any file being archived has a modification time newer than the archive creation time. This option @@ -453,9 +481,9 @@ to '-1 --solid'. Exit status: 0 for a normal exit, 1 for environmental problems (file not -found, files differ, invalid flags, I/O errors, etc), 2 to indicate a -corrupt or invalid input file, 3 for an internal consistency error (e.g. -bug) which caused tarlz to panic. +found, files differ, invalid command line options, I/O errors, etc), 2 to +indicate a corrupt or invalid input file, 3 for an internal consistency +error (e.g., bug) which caused tarlz to panic. File: tarlz.info, Node: Portable character set, Next: File format, Prev: Invoking tarlz, Up: Top @@ -473,9 +501,7 @@ The set of characters from which portable file names are constructed. characters, respectively. File names are identifiers. Therefore, archiving works better when file -names use only the portable character set without spaces added. Unicode is -for human consumption. It should be avoided in computing environments, -specially in file names. *Note why not Unicode: (moe)why not Unicode. +names use only the portable character set without spaces added. File: tarlz.info, Node: File format, Next: Amendments to pax format, Prev: Portable character set, Up: Top @@ -512,10 +538,11 @@ limitations of the ustar format. Each tar member contains one file archived, and is represented by the following sequence: - * An optional extended header block with extended header records. 
This - header block is of the form described in pax header block, with a - typeflag value of 'x'. The extended header records are included as the - data for this header block. + * An optional extended header block followed by one or more blocks that + contain the extended header records as if they were the contents of a + file; i.e., the extended header records are included as the data for + this header block. This header block is of the form described in pax + header block, with a typeflag value of 'x'. * A header block in ustar format that describes the file. Any fields defined in the preceding optional extended header records override the @@ -529,9 +556,11 @@ split over two or more lzip members, the archive must be decoded sequentially. *Note Multi-threaded decoding::. At the end of the archive file there are two 512-byte blocks filled with -binary zeros, interpreted as an end-of-archive indicator. These EOF blocks -are either compressed in a separate lzip member or compressed along with -the tar members contained in the last lzip member. +binary zeros, interpreted as an end-of-archive indicator. These EOA blocks +are either compressed in a separate lzip member or compressed along with the +tar members contained in the last lzip member. For a compressed archive to +be recognized by tarlz as appendable, the last lzip member must contain +between 512 and 32256 zeros alone. The diagram below shows the correspondence between each tar member (formed by one or two headers plus optional data) in the tar archive and @@ -540,7 +569,7 @@ compression is used: *Note File format: (lzip)File format. tar +========+======+=================+===============+========+======+========+ -| header | data | extended header | extended data | header | data | EOF | +| header | data | extended header | extended data | header | data | EOA | +========+======+=================+===============+========+======+========+ tar.lz @@ -572,25 +601,57 @@ space, equal-sign, and newline. 
These are the <keyword> values currently supported by tarlz: +'atime' + The signed decimal representation of the access time of the following + file in seconds since (or before) the epoch, obtained from the function + 'stat'. The atime record is created only for files with a modification + time outside of the ustar range. *Note ustar-mtime::. + +'gid' + The unsigned decimal representation of the group ID of the group that + owns the following file. The gid record is created only for files with + a group ID greater than 2_097_151 (octal 7777777). *Note + ustar-uid-gid::. + 'linkpath' - The pathname of a link being created to another file, of any type, + The file name of a link being created to another file, of any type, previously archived. This record overrides the field 'linkname' in the following ustar header block. The following ustar header block determines the type of link created. If typeflag of the following header block is 1, it will be a hard link. If typeflag is 2, it will be a symbolic link and the linkpath value will be used as the contents - of the symbolic link. + of the symbolic link. The linkpath record is created only for links + with a link name that does not fit in the space provided by the ustar + header. + +'mtime' + The signed decimal representation of the modification time of the + following file in seconds since (or before) the epoch, obtained from + the function 'stat'. This record overrides the field 'mtime' in the + following ustar header block. The mtime record is created only for + files with a modification time outside of the ustar range. *Note + ustar-mtime::. 'path' - The pathname of the following file. This record overrides the fields - 'name' and 'prefix' in the following ustar header block. + The file name of the following file. This record overrides the fields + 'name' and 'prefix' in the following ustar header block. 
The path + record is created for files with a name that does not fit in the space + provided by the ustar header, but is also created for files that + require any other extended record so that the fields 'name' and + 'prefix' in the following ustar header block can be zeroed. 'size' The size of the file in bytes, expressed as a decimal number using digits from the ISO/IEC 646:1991 (ASCII) standard. This record - overrides the size field in the following ustar header block. The size - record is used only for files with a size value greater than - 8_589_934_591 (octal 77777777777). This is 2^33 bytes or larger. + overrides the field 'size' in the following ustar header block. The + size record is created only for files with a size value greater than + 8_589_934_591 (octal 77777777777); that is, 8 GiB (2^33 bytes) or + larger. + +'uid' + The unsigned decimal representation of the user ID of the file owner + of the following file. The uid record is created only for files with a + user ID greater than 2_097_151 (octal 7777777). *Note ustar-uid-gid::. 'GNU.crc32' CRC32-C (Castagnoli) of the extended header data excluding the 8 bytes @@ -643,18 +704,18 @@ and groups, tarlz will use the byte values in these names unmodified. character strings except when all characters in the array contain non-null characters including the last character. - The fields 'prefix' and 'name' produce the pathname of the file. A new -pathname is formed, if prefix is not an empty string (its first character -is not null), by concatenating prefix (up to the first null character), a -slash character, and name; otherwise, name is used alone. In either case, -name is terminated at the first null character. If prefix begins with a -null character, it is ignored. In this manner, pathnames of at most 256 -characters can be supported. If a pathname does not fit in the space -provided, an extended record is used to store the pathname. + The fields 'name' and 'prefix' produce the file name. 
A new file name is +formed, if prefix is not an empty string (its first character is not null), +by concatenating prefix (up to the first null character), a slash +character, and name; otherwise, name is used alone. In either case, name is +terminated at the first null character. If prefix begins with a null +character, it is ignored. In this manner, file names of at most 256 +characters can be supported. If a file name does not fit in the space +provided, an extended record is used to store the file name. - The field 'linkname' does not use the prefix to produce a pathname. If -the linkname does not fit in the 100 characters provided, an extended record -is used to store the linkname. + The field 'linkname' does not use the prefix to produce a file name. If +the link name does not fit in the 100 characters provided, an extended +record is used to store the link name. The field 'mode' provides 12 access permission bits. The following table shows the symbolic name of each bit and its octal value: @@ -667,7 +728,9 @@ S_IRGRP 00040 S_IWGRP 00020 S_IXGRP 00010 S_IROTH 00004 S_IWOTH 00002 S_IXOTH 00001 The fields 'uid' and 'gid' are the user and group IDs of the owner and -group of the file, respectively. +group of the file, respectively. If the file uid or gid are greater than +2_097_151 (octal 7777777), an extended record is used to store the uid or +gid. The field 'size' contains the octal representation of the size of the file in bytes. If the field 'typeflag' specifies a file of type '0' @@ -680,7 +743,10 @@ header. If the file size is larger than 8_589_934_591 bytes The field 'mtime' contains the octal representation of the modification time of the file at the time it was archived, obtained from the function -'stat'. +'stat'. If the modification time is negative or larger than 8_589_934_591 +(octal 77777777777) seconds since the epoch, an extended record is used to +store the modification time. 
The ustar range of mtime goes from +'1970-01-01 00:00:00 UTC' to '2242-03-16 12:56:31 UTC'. The field 'chksum' contains the octal representation of the value of the simple sum of all bytes in the header logical record. Each byte in the @@ -694,7 +760,8 @@ file archived: Regular file. ''1'' - Hard link to another file, of any type, previously archived. + Hard link to another file, of any type, previously archived. Hard + links must not contain file data. ''2'' Symbolic link. @@ -712,8 +779,8 @@ file archived: ''7'' Reserved to represent a file to which an implementation has associated - some high-performance attribute. Tarlz treats this type of file as a - regular file (type 0). + some high-performance attribute (contiguous file). Tarlz treats this + type of file as a regular file (type 0). The field 'magic' contains the ASCII null-terminated string "ustar". The @@ -735,9 +802,9 @@ Tarlz creates safe archives that allow the reliable detection of invalid or corrupt metadata during decoding even when the integrity checking of lzip can't be used because the lzip members are only decompressed partially, as it happens in parallel '--diff', '--list', and '--extract'. In order to -achieve this goal, tarlz makes some changes to the variant of the pax -format that it uses. This chapter describes these changes and the concrete -reasons to implement them. +achieve this goal and avoid some other flaws in the pax format, tarlz makes +some changes to the variant of the pax format that it uses. This chapter +describes these changes and the concrete reasons to implement them. 5.1 Add a CRC of the extended records @@ -775,45 +842,73 @@ In order to allow the extraction of pax archives by a tar utility conforming to the POSIX-2:1993 standard, POSIX.1-2008 recommends selecting extended header field values that allow such tar to create a regular file containing the extended header records as data. 
This approach is broken because if the -extended header is needed because of a long file name, the fields 'prefix' -and 'name' will be unable to contain the full pathname of the file. -Therefore the files corresponding to both the extended header and the -overridden ustar header will be extracted using truncated file names, -perhaps overwriting existing files or directories. It may be a security risk -to extract a file with a truncated file name. +extended header is needed because of a long file name, the fields 'name' +and 'prefix' will be unable to contain the full file name. (Some tar +implementations store the truncated name in the field 'name' alone, +truncating the name to only 100 bytes instead of 256). Therefore the files +corresponding to both the extended header and the overridden ustar header +will be extracted using truncated file names, perhaps overwriting existing +files or directories. It may be a security risk to extract a file with a +truncated file name. To avoid this problem, tarlz writes extended headers with all fields -zeroed except size, chksum, typeflag, magic and version. This prevents old -tar programs from extracting the extended records as a file in the wrong -place. Tarlz also sets to zero those fields of the ustar header overridden -by extended records. Finally, tarlz skips members without name when decoding -except when listing. This is needed to detect certain format violations -during parallel extraction. - - If an extended header is required for any reason (for example a file size -larger than 8 GiB or a link name longer than 100 bytes), tarlz moves the -file name also to the extended header to prevent an ustar tool from trying -to extract the file or link. This also makes easier during parallel decoding -the detection of a tar member split between two lzip members at the boundary -between the extended header and the ustar header. 
+zeroed except 'size' (which contains the size of the extended records), +'chksum', 'typeflag', 'magic', and 'version'. In particular, tarlz sets the +fields 'name' and 'prefix' to zero. This prevents old tar programs from +extracting the extended records as a file in the wrong place. Tarlz also +sets to zero those fields of the ustar header overridden by extended +records. Finally, tarlz skips members with zeroed 'name' and 'prefix' when +decoding, except when listing. This is needed to detect certain format +violations during parallel extraction. + + If an extended header is required for any reason (for example a file +size of 8 GiB or larger, or a link name longer than 100 bytes), tarlz also +moves the file name to the extended records to prevent an ustar tool from +trying to extract the file or link. This also makes it easier, during +parallel decoding, to detect a tar member split between two lzip members at +the boundary between the extended header and the ustar header. 5.3 As simple as possible (but not simpler) =========================================== The tarlz format is mainly ustar. Extended pax headers are used only when -needed because the length of a file name or link name, or the size of a file -exceed the limits of the ustar format. Adding 1 KiB of extended headers to -each member just to record subsecond timestamps seems wasteful for a backup -format. Moreover, minimizing the overhead may help recovering the archive -with lziprecover in case of corruption. +needed because the length of a file name or link name, or the size or other +attribute of a file exceed the limits of the ustar format. Adding 1 KiB of +extended header and records to each member just to save subsecond +timestamps seems wasteful for a backup format. Moreover, minimizing the +overhead may help recover the archive with lziprecover in case of +corruption. Global pax headers are tolerated, but not supported; they are parsed and ignored. 
Some operations may not behave as expected if the archive contains global headers. -5.4 Avoid misconversions to/from UTF-8 +5.4 Improve reproducibility +=========================== + +Pax includes by default the process ID of the pax process in the ustar name +of the extended headers, making the archive not reproducible. Tarlz stores +the true name of the file just once, either in the ustar header or in the +extended records, making it easier to produce reproducible archives. + + Pax allows an extended record to have length x-1 or x if x is a power of +ten; '99<97_bytes>' or '100<97_bytes>'. Tarlz minimizes the length of the +record and always produces a length of x-1 in these cases. + + +5.5 No data in hard links +========================= + +Tarlz does not allow data in hard link members. The data (if any) must be in +the member determining the type of the file (which can't be a link). If all +the names of a file are stored as hard links, the type of the file is lost. +Not allowing data in hard links also prevents invalid actions like +extracting file data for a hard link to a symbolic link or to a directory. + + +5.6 Avoid misconversions to/from UTF-8 ====================================== There is no portable way to tell what charset a text string is coded into. @@ -968,12 +1063,12 @@ headers must provide their own integrity checking. Multi-threaded extraction may produce different output than single-threaded extraction in some cases: - During multi-threaded extraction, several independent processes are + During multi-threaded extraction, several independent threads are simultaneously reading the archive and creating files in the file system. The archive is not read sequentially. As a consequence, any error or -weirdness in the archive (like a corrupt member or an EOF block in the -middle of the archive) won't be usually detected until part of the archive -beyond that point has been processed. 
+weirdness in the archive (like a corrupt member or an end-of-archive block +in the middle of the archive) won't be usually detected until part of the +archive beyond that point has been processed. If the archive contains two or more tar members with the same name, single-threaded extraction extracts the members in the order they appear in @@ -986,6 +1081,9 @@ unspecified which of the tar members is extracted. names resolve to the same file in the file system), the result is undefined. (Probably the resulting file will be mangled). + Extraction of a hard link may fail if it is extracted before the file it +links to. + File: tarlz.info, Node: Minimum archive sizes, Next: Examples, Prev: Multi-threaded decoding, Up: Top @@ -1054,7 +1152,7 @@ Example 4: Create a compressed appendable archive containing directories 'dir1', 'dir2' and 'dir3' with a separate lzip member per directory. Then append files 'a', 'b', 'c', 'd' and 'e' to the archive, all of them contained in a single lzip member. The resulting archive 'archive.tar.lz' -contains 5 lzip members (including the EOF member). +contains 5 lzip members (including the end-of-archive member). tarlz --dsolid -cf archive.tar.lz dir1 dir2 dir3 tarlz --asolid -rf archive.tar.lz a b c d e @@ -1081,7 +1179,7 @@ Example 7: Extract files 'a' and 'c', and the whole tree under directory Example 8: Copy the contents of directory 'sourcedir' to the directory 'destdir'. - tarlz -C sourcedir -c . | tarlz -C destdir -x + tarlz -C sourcedir --uncompressed -cf - . | tarlz -C destdir -xf - Example 9: Compress the existing POSIX archive 'archive.tar' and write the @@ -1091,6 +1189,18 @@ other members can still be extracted). tarlz -z --no-solid archive.tar + +Example 10: Compress the archive 'archive.tar' and write the output to +'foo.tar.lz'. + + tarlz -z -o foo.tar.lz archive.tar + + +Example 11: Concatenate and compress two archives 'archive1.tar' and +'archive2.tar', and write the output to 'foo.tar.lz'. 
+ + tarlz -A archive1.tar archive2.tar | tarlz -z -o foo.tar.lz + File: tarlz.info, Node: Problems, Next: Concept index, Prev: Examples, Up: Top @@ -1133,23 +1243,26 @@ Concept index Tag Table: -Node: Top223 -Node: Introduction1214 -Node: Invoking tarlz4022 -Ref: --data-size6436 -Ref: --bsolid16388 -Node: Portable character set21224 -Node: File format22019 -Ref: key_crc3226944 -Node: Amendments to pax format32572 -Ref: crc3233236 -Ref: flawed-compat34547 -Node: Program design37348 -Node: Multi-threaded decoding41273 -Node: Minimum archive sizes45764 -Node: Examples47902 -Node: Problems49918 -Node: Concept index50473 +Node: Top216 +Node: Introduction1210 +Node: Invoking tarlz4029 +Ref: --data-size12880 +Ref: --bsolid17192 +Node: Portable character set22788 +Node: File format23431 +Ref: key_crc3230188 +Ref: ustar-uid-gid33452 +Ref: ustar-mtime34254 +Node: Amendments to pax format36254 +Ref: crc3236963 +Ref: flawed-compat38274 +Node: Program design42364 +Node: Multi-threaded decoding46289 +Ref: mt-extraction49570 +Node: Minimum archive sizes50876 +Node: Examples53014 +Node: Problems55381 +Node: Concept index55936 End Tag Table diff --git a/doc/tarlz.texi b/doc/tarlz.texi index bfa6d9d..5bdd2af 100644 --- a/doc/tarlz.texi +++ b/doc/tarlz.texi @@ -6,10 +6,10 @@ @finalout @c %**end of header -@set UPDATED 5 January 2022 -@set VERSION 0.22 +@set UPDATED 23 September 2022 +@set VERSION 0.23 -@dircategory Data Compression +@dircategory Archiving @direntry * Tarlz: (tarlz). Archiver with multimember lzip compression @end direntry @@ -96,9 +96,9 @@ The resulting multimember tar.lz archive can be decompressed in parallel, multiplying the decompression speed. @item -New members can be appended to the archive (by removing the EOF -member), and unwanted members can be deleted from the archive. Just -like an uncompressed tar archive. +New members can be appended to the archive (by removing the +end-of-archive member), and unwanted members can be deleted from the +archive. 
Just like an uncompressed tar archive. @item It is a safe POSIX-style backup format. In case of corruption, tarlz @@ -111,7 +111,7 @@ lziprecover can be used to recover some of the damaged members. @item A multimember tar.lz archive is usually smaller than the corresponding solidly compressed tar.gz archive, except when individually -compressing files smaller than about 32 KiB. +compressing files smaller than about @w{32 KiB}. @end itemize Tarlz protects the extended records with a Cyclic Redundancy Check (CRC) in @@ -133,21 +133,28 @@ the format of the archive is compatible with tarlz. The format for running tarlz is: @example -tarlz [@var{options}] [@var{files}] +tarlz @var{operation} [@var{options}] [@var{files}] @end example @noindent All operations except @samp{--concatenate} and @samp{--compress} operate on whole trees if any @var{file} is a directory. All operations except -@samp{--compress} overwrite output files without warning. - -On archive creation or appending tarlz archives the files specified, but -removes from member names any leading and trailing slashes and any file name -prefixes containing a @samp{..} component. On extraction, leading and -trailing slashes are also removed from member names, and archive members -containing a @samp{..} component in the file name are skipped. Tarlz detects -when the archive being created or enlarged is among the files to be dumped, -appended or concatenated, and skips it. +@samp{--compress} overwrite output files without warning. If no archive is +specified, tarlz tries to read it from standard input or write it to +standard output. Tarlz refuses to read archive data from a terminal or write +archive data to a terminal. Tarlz detects when the archive being created or +enlarged is among the files to be archived, appended, or concatenated, and +skips it. + +Tarlz does not use absolute file names nor file names above the current +working directory (perhaps changed by option @samp{-C}). 
On archive creation +or appending tarlz archives the files specified, but removes from member +names any leading and trailing slashes and any file name prefixes containing +a @samp{..} component. On extraction, leading and trailing slashes are also +removed from member names, and archive members containing a @samp{..} +component in the file name are skipped. Tarlz does not follow symbolic links +during extraction; not even symbolic links replacing intermediate +directories. On extraction and listing, tarlz removes leading @samp{./} strings from member names in the archive or given in the command line, so that @@ -158,11 +165,7 @@ If several compression levels or @samp{--*solid} options are given, the last setting is used. For example @w{@samp{-9 --solid --uncompressed -1}} is equivalent to @w{@samp{-1 --solid}}. -tarlz supports the following -@uref{http://www.nongnu.org/arg-parser/manual/arg_parser_manual.html#Argument-syntax,,options}: -@ifnothtml -@xref{Argument syntax,,,arg_parser}. -@end ifnothtml +tarlz supports the following operations: @table @code @item --help @@ -180,40 +183,22 @@ specified with the option @samp{-f}, the input archives are concatenated to standard output. All the archives involved must be regular (seekable) files, and must be either all compressed or all uncompressed. Compressed and uncompressed archives can't be mixed. Compressed archives must be -multimember lzip files with the two end-of-file blocks plus any zero padding -contained in the last lzip member of each archive. The intermediate -end-of-file blocks are removed as each new archive is concatenated. If the -archive is uncompressed, tarlz parses and skips tar headers until it finds -the end-of-file blocks. Exit with status 0 without modifying the archive if +multimember lzip files with the two end-of-archive blocks plus any zero +padding contained in the last lzip member of each archive. The intermediate +end-of-archive blocks are removed as each new archive is concatenated. 
If +the archive is uncompressed, tarlz parses tar headers until it finds the +end-of-archive blocks. Exit with status 0 without modifying the archive if no @var{files} have been specified. -@anchor{--data-size} -@item -B @var{bytes} -@itemx --data-size=@var{bytes} -Set target size of input data blocks for the option @samp{--bsolid}. -@xref{--bsolid}. Valid values range from @w{8 KiB} to @w{1 GiB}. Default -value is two times the dictionary size, except for option @samp{-0} where it -defaults to @w{1 MiB}. @xref{Minimum archive sizes}. +Concatenating archives containing files in common results in two or more tar +members with the same name in the resulting archive, which may produce +nondeterministic behavior during multi-threaded extraction. +@xref{mt-extraction}. @item -c @itemx --create Create a new archive from @var{files}. -@item -C @var{dir} -@itemx --directory=@var{dir} -Change to directory @var{dir}. When creating or appending, the position of -each @samp{-C} option in the command line is significant; it will change the -current working directory for the following @var{files} until a new -@samp{-C} option appears in the command line. When extracting or comparing, -all the @samp{-C} options are executed in sequence before reading the -archive. Listing ignores any @samp{-C} options specified. @var{dir} is -relative to the then current working directory, perhaps changed by a -previous @samp{-C} option. - -Note that a process can only have one current working directory (CWD). -Therefore multi-threading can't be used to create an archive if a @samp{-C} -option appears after a relative file name in the command line. - @item -d @itemx --diff Compare and report differences between archive and file system. For each tar @@ -228,10 +213,6 @@ be used in combination with @samp{--diff} when absolute file names were used on archive creation: @w{@samp{tarlz -C / -d}}. Alternatively, tarlz may be run from the root directory to perform the comparison. 
-@item --ignore-ids -Make @samp{--diff} ignore differences in owner and group IDs. This option is -useful when comparing an @samp{--anonymous} archive. - @item --delete Delete files and directories from an archive in place. It currently can delete only from uncompressed archives and from archives with files @@ -249,12 +230,109 @@ To delete a directory without deleting the files under it, use may be dangerous. A corrupt archive, a power cut, or an I/O error may cause data loss. -@item --exclude=@var{pattern} -Exclude files matching a shell pattern like @samp{*.o}. A file is considered -to match if any component of the file name matches. For example, @samp{*.o} -matches @samp{foo.o}, @samp{foo.o/bar} and @samp{foo/bar.o}. If -@var{pattern} contains a @samp{/}, it matches a corresponding @samp{/} in -the file name. For example, @samp{foo/*.o} matches @samp{foo/bar.o}. +@item -r +@itemx --append +Append files to the end of an archive. The archive must be a regular +(seekable) file either compressed or uncompressed. Compressed members can't +be appended to an uncompressed archive, nor vice versa. If the archive is +compressed, it must be a multimember lzip file with the two end-of-archive +blocks plus any zero padding contained in the last lzip member of the +archive. It is possible to append files to an archive with a different +compression granularity. Appending works as follows; first the +end-of-archive blocks are removed, then the new members are appended, and +finally two new end-of-archive blocks are appended to the archive. If the +archive is uncompressed, tarlz parses and skips tar headers until it finds +the end-of-archive blocks. Exit with status 0 without modifying the archive +if no @var{files} have been specified. + +Appending files already present in the archive results in two or more tar +members with the same name, which may produce nondeterministic behavior +during multi-threaded extraction. @xref{mt-extraction}. 
+ +@item -t +@itemx --list +List the contents of an archive. If @var{files} are given, list only the +@var{files} given. + +@item -x +@itemx --extract +Extract files from an archive. If @var{files} are given, extract only the +@var{files} given. Else extract all the files in the archive. To extract a +directory without extracting the files under it, use +@w{@samp{tarlz -xf foo --exclude='dir/*' dir}}. Tarlz removes files and +empty directories unconditionally before extracting over them. Other than +that, it will not make any special effort to extract a file over an +incompatible type of file. For example, extracting a file over a non-empty +directory will usually fail. + +@item -z +@itemx --compress +Compress existing POSIX tar archives aligning the lzip members to the tar +members with choice of granularity (---bsolid by default, ---dsolid works +like ---asolid). The input archives are kept unchanged. Existing compressed +archives are not overwritten. A hyphen @samp{-} used as the name of an input +archive reads from standard input and writes to standard output (unless the +option @samp{--output} is used). Tarlz can be used as compressor for GNU tar +using a command like @w{@samp{tar -c -Hustar foo | tarlz -z -o foo.tar.lz}}. +Note that tarlz only works reliably on archives without global headers, or +with global headers whose content can be ignored. + +The compression is reversible, including any garbage present after the +end-of-archive blocks. Tarlz stops parsing after the first end-of-archive +block is found, and then compresses the rest of the archive. Unless solid +compression is requested, the end-of-archive blocks are compressed in a lzip +member separated from the preceding members and from any non-zero garbage +following the end-of-archive blocks. @samp{--compress} implies plzip +argument style, not tar style. Each input archive is compressed to a file +with the extension @samp{.lz} added unless the option @samp{--output} is +used. 
When @samp{--output} is used, only one input archive can be specified. +@samp{-f} can't be used with @samp{--compress}. + +@item --check-lib +Compare the +@uref{http://www.nongnu.org/lzip/manual/lzlib_manual.html#Library-version,,version of lzlib} +used to compile tarlz with the version actually being used at run time and +exit. Report any differences found. Exit with error status 1 if differences +are found. A mismatch may indicate that lzlib is not correctly installed or +that a different version of lzlib has been installed after compiling tarlz. +Exit with error status 2 if LZ_API_VERSION and LZ_version_string don't +match. @w{@samp{tarlz -v --check-lib}} shows the version of lzlib being used +and the value of LZ_API_VERSION (if defined). +@ifnothtml +@xref{Library version,,,lzlib}. +@end ifnothtml + +@end table + +tarlz supports the following +@uref{http://www.nongnu.org/arg-parser/manual/arg_parser_manual.html#Argument-syntax,,options}: +@ifnothtml +@xref{Argument syntax,,,arg_parser}. +@end ifnothtml + +@table @code +@anchor{--data-size} +@item -B @var{bytes} +@itemx --data-size=@var{bytes} +Set target size of input data blocks for the option @samp{--bsolid}. +@xref{--bsolid}. Valid values range from @w{8 KiB} to @w{1 GiB}. Default +value is two times the dictionary size, except for option @samp{-0} where it +defaults to @w{1 MiB}. @xref{Minimum archive sizes}. + +@item -C @var{dir} +@itemx --directory=@var{dir} +Change to directory @var{dir}. When creating or appending, the position of +each @samp{-C} option in the command line is significant; it will change the +current working directory for the following @var{files} until a new +@samp{-C} option appears in the command line. When extracting or comparing, +all the @samp{-C} options are executed in sequence before reading the +archive. Listing ignores any @samp{-C} options specified. @var{dir} is +relative to the then current working directory, perhaps changed by a +previous @samp{-C} option. 
+ +Note that a process can only have one current working directory (CWD). +Therefore multi-threading can't be used to create an archive if a @samp{-C} +option appears after a relative file name in the command line. @item -f @var{archive} @itemx --file=@var{archive} @@ -266,14 +344,6 @@ argument reads from standard input or writes to standard output. Follow symbolic links during archive creation, appending or comparison. Archive or compare the files they point to instead of the links themselves. -@item --mtime=@var{date} -When creating or appending, use @var{date} as the modification time for -files added to the archive instead of their actual modification times. The -value of @var{date} may be either @samp{@@} followed by the number of -seconds since the epoch, or a date in format @w{@samp{YYYY-MM-DD HH:MM:SS}}, -or the name of an existing file starting with @samp{.} or @samp{/}. In the -latter case, the modification time of that file is used. - @item -n @var{n} @itemx --threads=@var{n} Set the number of (de)compression threads, overriding the system's default. @@ -305,64 +375,11 @@ permissions specified in the archive. @itemx --quiet Quiet operation. Suppress all messages. -@item -r -@itemx --append -Append files to the end of an archive. The archive must be a regular -(seekable) file either compressed or uncompressed. Compressed members can't -be appended to an uncompressed archive, nor vice versa. If the archive is -compressed, it must be a multimember lzip file with the two end-of-file -blocks plus any zero padding contained in the last lzip member of the -archive. It is possible to append files to an archive with a different -compression granularity. Appending works as follows; first the end-of-file -blocks are removed, then the new members are appended, and finally two new -end-of-file blocks are appended to the archive. If the archive is -uncompressed, tarlz parses and skips tar headers until it finds the -end-of-file blocks. 
Exit with status 0 without modifying the archive if no -@var{files} have been specified. - -@item -t -@itemx --list -List the contents of an archive. If @var{files} are given, list only the -@var{files} given. - @item -v @itemx --verbose Verbosely list files processed. Further -v's (up to 4) increase the verbosity level. -@item -x -@itemx --extract -Extract files from an archive. If @var{files} are given, extract only the -@var{files} given. Else extract all the files in the archive. To extract a -directory without extracting the files under it, use -@w{@samp{tarlz -xf foo --exclude='dir/*' dir}}. Tarlz will not make any -special effort to extract a file over an incompatible type of file. For -example, extracting a link over a directory will usually fail. (Principle of -least surprise). - -@item -z -@itemx --compress -Compress existing POSIX tar archives aligning the lzip members to the tar -members with choice of granularity (---bsolid by default, ---dsolid works -like ---asolid). The input archives are kept unchanged. Existing compressed -archives are not overwritten. A hyphen @samp{-} used as the name of an input -archive reads from standard input and writes to standard output (unless the -option @samp{--output} is used). Tarlz can be used as compressor for GNU tar -using a command like @w{@samp{tar -c -Hustar foo | tarlz -z -o foo.tar.lz}}. -Note that tarlz only works reliably on archives without global headers, or -with global headers whose content can be ignored. - -The compression is reversible, including any garbage present after the EOF -blocks. Tarlz stops parsing after the first EOF block is found, and then -compresses the rest of the archive. Unless solid compression is requested, -the EOF blocks are compressed in a lzip member separated from the preceding -members and from any non-zero garbage following the EOF blocks. -@samp{--compress} implies plzip argument style, not tar style. 
Each input -archive is compressed to a file with the extension @samp{.lz} added unless -the option @samp{--output} is used. When @samp{--output} is used, only one -input archive can be specified. @samp{-f} can't be used with -@samp{--compress}. - @item -0 .. -9 Set the compression level for @samp{--create}, @samp{--append}, and @samp{--compress}. The default compression level is @samp{-6}. Like lzip, @@ -392,7 +409,7 @@ appended to an uncompressed archive, nor vice versa. @item --asolid When creating or appending to a compressed archive, use appendable solid compression. All the files being added to the archive are compressed into a -single lzip member, but the end-of-file blocks are compressed into a +single lzip member, but the end-of-archive blocks are compressed into a separate lzip member. This creates a solidly compressed appendable archive. Solid archives can't be created nor decoded in parallel. @@ -405,58 +422,85 @@ compressed data block must contain an integer number of tar members. Block compression is the default because it improves compression ratio for archives with many files smaller than the block size. This option allows tarlz to revert to default behavior if, for example, it is invoked through an -alias like @samp{tar='tarlz --solid'}. @xref{--data-size}, to set the target -block size. +alias like @w{@samp{tar='tarlz --solid'}}. @xref{--data-size}, to set the +target block size. @item --dsolid When creating or appending to a compressed archive, compress each file specified in the command line separately in its own lzip member, and use solid compression for each directory specified in the command line. The -end-of-file blocks are compressed into a separate lzip member. This creates -a compressed appendable archive with a separate lzip member for each file or -top-level directory specified. +end-of-archive blocks are compressed into a separate lzip member. 
This +creates a compressed appendable archive with a separate lzip member for each +file or top-level directory specified. @item --no-solid When creating or appending to a compressed archive, compress each file -separately in its own lzip member. The end-of-file blocks are compressed +separately in its own lzip member. The end-of-archive blocks are compressed into a separate lzip member. This creates a compressed appendable archive with a lzip member for each file. @item --solid When creating or appending to a compressed archive, use solid compression. -The files being added to the archive, along with the end-of-file blocks, are -compressed into a single lzip member. The resulting archive is not +The files being added to the archive, along with the end-of-archive blocks, +are compressed into a single lzip member. The resulting archive is not appendable. No more files can be later appended to the archive. Solid archives can't be created nor decoded in parallel. @item --anonymous -Equivalent to @samp{--owner=root --group=root}. +Equivalent to @w{@samp{--owner=root --group=root}}. @item --owner=@var{owner} -When creating or appending, use @var{owner} for files added to the -archive. If @var{owner} is not a valid user name, it is decoded as a -decimal numeric user ID. +When creating or appending, use @var{owner} for files added to the archive. +If @var{owner} is not a valid user name, it is decoded as a decimal numeric +user ID. @item --group=@var{group} -When creating or appending, use @var{group} for files added to the -archive. If @var{group} is not a valid group name, it is decoded as a -decimal numeric group ID. +When creating or appending, use @var{group} for files added to the archive. +If @var{group} is not a valid group name, it is decoded as a decimal numeric +group ID. + +@item --exclude=@var{pattern} +Exclude files matching a shell pattern like @samp{*.o}. A file is considered +to match if any component of the file name matches. 
For example, @samp{*.o} +matches @samp{foo.o}, @samp{foo.o/bar} and @samp{foo/bar.o}. If +@var{pattern} contains a @samp{/}, it matches a corresponding @samp{/} in +the file name. For example, @samp{foo/*.o} matches @samp{foo/bar.o}. +Multiple @samp{--exclude} options can be specified. + +@item --ignore-ids +Make @samp{--diff} ignore differences in owner and group IDs. This option is +useful when comparing an @samp{--anonymous} archive. + +@item --ignore-overflow +Make @samp{--diff} ignore differences in mtime caused by overflow on 32-bit +systems with a 32-bit time_t. @item --keep-damaged Don't delete partially extracted files. If a decompression error happens while extracting a file, keep the partial data extracted. Use this option to recover as much data as possible from each damaged member. It is recommended -to run tarlz in single-threaded mode (--threads=0) when using this option. +to run tarlz in single-threaded mode (---threads=0) when using this option. @item --missing-crc -Exit with error status 2 if the CRC of the extended records is missing. -When this option is used, tarlz detects any corruption in the extended -records (only limited by CRC collisions). But note that a corrupt -@samp{GNU.crc32} keyword, for example @samp{GNU.crc33}, is reported as a -missing CRC instead of as a corrupt record. This misleading -@samp{Missing CRC} message is the consequence of a flaw in the POSIX pax -format; i.e., the lack of a mandatory check sequence in the extended -records. @xref{crc32}. +Exit with error status 2 if the CRC of the extended records is missing. When +this option is used, tarlz detects any corruption in the extended records +(only limited by CRC collisions). But note that a corrupt @samp{GNU.crc32} +keyword, for example @samp{GNU.crc33}, is reported as a missing CRC instead +of as a corrupt record. 
This misleading @w{@samp{Missing CRC}} message is +the consequence of a flaw in the POSIX pax format; i.e., the lack of a +mandatory check sequence of the extended records. @xref{crc32}. + +@item --mtime=@var{date} +When creating or appending, use @var{date} as the modification time for +files added to the archive instead of their actual modification times. The +value of @var{date} may be either @samp{@@} followed by the number of +seconds since (or before) the epoch, or a date in format +@w{@samp{[-]YYYY-MM-DD HH:MM:SS}} or @samp{[-]YYYY-MM-DDTHH:MM:SS}, or the +name of an existing reference file starting with @samp{.} or @samp{/} whose +modification time is used. The time of day @samp{HH:MM:SS} in the date +format is optional and defaults to @samp{00:00:00}. The epoch is +@w{@samp{1970-01-01 00:00:00 UTC}}. Negative seconds or years define a +modification time before the epoch. @item --out-slots=@var{n} Number of @w{1 MiB} output packets buffered per worker thread during @@ -465,20 +509,6 @@ number of packets may increase compression speed if the files being archived are larger than @w{64 MiB} compressed, but requires more memory. Valid values range from 1 to 1024. The default value is 64. -@item --check-lib -Compare the -@uref{http://www.nongnu.org/lzip/manual/lzlib_manual.html#Library-version,,version of lzlib} -used to compile tarlz with the version actually being used at run time and -exit. Report any differences found. Exit with error status 1 if differences -are found. A mismatch may indicate that lzlib is not correctly installed or -that a different version of lzlib has been installed after compiling tarlz. -Exit with error status 2 if LZ_API_VERSION and LZ_version_string don't -match. @w{@samp{tarlz -v --check-lib}} shows the version of lzlib being used -and the value of LZ_API_VERSION (if defined). -@ifnothtml -@xref{Library version,,,lzlib}. 
-@end ifnothtml - @item --warn-newer During archive creation, warn if any file being archived has a modification time newer than the archive creation time. This option may slow archive @@ -496,10 +526,10 @@ keyword appearing in the same block of extended records. @end table -Exit status: 0 for a normal exit, 1 for environmental problems (file not -found, files differ, invalid flags, I/O errors, etc), 2 to indicate a -corrupt or invalid input file, 3 for an internal consistency error (e.g. bug) -which caused tarlz to panic. +Exit status: 0 for a normal exit, 1 for environmental problems +(file not found, files differ, invalid command line options, I/O errors, +etc), 2 to indicate a corrupt or invalid input file, 3 for an internal +consistency error (e.g., bug) which caused tarlz to panic. @node Portable character set @@ -518,13 +548,7 @@ The last three characters are the period, underscore, and hyphen-minus characters, respectively. File names are identifiers. Therefore, archiving works better when file -names use only the portable character set without spaces added. Unicode is -for human consumption. It should be -@uref{http://www.gnu.org/software/moe/manual/moe_manual.html#why-not-Unicode,,avoided} -in computing environments, specially in file names. -@ifnothtml -@xref{why not Unicode,,,moe}. -@end ifnothtml +names use only the portable character set without spaces added. @node File format @@ -552,29 +576,30 @@ bytes (for example 512). @sp 1 A tar.lz file consists of a series of lzip members (compressed data sets). -The members simply appear one after another in the file, with no -additional information before, between, or after them. +The members simply appear one after another in the file, with no additional +information before, between, or after them. -Each lzip member contains one or more tar members in a simplified POSIX -pax interchange format. 
The only pax typeflag value supported by tarlz -(in addition to the typeflag values defined by the ustar format) is -@samp{x}. The pax format is an extension on top of the ustar format that -removes the size limitations of the ustar format. +Each lzip member contains one or more tar members in a simplified POSIX pax +interchange format. The only pax typeflag value supported by tarlz (in +addition to the typeflag values defined by the ustar format) is @samp{x}. +The pax format is an extension on top of the ustar format that removes the +size limitations of the ustar format. Each tar member contains one file archived, and is represented by the following sequence: @itemize @bullet @item -An optional extended header block with extended header records. This -header block is of the form described in pax header block, with a -typeflag value of @samp{x}. The extended header records are included as -the data for this header block. +An optional extended header block followed by one or more blocks that +contain the extended header records as if they were the contents of a file; +i.e., the extended header records are included as the data for this header +block. This header block is of the form described in pax header block, with +a typeflag value of @samp{x}. @item -A header block in ustar format that describes the file. Any fields -defined in the preceding optional extended header records override the -associated fields in this header block for this file. +A header block in ustar format that describes the file. Any fields defined +in the preceding optional extended header records override the associated +fields in this header block for this file. @item Zero or more blocks that contain the contents of the file. @@ -586,9 +611,11 @@ is split over two or more lzip members, the archive must be decoded sequentially. @xref{Multi-threaded decoding}. At the end of the archive file there are two 512-byte blocks filled with -binary zeros, interpreted as an end-of-archive indicator. 
These EOF -blocks are either compressed in a separate lzip member or compressed -along with the tar members contained in the last lzip member. +binary zeros, interpreted as an end-of-archive indicator. These EOA blocks +are either compressed in a separate lzip member or compressed along with the +tar members contained in the last lzip member. For a compressed archive to +be recognized by tarlz as appendable, the last lzip member must contain +between 512 and 32256 zeros alone. The diagram below shows the correspondence between each tar member (formed by one or two headers plus optional data) in the tar archive and each @@ -602,7 +629,7 @@ used: @verbatim tar +========+======+=================+===============+========+======+========+ -| header | data | extended header | extended data | header | data | EOF | +| header | data | extended header | extended data | header | data | EOA | +========+======+=================+===============+========+======+========+ tar.lz @@ -620,7 +647,7 @@ Similarly, if several records with the same keyword appear in the same block of extended records, only the last record for the repeated keyword takes effect. The other records for the repeated keyword are ignored.@* A global header inserted between an extended header and an ustar header.@* -An extended header just before the EOF blocks. +An extended header just before the end-of-archive blocks. @end ignore @sp 1 @@ -635,7 +662,7 @@ archive extraction. @xref{flawed-compat}. The pax extended header data consists of one or more records, each of them constructed as follows:@* -@samp{"%d %s=%s\n", <length>, <keyword>, <value>} +@w{@samp{"%d %s=%s\n", <length>, <keyword>, <value>}} The fields <length> and <keyword> in the record must be limited to the portable character set (@pxref{Portable character set}). The field <length> @@ -647,25 +674,53 @@ space, equal-sign, and newline. 
These are the <keyword> values currently supported by tarlz: @table @code +@item atime +The signed decimal representation of the access time of the following file +in seconds since (or before) the epoch, obtained from the function +@samp{stat}. The atime record is created only for files with a modification +time outside of the ustar range. @xref{ustar-mtime}. + +@item gid +The unsigned decimal representation of the group ID of the group that owns +the following file. The gid record is created only for files with a group ID +greater than 2_097_151 (octal 7777777). @xref{ustar-uid-gid}. + @item linkpath -The pathname of a link being created to another file, of any type, +The file name of a link being created to another file, of any type, previously archived. This record overrides the field @samp{linkname} in the -following ustar header block. The following ustar header block -determines the type of link created. If typeflag of the following header -block is 1, it will be a hard link. If typeflag is 2, it will be a -symbolic link and the linkpath value will be used as the contents of the -symbolic link. +following ustar header block. The following ustar header block determines +the type of link created. If typeflag of the following header block is 1, it +will be a hard link. If typeflag is 2, it will be a symbolic link and the +linkpath value will be used as the contents of the symbolic link. The +linkpath record is created only for links with a link name that does not fit +in the space provided by the ustar header. + +@item mtime +The signed decimal representation of the modification time of the following +file in seconds since (or before) the epoch, obtained from the function +@samp{stat}. This record overrides the field @samp{mtime} in the following +ustar header block. The mtime record is created only for files with a +modification time outside of the ustar range. @xref{ustar-mtime}. @item path -The pathname of the following file. 
This record overrides the fields -@samp{name} and @samp{prefix} in the following ustar header block. +The file name of the following file. This record overrides the fields +@samp{name} and @samp{prefix} in the following ustar header block. The path +record is created for files with a name that does not fit in the space +provided by the ustar header, but is also created for files that require any +other extended record so that the fields @samp{name} and @samp{prefix} in +the following ustar header block can be zeroed. @item size -The size of the file in bytes, expressed as a decimal number using -digits from the ISO/IEC 646:1991 (ASCII) standard. This record overrides -the size field in the following ustar header block. The size record is -used only for files with a size value greater than 8_589_934_591 -@w{(octal 77777777777)}. This is 2^33 bytes or larger. +The size of the file in bytes, expressed as a decimal number using digits +from the ISO/IEC 646:1991 (ASCII) standard. This record overrides the field +@samp{size} in the following ustar header block. The size record is created +only for files with a size value greater than 8_589_934_591 +@w{(octal 77777777777)}; that is, @w{8 GiB} (2^33 bytes) or larger. + +@item uid +The unsigned decimal representation of the user ID of the file owner of the +following file. The uid record is created only for files with a user ID +greater than 2_097_151 (octal 7777777). @xref{ustar-uid-gid}. @anchor{key_crc32} @item GNU.crc32 @@ -722,18 +777,18 @@ The fields @samp{name}, @samp{linkname}, and @samp{prefix} are null-terminated character strings except when all characters in the array contain non-null characters including the last character. -The fields @samp{prefix} and @samp{name} produce the pathname of the file. A -new pathname is formed, if prefix is not an empty string (its first -character is not null), by concatenating prefix (up to the first null -character), a slash character, and name; otherwise, name is used alone. 
In -either case, name is terminated at the first null character. If prefix -begins with a null character, it is ignored. In this manner, pathnames of at -most 256 characters can be supported. If a pathname does not fit in the -space provided, an extended record is used to store the pathname. +The fields @samp{name} and @samp{prefix} produce the file name. A new file +name is formed, if prefix is not an empty string (its first character is not +null), by concatenating prefix (up to the first null character), a slash +character, and name; otherwise, name is used alone. In either case, name is +terminated at the first null character. If prefix begins with a null +character, it is ignored. In this manner, file names of at most 256 +characters can be supported. If a file name does not fit in the space +provided, an extended record is used to store the file name. -The field @samp{linkname} does not use the prefix to produce a pathname. If -the linkname does not fit in the 100 characters provided, an extended record -is used to store the linkname. +The field @samp{linkname} does not use the prefix to produce a file name. If +the link name does not fit in the 100 characters provided, an extended +record is used to store the link name. The field @samp{mode} provides 12 access permission bits. The following table shows the symbolic name of each bit and its octal value: @@ -746,8 +801,10 @@ table shows the symbolic name of each bit and its octal value: @item S_IROTH @tab 00004 @tab S_IWOTH @tab 00002 @tab S_IXOTH @tab 00001 @end multitable +@anchor{ustar-uid-gid} The fields @samp{uid} and @samp{gid} are the user and group IDs of the owner -and group of the file, respectively. +and group of the file, respectively. If the file uid or gid are greater than +2_097_151 (octal 7777777), an extended record is used to store the uid or gid. The field @samp{size} contains the octal representation of the size of the file in bytes. 
If the field @samp{typeflag} specifies a file of type '0' @@ -758,9 +815,13 @@ to 0 or ignores it, and does not store or expect any logical records following the header. If the file size is larger than 8_589_934_591 bytes @w{(octal 77777777777)}, an extended record is used to store the file size. +@anchor{ustar-mtime} The field @samp{mtime} contains the octal representation of the modification time of the file at the time it was archived, obtained from the function -@samp{stat}. +@samp{stat}. If the modification time is negative or larger than +8_589_934_591 @w{(octal 77777777777)} seconds since the epoch, an extended +record is used to store the modification time. The ustar range of mtime goes +from @w{@samp{1970-01-01 00:00:00 UTC}} to @w{@samp{2242-03-16 12:56:31 UTC}}. The field @samp{chksum} contains the octal representation of the value of the simple sum of all bytes in the header logical record. Each byte in the @@ -775,7 +836,8 @@ file archived: Regular file. @item '1' -Hard link to another file, of any type, previously archived. +Hard link to another file, of any type, previously archived. Hard links must +not contain file data. @item '2' Symbolic link. @@ -792,9 +854,9 @@ Directory. FIFO special file. @item '7' -Reserved to represent a file to which an implementation has associated -some high-performance attribute. Tarlz treats this type of file as a -regular file (type 0). +Reserved to represent a file to which an implementation has associated some +high-performance attribute (contiguous file). Tarlz treats this type of file +as a regular file (type 0). @end table @@ -817,9 +879,9 @@ Tarlz creates safe archives that allow the reliable detection of invalid or corrupt metadata during decoding even when the integrity checking of lzip can't be used because the lzip members are only decompressed partially, as it happens in parallel @samp{--diff}, @samp{--list}, and @samp{--extract}. 
-In order to achieve this goal, tarlz makes some changes to the variant of -the pax format that it uses. This chapter describes these changes and the -concrete reasons to implement them. +In order to achieve this goal and avoid some other flaws in the pax format, +tarlz makes some changes to the variant of the pax format that it uses. This +chapter describes these changes and the concrete reasons to implement them. @sp 1 @anchor{crc32} @@ -857,23 +919,28 @@ to the POSIX-2:1993 standard, POSIX.1-2008 recommends selecting extended header field values that allow such tar to create a regular file containing the extended header records as data. This approach is broken because if the extended header is needed because of a long file name, the fields -@samp{prefix} and @samp{name} will be unable to contain the full pathname of -the file. Therefore the files corresponding to both the extended header and -the overridden ustar header will be extracted using truncated file names, -perhaps overwriting existing files or directories. It may be a security risk -to extract a file with a truncated file name. +@samp{name} and @samp{prefix} will be unable to contain the full file name. +(Some tar implementations store the truncated name in the field @samp{name} +alone, truncating the name to only 100 bytes instead of 256). Therefore the +files corresponding to both the extended header and the overridden ustar +header will be extracted using truncated file names, perhaps overwriting +existing files or directories. It may be a security risk to extract a file +with a truncated file name. To avoid this problem, tarlz writes extended headers with all fields zeroed -except size, chksum, typeflag, magic and version. This prevents old tar -programs from extracting the extended records as a file in the wrong place. -Tarlz also sets to zero those fields of the ustar header overridden by -extended records. Finally, tarlz skips members without name when decoding -except when listing. 
This is needed to detect certain format violations -during parallel extraction. - -If an extended header is required for any reason (for example a file size -larger than @w{8 GiB} or a link name longer than 100 bytes), tarlz moves the -file name also to the extended header to prevent an ustar tool from trying +except @samp{size} (which contains the size of the extended records), +@samp{chksum}, @samp{typeflag}, @samp{magic}, and @samp{version}. In +particular, tarlz sets the fields @samp{name} and @samp{prefix} to zero. +This prevents old tar programs from extracting the extended records as a +file in the wrong place. Tarlz also sets to zero those fields of the ustar +header overridden by extended records. Finally, tarlz skips members with +zeroed @samp{name} and @samp{prefix} when decoding, except when listing. +This is needed to detect certain format violations during parallel +extraction. + +If an extended header is required for any reason (for example a file size of +@w{8 GiB} or larger, or a link name longer than 100 bytes), tarlz also moves +the file name to the extended records to prevent an ustar tool from trying to extract the file or link. This also makes easier during parallel decoding the detection of a tar member split between two lzip members at the boundary between the extended header and the ustar header. @@ -882,17 +949,39 @@ between the extended header and the ustar header. @section As simple as possible (but not simpler) The tarlz format is mainly ustar. Extended pax headers are used only when -needed because the length of a file name or link name, or the size of a file -exceed the limits of the ustar format. Adding @w{1 KiB} of extended headers -to each member just to record subsecond timestamps seems wasteful for a -backup format. Moreover, minimizing the overhead may help recovering the -archive with lziprecover in case of corruption. 
+needed because the length of a file name or link name, or the size or other +attribute of a file exceed the limits of the ustar format. Adding @w{1 KiB} +of extended header and records to each member just to save subsecond +timestamps seems wasteful for a backup format. Moreover, minimizing the +overhead may help recovering the archive with lziprecover in case of +corruption. Global pax headers are tolerated, but not supported; they are parsed and ignored. Some operations may not behave as expected if the archive contains global headers. @sp 1 +@section Improve reproducibility + +Pax includes by default the process ID of the pax process in the ustar name +of the extended headers, making the archive not reproducible. Tarlz stores +the true name of the file just once, either in the ustar header or in the +extended records, making it easier to produce reproducible archives. + +Pax allows an extended record to have length x-1 or x if x is a power of +ten; @samp{99<97_bytes>} or @samp{100<97_bytes>}. Tarlz minimizes the length +of the record and always produces a length of x-1 in these cases. + +@sp 1 +@section No data in hard links + +Tarlz does not allow data in hard link members. The data (if any) must be in +the member determining the type of the file (which can't be a link). If all +the names of a file are stored as hard links, the type of the file is lost. +Not allowing data in hard links also prevents invalid actions like +extracting file data for a hard link to a symbolic link or to a directory. + +@sp 1 @section Avoid misconversions to/from UTF-8 There is no portable way to tell what charset a text string is coded into. @@ -1047,17 +1136,18 @@ corresponding to the tar member header. This is another reason why the tar headers must provide their own integrity checking. 
@sp 1 +@anchor{mt-extraction} @section Limitations of multi-threaded extraction Multi-threaded extraction may produce different output than single-threaded extraction in some cases: -During multi-threaded extraction, several independent processes are -simultaneously reading the archive and creating files in the file system. The -archive is not read sequentially. As a consequence, any error or weirdness -in the archive (like a corrupt member or an EOF block in the middle of the -archive) won't be usually detected until part of the archive beyond that -point has been processed. +During multi-threaded extraction, several independent threads are +simultaneously reading the archive and creating files in the file system. +The archive is not read sequentially. As a consequence, any error or +weirdness in the archive (like a corrupt member or an end-of-archive block +in the middle of the archive) won't be usually detected until part of the +archive beyond that point has been processed. If the archive contains two or more tar members with the same name, single-threaded extraction extracts the members in the order they appear in @@ -1070,6 +1160,9 @@ If the same file is extracted through several paths (different member names resolve to the same file in the file system), the result is undefined. (Probably the resulting file will be mangled). +Extraction of a hard link may fail if it is extracted before the file it +links to. + @node Minimum archive sizes @chapter Minimum archive sizes required for multi-threaded block compression @@ -1123,8 +1216,8 @@ tarlz -cf archive.tar.lz a b c @sp 1 @noindent -Example 2: Append files @samp{d} and @samp{e} to the multimember -compressed archive @samp{archive.tar.lz}. +Example 2: Append files @samp{d} and @samp{e} to the multimember compressed +archive @samp{archive.tar.lz}. @example tarlz -rf archive.tar.lz d e @@ -1148,7 +1241,7 @@ Example 4: Create a compressed appendable archive containing directories directory. 
Then append files @samp{a}, @samp{b}, @samp{c}, @samp{d} and @samp{e} to the archive, all of them contained in a single lzip member. The resulting archive @samp{archive.tar.lz} contains 5 lzip members -(including the EOF member). +(including the end-of-archive member). @example tarlz --dsolid -cf archive.tar.lz dir1 dir2 dir3 @@ -1184,11 +1277,11 @@ tarlz -xf archive.tar.lz a c dir1 @sp 1 @noindent -Example 8: Copy the contents of directory @samp{sourcedir} to the -directory @samp{destdir}. +Example 8: Copy the contents of directory @samp{sourcedir} to the directory +@samp{destdir}. @example -tarlz -C sourcedir -c . | tarlz -C destdir -x +tarlz -C sourcedir --uncompressed -cf - . | tarlz -C destdir -xf - @end example @sp 1 @@ -1202,6 +1295,24 @@ the other members can still be extracted). tarlz -z --no-solid archive.tar @end example +@sp 1 +@noindent +Example 10: Compress the archive @samp{archive.tar} and write the output to +@samp{foo.tar.lz}. + +@example +tarlz -z -o foo.tar.lz archive.tar +@end example + +@sp 1 +@noindent +Example 11: Concatenate and compress two archives @samp{archive1.tar} and +@samp{archive2.tar}, and write the output to @samp{foo.tar.lz}. 
+ +@example +tarlz -A archive1.tar archive2.tar | tarlz -z -o foo.tar.lz +@end example + @node Problems @chapter Reporting bugs @@ -33,15 +33,12 @@ std::vector< std::string > patterns; // list of patterns void Exclude::add_pattern( const std::string & arg ) { patterns.push_back( arg ); } -void Exclude::clear() { patterns.clear(); } - bool Exclude::excluded( const char * const filename ) { if( patterns.empty() ) return false; const char * p = filename; - while( *p ) - { + do { for( unsigned i = 0; i < patterns.size(); ++i ) // ignore a trailing sequence starting with '/' in filename #ifdef FNM_LEADING_DIR @@ -52,6 +49,6 @@ bool Exclude::excluded( const char * const filename ) #endif while( *p && *p != '/' ) ++p; // skip component while( *p == '/' ) ++p; // skip slashes - } + } while( *p ); return false; } diff --git a/extended.cc b/extended.cc index 721634a..f05d15f 100644 --- a/extended.cc +++ b/extended.cc @@ -18,6 +18,7 @@ #define _FILE_OFFSET_BITS 64 #include <cctype> +#include <cerrno> #include <cstdio> #include <cstdlib> @@ -29,21 +30,13 @@ const CRC32 crc32c( true ); namespace { -unsigned decimal_digits( unsigned long long value ) - { - unsigned digits = 1; - while( value >= 10 ) { value /= 10; ++digits; } - return digits; - } - - unsigned long long record_size( const unsigned keyword_size, const unsigned long value_size ) { - // size = ' ' + keyword + '=' + value + '\n' + /* length + ' ' + keyword + '=' + value + '\n' + minimize length; prefer "99<97_bytes>" to "100<97_bytes>" */ unsigned long long size = 1 + keyword_size + 1 + value_size + 1; - const unsigned d1 = decimal_digits( size ); - size += decimal_digits( d1 + size ); + size += decimal_digits( decimal_digits( size ) + size ); return size; } @@ -54,14 +47,14 @@ unsigned long long parse_decimal( const char * const ptr, { unsigned long long result = 0; unsigned long long i = 0; - while( i < size && std::isspace( ptr[i] ) ) ++i; + while( i < size && std::isspace( (unsigned char)ptr[i] ) ) ++i; if( 
!std::isdigit( (unsigned char)ptr[i] ) ) { if( tailp ) *tailp = ptr; return 0; } for( ; i < size && std::isdigit( (unsigned char)ptr[i] ); ++i ) { const unsigned long long prev = result; result *= 10; result += ptr[i] - '0'; - if( result < prev || result > max_file_size ) // overflow + if( result < prev || result > LLONG_MAX ) // overflow { if( tailp ) *tailp = ptr; return 0; } } if( tailp ) *tailp = ptr + i; @@ -92,21 +85,25 @@ unsigned char xdigit( const unsigned value ) } void print_hex( char * const buf, int size, unsigned long long num ) - { - while( --size >= 0 ) { buf[size] = xdigit( num & 0x0F ); num >>= 4; } - } + { while( --size >= 0 ) { buf[size] = xdigit( num & 0x0F ); num >>= 4; } } void print_decimal( char * const buf, int size, unsigned long long num ) - { while( --size >= 0 ) { buf[size] = '0' + ( num % 10 ); num /= 10; } } - + { while( --size >= 0 ) { buf[size] = num % 10 + '0'; num /= 10; } } -bool print_record( char * const buf, const unsigned long long size, - const char * keyword, const std::string & value ) +unsigned long long print_size_keyword( char * const buf, + const unsigned long long size, const char * keyword ) { // "size keyword=value\n" unsigned long long pos = decimal_digits( size ); print_decimal( buf, pos, size ); buf[pos++] = ' '; while( *keyword ) { buf[pos++] = *keyword; ++keyword; } buf[pos++] = '='; + return pos; + } + +bool print_record( char * const buf, const unsigned long long size, + const char * keyword, const std::string & value ) + { + unsigned long long pos = print_size_keyword( buf, size, keyword ); std::memcpy( buf + pos, value.c_str(), value.size() ); pos += value.size(); buf[pos++] = '\n'; return pos == size; @@ -115,18 +112,71 @@ bool print_record( char * const buf, const unsigned long long size, bool print_record( char * const buf, const int size, const char * keyword, const unsigned long long value ) { - // "size keyword=value\n" - int pos = decimal_digits( size ); - print_decimal( buf, pos, size ); buf[pos++] 
= ' '; - while( *keyword ) { buf[pos++] = *keyword; ++keyword; } buf[pos++] = '='; - const int vd = decimal_digits( value ); + int pos = print_size_keyword( buf, size, keyword ); + const int vd = decimal_digits( value ); print_decimal( buf + pos, vd, value ); pos += vd; buf[pos++] = '\n'; return pos == size; } +bool print_record( char * const buf, const int size, + const char * keyword, const Etime & value ) + { + int pos = print_size_keyword( buf, size, keyword ); + pos += value.print( buf + pos ); buf[pos++] = '\n'; + return pos == size; + } + } // end namespace +unsigned Etime::decimal_size() const + { + unsigned size = 1 + ( sec_ < 0 ); // first digit + negative sign + for( long long n = sec_; n >= 10 || n <= -10; n /= 10 ) ++size; + if( nsec_ > 0 && nsec_ <= 999999999 ) + { size += 2; // decimal point + first fractional digit + for( int n = nsec_; n >= 10; n /= 10 ) ++size; } + return size; + } + +unsigned Etime::print( char * const buf ) const + { + int len = 0; + if( nsec_ > 0 && nsec_ <= 999999999 ) + { for( int n = nsec_; n > 0; n /= 10 ) buf[len++] = n % 10 + '0'; + buf[len++] = '.'; } + long long n = sec_; + do { long long on = n; n /= 10; buf[len++] = llabs( on - 10 * n ) + '0'; } + while( n != 0 ); + if( sec_ < 0 ) buf[len++] = '-'; + for( int i = 0; i < len / 2; ++i ) std::swap( buf[i], buf[len-i-1] ); + return len; + } + +bool Etime::parse( const char * const ptr, const char ** const tailp, + const long long size ) + { + char * tail; + errno = 0; + long long s = strtoll( ptr, &tail, 10 ); + if( tail == ptr || errno || + ( *tail != 0 && *tail != '\n' && *tail != '.' ) ) return false; + int ns = 0; + if( *tail == '.' 
) // parse nanoseconds and any extra digits + { + ++tail; + if( tail - ptr >= size || !std::isdigit( (unsigned char)*tail ) ) + return false; + for( int factor = 100000000; + tail - ptr < size && std::isdigit( (unsigned char)*tail ); + ++tail, factor /= 10 ) + ns += factor * ( *tail - '0' ); + } + sec_ = s; nsec_ = ns; if( tailp ) *tailp = tail; + return true; + } + + std::vector< std::string > Extended::unknown_keywords; const std::string Extended::crc_record( "22 GNU.crc32=00000000\n" ); @@ -136,7 +186,14 @@ void Extended::calculate_sizes() const path_recsize_ = path_.size() ? record_size( 4, path_.size() ) : 0; file_size_recsize_ = ( file_size_ > 0 ) ? record_size( 4, decimal_digits( file_size_ ) ) : 0; + uid_recsize_ = ( uid_ >= 0 ) ? record_size( 3, decimal_digits( uid_ ) ) : 0; + gid_recsize_ = ( gid_ >= 0 ) ? record_size( 3, decimal_digits( gid_ ) ) : 0; + atime_recsize_ = + atime_.out_of_ustar_range() ? record_size( 5, atime_.decimal_size() ) : 0; + mtime_recsize_ = + mtime_.out_of_ustar_range() ? record_size( 5, mtime_.decimal_size() ) : 0; edsize_ = linkpath_recsize_ + path_recsize_ + file_size_recsize_ + + uid_recsize_ + gid_recsize_ + atime_recsize_ + mtime_recsize_ + crc_record.size(); padded_edsize_ = round_up( edsize_ ); full_size_ = header_size + padded_edsize_; @@ -153,20 +210,19 @@ void Extended::unknown_keyword( const char * const buf, for( unsigned i = 0; i < unknown_keywords.size(); ++i ) if( keyword == unknown_keywords[i] ) return; unknown_keywords.push_back( keyword ); - std::string msg( "Ignoring unknown extended header keyword '" ); - msg += keyword; msg += '\''; - show_error( msg.c_str() ); + print_error( 0, "Ignoring unknown extended header keyword '%s'", + keyword.c_str() ); } -// Return the size of the extended block, or -1 if error. +// Return the size of the extended block, -1 if error, -2 if out of memory. 
long long Extended::format_block( Resizable_buffer & rbuf ) const { if( empty() ) return 0; // no extended data const unsigned long long bufsize = full_size(); // recalculate sizes if( edsize_ <= 0 ) return 0; // no extended data if( edsize_ >= 1LL << 33 ) return -1; // too much extended data - if( !rbuf.resize( bufsize ) ) return -1; // extended block buffer + if( !rbuf.resize( bufsize ) ) return -2; // extended block buffer uint8_t * const header = rbuf.u8(); // extended header char * const buf = rbuf() + header_size; // extended records init_tar_header( header ); @@ -185,13 +241,27 @@ long long Extended::format_block( Resizable_buffer & rbuf ) const !print_record( buf + pos, file_size_recsize_, "size", file_size_ ) ) return -1; pos += file_size_recsize_; + if( uid_recsize_ && !print_record( buf + pos, uid_recsize_, "uid", uid_ ) ) + return -1; + pos += uid_recsize_; + if( gid_recsize_ && !print_record( buf + pos, gid_recsize_, "gid", gid_ ) ) + return -1; + pos += gid_recsize_; + if( atime_recsize_ && + !print_record( buf + pos, atime_recsize_, "atime", atime_ ) ) + return -1; + pos += atime_recsize_; + if( mtime_recsize_ && + !print_record( buf + pos, mtime_recsize_, "mtime", mtime_ ) ) + return -1; + pos += mtime_recsize_; const unsigned crc_size = Extended::crc_record.size(); std::memcpy( buf + pos, Extended::crc_record.c_str(), crc_size ); pos += crc_size; if( pos != edsize_ ) return -1; print_hex( buf + edsize_ - 9, 8, crc32c.windowed_crc( (const uint8_t *)buf, edsize_ - 9, edsize_ ) ); - if( padded_edsize_ > edsize_ ) // wipe padding + if( padded_edsize_ > edsize_ ) // set padding to zero std::memset( buf + edsize_, 0, padded_edsize_ - edsize_ ); crc_present_ = true; return bufsize; @@ -219,7 +289,7 @@ bool Extended::parse( const char * const buf, const unsigned long long edsize, while( len > 1 && tail[5+len-1] == '/' ) --len; // trailing '/' path_.assign( tail + 5, len ); // this also truncates path_ at the first embedded null character - path_.assign( 
remove_leading_dotslash( path_.c_str() ) ); + path_.assign( remove_leading_dotslash( path_.c_str(), &removed_prefix ) ); } else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 ) { @@ -233,8 +303,34 @@ bool Extended::parse( const char * const buf, const unsigned long long edsize, if( file_size_ != 0 && !permissive ) return false; file_size_ = parse_decimal( tail + 5, &tail, rest - 5 ); // parse error or size fits in ustar header - if( file_size_ < 1LL << 33 || tail != buf + ( pos + rsize - 1 ) ) - return false; + if( file_size_ < 1LL << 33 || file_size_ > max_file_size || + tail != buf + ( pos + rsize - 1 ) ) return false; + } + else if( rest > 4 && std::memcmp( tail, "uid=", 4 ) == 0 ) + { + if( uid_ >= 0 && !permissive ) return false; + uid_ = parse_decimal( tail + 4, &tail, rest - 4 ); + // parse error or uid fits in ustar header + if( uid_ < 1 << 21 || tail != buf + ( pos + rsize - 1 ) ) return false; + } + else if( rest > 4 && std::memcmp( tail, "gid=", 4 ) == 0 ) + { + if( gid_ >= 0 && !permissive ) return false; + gid_ = parse_decimal( tail + 4, &tail, rest - 4 ); + // parse error or gid fits in ustar header + if( gid_ < 1 << 21 || tail != buf + ( pos + rsize - 1 ) ) return false; + } + else if( rest > 6 && std::memcmp( tail, "atime=", 6 ) == 0 ) + { + if( atime_.isvalid() && !permissive ) return false; + if( !atime_.parse( tail + 6, &tail, rest - 6 ) || // parse error + tail != buf + ( pos + rsize - 1 ) ) return false; + } + else if( rest > 6 && std::memcmp( tail, "mtime=", 6 ) == 0 ) + { + if( mtime_.isvalid() && !permissive ) return false; + if( !mtime_.parse( tail + 6, &tail, rest - 6 ) || // parse error + tail != buf + ( pos + rsize - 1 ) ) return false; } else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 ) { @@ -259,7 +355,8 @@ bool Extended::parse( const char * const buf, const unsigned long long edsize, } -// if needed, copy linkpath, path and file_size from ustar header +/* If not already initialized, copy linkpath, path, 
file_size, uid, gid, + atime, and mtime from ustar header. */ void Extended::fill_from_ustar( const Tar_header header ) { if( linkpath_.empty() ) // copy linkpath from ustar header @@ -275,7 +372,7 @@ void Extended::fill_from_ustar( const Tar_header header ) } if( path_.empty() ) // copy path from ustar header - { + { // the entire path may be in prefix char stored_name[prefix_l+1+name_l+1]; int len = 0; while( len < prefix_l && header[prefix_o+len] ) @@ -285,13 +382,19 @@ void Extended::fill_from_ustar( const Tar_header header ) { stored_name[len] = header[name_o+i]; ++len; } while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/' stored_name[len] = 0; - path( remove_leading_dotslash( stored_name ) ); + path( remove_leading_dotslash( stored_name, &removed_prefix ) ); } const Typeflag typeflag = (Typeflag)header[typeflag_o]; if( file_size_ == 0 && // copy file_size from ustar header ( typeflag == tf_regular || typeflag == tf_hiperf ) ) file_size( parse_octal( header + size_o, size_l ) ); + if( uid_ < 0 ) uid_ = parse_octal( header + uid_o, uid_l ); + if( gid_ < 0 ) gid_ = parse_octal( header + gid_o, gid_l ); + if( !atime_.isvalid() ) + atime_.set( parse_octal( header + mtime_o, mtime_l ) ); // 33 bits + if( !mtime_.isvalid() ) + mtime_.set( parse_octal( header + mtime_o, mtime_l ) ); // 33 bits } @@ -301,7 +404,7 @@ void Extended::fill_from_ustar( const Tar_header header ) long long Extended::get_file_size_and_reset( const Tar_header header ) { const long long tmp = file_size_; - file_size( 0 ); + file_size( 0 ); // reset full_size_ const Typeflag typeflag = (Typeflag)header[typeflag_o]; if( typeflag == tf_regular || typeflag == tf_hiperf ) { diff --git a/lzip_index.cc b/lzip_index.cc index baf4513..b886d2b 100644 --- a/lzip_index.cc +++ b/lzip_index.cc @@ -20,10 +20,6 @@ #include <algorithm> #include <cerrno> #include <cstdio> -#include <cstring> -#include <string> -#include <vector> -#include <stdint.h> #include <unistd.h> #include "tarlz.h" diff 
--git a/lzip_index.h b/lzip_index.h index dd6ad67..af8aaa4 100644 --- a/lzip_index.h +++ b/lzip_index.h @@ -16,7 +16,7 @@ */ #ifndef INT64_MAX -#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL +#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL #endif @@ -15,16 +15,17 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* - Exit status: 0 for a normal exit, 1 for environmental problems (file not - found, files differ, invalid flags, I/O errors, etc), 2 to indicate a - corrupt or invalid input file, 3 for an internal consistency error - (e.g. bug) which caused tarlz to panic. + Exit status: 0 for a normal exit, 1 for environmental problems + (file not found, files differ, invalid command line options, I/O errors, + etc), 2 to indicate a corrupt or invalid input file, 3 for an internal + consistency error (e.g., bug) which caused tarlz to panic. */ #define _FILE_OFFSET_BITS 64 #include <cctype> #include <cerrno> +#include <cstdarg> #include <cstdio> #include <cstdlib> #include <ctime> @@ -81,30 +82,29 @@ void show_help( const long num_online ) "(uncompressed) tar. 
Moreover, the option '--keep-damaged' can be used to\n" "recover as much data as possible from each damaged member, and lziprecover\n" "can be used to recover some of the damaged members.\n" - "\nUsage: %s [options] [files]\n", invocation_name ); - std::printf( "\nOptions:\n" + "\nUsage: %s operation [options] [files]\n", invocation_name ); + std::printf( "\nOperations:\n" " --help display this help and exit\n" " -V, --version output version information and exit\n" " -A, --concatenate append archives to the end of an archive\n" - " -B, --data-size=<bytes> set target size of input data blocks [2x8=16 MiB]\n" " -c, --create create a new archive\n" - " -C, --directory=<dir> change to directory <dir>\n" " -d, --diff find differences between archive and file system\n" - " --ignore-ids ignore differences in owner and group IDs\n" " --delete delete files/directories from an archive\n" - " --exclude=<pattern> exclude files matching a shell pattern\n" + " -r, --append append files to the end of an archive\n" + " -t, --list list the contents of an archive\n" + " -x, --extract extract files/directories from an archive\n" + " -z, --compress compress existing POSIX tar archives\n" + " --check-lib check version of lzlib and exit\n" + "\nOptions:\n" + " -B, --data-size=<bytes> set target size of input data blocks [2x8=16 MiB]\n" + " -C, --directory=<dir> change to directory <dir>\n" " -f, --file=<archive> use archive file <archive>\n" " -h, --dereference follow symlinks; archive the files they point to\n" - " --mtime=<date> use <date> as mtime for files added to archive\n" " -n, --threads=<n> set number of (de)compression threads [%ld]\n" " -o, --output=<file> compress to <file>\n" " -p, --preserve-permissions don't subtract the umask on extraction\n" " -q, --quiet suppress all messages\n" - " -r, --append append files to the end of an archive\n" - " -t, --list list the contents of an archive\n" " -v, --verbose verbosely list files processed\n" - " -x, --extract extract 
files/directories from an archive\n" - " -z, --compress compress existing POSIX tar archives\n" " -0 .. -9 set compression level [default 6]\n" " --uncompressed don't compress the archive created\n" " --asolid create solidly compressed appendable archive\n" @@ -115,10 +115,13 @@ void show_help( const long num_online ) " --anonymous equivalent to '--owner=root --group=root'\n" " --owner=<owner> use <owner> name/ID for files added to archive\n" " --group=<group> use <group> name/ID for files added to archive\n" + " --exclude=<pattern> exclude files matching a shell pattern\n" + " --ignore-ids ignore differences in owner and group IDs\n" + " --ignore-overflow ignore mtime overflow differences on 32-bit\n" " --keep-damaged don't delete partially extracted files\n" " --missing-crc exit with error status if missing extended CRC\n" + " --mtime=<date> use <date> as mtime for files added to archive\n" " --out-slots=<n> number of 1 MiB output packets buffered [64]\n" - " --check-lib compare version of lzlib.h with liblz.{a,so}\n" " --warn-newer warn if any file is newer than the archive\n" /* " --permissive allow repeated extended headers and records\n"*/, num_online ); @@ -126,10 +129,12 @@ void show_help( const long num_online ) { std::printf( " --debug=<level> (0-1) print debug statistics to stderr\n" ); } - std::printf( "\nExit status: 0 for a normal exit, 1 for environmental problems (file not\n" - "found, files differ, invalid flags, I/O errors, etc), 2 to indicate a\n" - "corrupt or invalid input file, 3 for an internal consistency error (e.g. 
bug)\n" - "which caused tarlz to panic.\n" + std::printf( "\nIf no archive is specified, tarlz tries to read it from standard input or\n" + "write it to standard output.\n" + "\nExit status: 0 for a normal exit, 1 for environmental problems\n" + "(file not found, files differ, invalid command line options, I/O errors,\n" + "etc), 2 to indicate a corrupt or invalid input file, 3 for an internal\n" + "consistency error (e.g., bug) which caused tarlz to panic.\n" "\nReport bugs to lzip-bug@nongnu.org\n" "Tarlz home page: http://www.nongnu.org/lzip/tarlz.html\n" ); } @@ -204,56 +209,63 @@ int check_lib() } -// separate large numbers >= 100_000 in groups of 3 digits using '_' -const char * format_num3( unsigned long long num ) +// separate numbers of 6 or more digits in groups of 3 digits using '_' +const char * format_num3( long long num ) { const char * const si_prefix = "kMGTPEZY"; const char * const binary_prefix = "KMGTPEZY"; - enum { buffers = 8, bufsize = 4 * sizeof (long long) }; + enum { buffers = 8, bufsize = 4 * sizeof num }; static char buffer[buffers][bufsize]; // circle of static buffers for printf static int current = 0; char * const buf = buffer[current++]; current %= buffers; char * p = buf + bufsize - 1; // fill the buffer backwards *p = 0; // terminator + const bool negative = num < 0; char prefix = 0; // try binary first, then si - for( int i = 0; i < 8 && num >= 1024 && num % 1024 == 0; ++i ) + for( int i = 0; i < 8 && num != 0 && ( num / 1024 ) * 1024 == num; ++i ) { num /= 1024; prefix = binary_prefix[i]; } if( prefix ) *(--p) = 'i'; else - for( int i = 0; i < 8 && num >= 1000 && num % 1000 == 0; ++i ) + for( int i = 0; i < 8 && num != 0 && ( num / 1000 ) * 1000 == num; ++i ) { num /= 1000; prefix = si_prefix[i]; } if( prefix ) *(--p) = prefix; - const bool split = num >= 100000; + const bool split = num >= 100000 || num <= -100000; for( int i = 0; ; ) { - *(--p) = num % 10 + '0'; num /= 10; if( num == 0 ) break; + long long onum = num; num /= 10; 
+ *(--p) = llabs( onum - 10 * num ) + '0'; if( num == 0 ) break; if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; } } + if( negative ) *(--p) = '-'; return p; } -unsigned long long getnum( const char * const arg, - const char * const option_name, - const unsigned long long llimit, - const unsigned long long ulimit ) +void show_option_error( const char * const arg, const char * const msg, + const char * const option_name ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: '%s': %s option '%s'.\n", + program_name, arg, msg, option_name ); + } + + +long long getnum( const char * const arg, const char * const option_name, + const long long llimit = LLONG_MIN, + const long long ulimit = LLONG_MAX ) { char * tail; errno = 0; - unsigned long long result = strtoull( arg, &tail, 0 ); + long long result = strtoll( arg, &tail, 0 ); if( tail == arg ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Bad or missing numerical argument in " - "option '%s'.\n", program_name, option_name ); - std::exit( 1 ); - } + { show_option_error( arg, "Bad or missing numerical argument in", + option_name ); std::exit( 1 ); } if( !errno && tail[0] ) { - const unsigned factor = ( tail[1] == 'i' ) ? 1024 : 1000; + const int factor = ( tail[1] == 'i' ) ? 
1024 : 1000; int exponent = 0; // 0 = bad multiplier switch( tail[0] ) { @@ -268,15 +280,12 @@ unsigned long long getnum( const char * const arg, case 'k': if( factor == 1000 ) exponent = 1; break; } if( exponent <= 0 ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Bad multiplier in numerical argument of " - "option '%s'.\n", program_name, option_name ); - std::exit( 1 ); - } + { show_option_error( arg, "Bad multiplier in numerical argument of", + option_name ); std::exit( 1 ); } for( int i = 0; i < exponent; ++i ) { - if( ulimit / factor >= result ) result *= factor; + if( ( result >= 0 && LLONG_MAX / factor >= result ) || + ( result < 0 && LLONG_MIN / factor <= result ) ) result *= factor; else { errno = ERANGE; break; } } } @@ -284,8 +293,8 @@ unsigned long long getnum( const char * const arg, if( errno ) { if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] " - "in option '%s'.\n", program_name, format_num3( llimit ), + std::fprintf( stderr, "%s: '%s': Value out of limits [%s,%s] in " + "option '%s'.\n", program_name, arg, format_num3( llimit ), format_num3( ulimit ), option_name ); std::exit( 1 ); } @@ -315,51 +324,61 @@ void set_mode( Program_mode & program_mode, const Program_mode new_mode ) } -void set_mtime( long long & mtime, const char * arg, const char * const pn ) +// parse time as 'long long' even if time_t is 32-bit +long long parse_mtime( const char * arg, const char * const pn ) { - if( *arg == '@' ) - { mtime = getnum( arg + 1, pn, 0, ( 1ULL << 33 ) - 1 ); return; } + if( *arg == '@' ) return getnum( arg + 1, pn ); else if( *arg == '.' 
|| *arg == '/' ) { struct stat st; - if( stat( arg, &st ) == 0 ) { mtime = st.st_mtime; return; } - show_file_error( arg, "Can't stat", errno ); std::exit( 1 ); + if( stat( arg, &st ) == 0 ) return st.st_mtime; + show_file_error( arg, "Can't stat mtime reference file", errno ); + std::exit( 1 ); } - else // format 'YYYY-MM-DD HH:MM:SS' + else // format '[-]YYYY-MM-DD[[[<separator>HH]:MM]:SS]' { - unsigned y, mo, d, h, m, s; - const int n = std::sscanf( arg, "%u-%u-%u %u:%u:%u", - &y, &mo, &d, &h, &m, &s ); - if( n == 6 && y >= 1970 && mo >= 1 ) + long long y; // long long because 2147483648-01-01 overflows int + unsigned mo, d, h, m, s; + char sep; + const int n = std::sscanf( arg, "%lld-%u-%u%c%u:%u:%u", + &y, &mo, &d, &sep, &h, &m, &s ); + if( n >= 3 && n <= 7 && n != 4 && ( n == 3 || sep == ' ' || sep == 'T' ) ) { - struct tm t; - t.tm_year = y - 1900; t.tm_mon = mo - 1; t.tm_mday = d; - t.tm_hour = h; t.tm_min = m; t.tm_sec = s; t.tm_isdst = -1; - mtime = std::mktime( &t ); if( mtime >= 0 ) return; + if( y >= INT_MIN + 1900 && y <= INT_MAX && mo >= 1 && mo <= 12 ) + { + struct tm t; + t.tm_year = y - 1900; t.tm_mon = mo - 1; t.tm_mday = d; + t.tm_hour = ( n >= 5 ) ? h : 0; t.tm_min = ( n >= 6 ) ? m : 0; + t.tm_sec = ( n >= 7 ) ? 
s : 0; t.tm_isdst = -1; + errno = 0; + const long long mtime = std::mktime( &t ); + if( mtime != -1 || errno == 0 ) return mtime; // valid datetime + } + show_option_error( arg, "Date out of limits in", pn ); std::exit( 1 ); } } - show_error( "Invalid mtime.", 0, true ); std::exit( 1 ); + show_option_error( arg, "Unknown date format in", pn ); std::exit( 1 ); } -void set_owner( int & owner, const char * const arg, const char * const pn ) +long long parse_owner( const char * const arg, const char * const pn ) { const struct passwd * const pw = getpwnam( arg ); - if( pw ) owner = pw->pw_uid; - else if( std::isdigit( (unsigned char)arg[0] ) ) - owner = getnum( arg, pn, 0, INT_MAX ); - else if( std::strcmp( arg, "root" ) == 0 ) owner = 0; - else { show_file_error( arg, "Invalid owner" ); std::exit( 1 ); } + if( pw ) return pw->pw_uid; + if( std::isdigit( (unsigned char)arg[0] ) ) + return getnum( arg, pn, 0, LLONG_MAX ); + if( std::strcmp( arg, "root" ) == 0 ) return 0; + show_option_error( arg, "Invalid owner in", pn ); std::exit( 1 ); } -void set_group( int & group, const char * const arg, const char * const pn ) +long long parse_group( const char * const arg, const char * const pn ) { const struct group * const gr = getgrnam( arg ); - if( gr ) group = gr->gr_gid; - else if( std::isdigit( (unsigned char)arg[0] ) ) - group = getnum( arg, pn, 0, INT_MAX ); - else if( std::strcmp( arg, "root" ) == 0 ) group = 0; - else { show_file_error( arg, "Invalid group" ); std::exit( 1 ); } + if( gr ) return gr->gr_gid; + if( std::isdigit( (unsigned char)arg[0] ) ) + return getnum( arg, pn, 0, LLONG_MAX ); + if( std::strcmp( arg, "root" ) == 0 ) return 0; + show_option_error( arg, "Invalid group in", pn ); std::exit( 1 ); } } // end namespace @@ -378,7 +397,7 @@ int open_instream( const std::string & name ) return -1; } struct stat st; // infd must not be a directory if( fstat( infd, &st ) == 0 && S_ISDIR( st.st_mode ) ) - { show_file_error( name.c_str(), "Is a directory." 
); + { show_file_error( name.c_str(), "Can't read. Is a directory." ); close( infd ); return -1; } return infd; } @@ -397,9 +416,7 @@ int open_outstream( const std::string & name, const bool create, const char * msg = !create ? "Error opening file" : ( ( errno == EEXIST ) ? "Skipping file" : "Can't create file" ); if( !rbufp ) show_file_error( name.c_str(), msg, errno ); - else - snprintf( (*rbufp)(), (*rbufp).size(), "%s: %s: %s\n", name.c_str(), - msg, std::strerror( errno ) ); + else format_file_error( *rbufp, name.c_str(), msg, errno ); } return outfd; } @@ -432,6 +449,41 @@ void show_error( const char * const msg, const int errcode, const bool help ) } +bool format_error( Resizable_buffer & rbuf, const int errcode, + const char * const format, ... ) + { + if( verbosity < 0 ) { rbuf.resize( 1 ); rbuf()[0] = 0; return false; } + va_list args; + for( int i = 0; i < 2; ++i ) // resize rbuf if not large enough + { + int len = snprintf( rbuf(), rbuf.size(), "%s: ", program_name ); + if( len >= (int)rbuf.size() && !rbuf.resize( len + 1 ) ) break; + va_start( args, format ); + len += vsnprintf( rbuf() + len, rbuf.size() - len, format, args ); + va_end( args ); + if( len >= (int)rbuf.size() && !rbuf.resize( len + 1 ) ) break; + if( errcode <= 0 ) rbuf()[len++] = '\n'; + else len += snprintf( rbuf() + len, rbuf.size() - len, ": %s\n", + std::strerror( errcode ) ); + if( len < (int)rbuf.size() || !rbuf.resize( len + 1 ) ) break; + } + return true; + } + + +void print_error( const int errcode, const char * const format, ... 
) + { + if( verbosity < 0 ) return; + va_list args; + std::fprintf( stderr, "%s: ", program_name ); + va_start( args, format ); + std::vfprintf( stderr, format, args ); + va_end( args ); + if( errcode <= 0 ) std::fputc( '\n', stderr ); + else std::fprintf( stderr, ": %s\n", std::strerror( errcode ) ); + } + + void format_file_error( std::string & estr, const char * const filename, const char * const msg, const int errcode ) { @@ -442,6 +494,19 @@ void format_file_error( std::string & estr, const char * const filename, estr += '\n'; } +bool format_file_error( Resizable_buffer & rbuf, const char * const filename, + const char * const msg, const int errcode ) + { + if( verbosity < 0 ) { rbuf.resize( 1 ); rbuf()[0] = 0; return false; } + for( int i = 0; i < 2; ++i ) // resize rbuf if not large enough + { + const int len = snprintf( rbuf(), rbuf.size(), "%s: %s: %s%s%s\n", + program_name, filename, msg, ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? std::strerror( errcode ) : "" ); + if( len < (int)rbuf.size() || !rbuf.resize( len + 1 ) ) break; + } + return true; + } void show_file_error( const char * const filename, const char * const msg, const int errcode ) @@ -467,7 +532,7 @@ int main( const int argc, const char * const argv[] ) enum { opt_ano = 256, opt_aso, opt_bso, opt_chk, opt_crc, opt_dbg, opt_del, opt_dso, opt_exc, opt_grp, opt_hlp, opt_id, opt_kd, opt_mti, opt_nso, - opt_out, opt_own, opt_per, opt_sol, opt_un, opt_wn }; + opt_ofl, opt_out, opt_own, opt_per, opt_sol, opt_un, opt_wn }; const Arg_parser::Option options[] = { { '0', 0, Arg_parser::no }, @@ -513,6 +578,7 @@ int main( const int argc, const char * const argv[] ) { opt_crc, "missing-crc", Arg_parser::no }, { opt_mti, "mtime", Arg_parser::yes }, { opt_nso, "no-solid", Arg_parser::no }, + { opt_ofl, "ignore-overflow", Arg_parser::no }, { opt_out, "out-slots", Arg_parser::yes }, { opt_own, "owner", Arg_parser::yes }, { opt_per, "permissive", Arg_parser::no }, @@ -571,8 +637,8 @@ int main( const int 
argc, const char * const argv[] ) case 'V': show_version(); return 0; case 'x': set_mode( cl_opts.program_mode, m_extract ); break; case 'z': set_mode( cl_opts.program_mode, m_compress ); z_pn = pn; break; - case opt_ano: set_owner( cl_opts.owner, "root", pn ); - set_group( cl_opts.group, "root", pn ); break; + case opt_ano: cl_opts.uid = parse_owner( "root", pn ); + cl_opts.gid = parse_group( "root", pn ); break; case opt_aso: cl_opts.solidity = asolid; break; case opt_bso: cl_opts.solidity = bsolid; break; case opt_crc: cl_opts.missing_crc = true; break; @@ -581,14 +647,16 @@ int main( const int argc, const char * const argv[] ) case opt_del: set_mode( cl_opts.program_mode, m_delete ); break; case opt_dso: cl_opts.solidity = dsolid; break; case opt_exc: Exclude::add_pattern( sarg ); break; - case opt_grp: set_group( cl_opts.group, arg, pn ); break; + case opt_grp: cl_opts.gid = parse_group( arg, pn ); break; case opt_hlp: show_help( num_online ); return 0; case opt_id: cl_opts.ignore_ids = true; break; case opt_kd: cl_opts.keep_damaged = true; break; - case opt_mti: set_mtime( cl_opts.mtime, arg, pn ); break; + case opt_mti: cl_opts.mtime = parse_mtime( arg, pn ); + cl_opts.mtime_set = true; break; case opt_nso: cl_opts.solidity = no_solid; break; + case opt_ofl: cl_opts.ignore_overflow = true; break; case opt_out: cl_opts.out_slots = getnum( arg, pn, 1, 1024 ); break; - case opt_own: set_owner( cl_opts.owner, arg, pn ); break; + case opt_own: cl_opts.uid = parse_owner( arg, pn ); break; case opt_per: cl_opts.permissive = true; break; case opt_sol: cl_opts.solidity = solid; break; case opt_un: cl_opts.level = -1; break; @@ -597,7 +665,7 @@ int main( const int argc, const char * const argv[] ) } } // end process options - if( cl_opts.program_mode != m_compress && cl_opts.output_filename.size() ) + if( cl_opts.program_mode != m_compress && o_pn ) { if( verbosity >= 0 ) std::fprintf( stderr, "%s: Option '%s' can only be used with " @@ -611,6 +679,14 @@ int main( 
const int argc, const char * const argv[] ) program_name, f_pn, z_pn ); return 1; } + if( cl_opts.program_mode == m_compress && + ( cl_opts.level < 0 || cl_opts.level > 9 ) ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Option '--uncompressed' can't be used with '%s'.\n", + program_name, z_pn ); + return 1; + } #if !defined LZ_API_VERSION || LZ_API_VERSION < 1012 // compile-time test #error "lzlib 1.12 or newer needed." @@ -624,12 +700,17 @@ int main( const int argc, const char * const argv[] ) setmode( STDOUT_FILENO, O_BINARY ); #endif + if( cl_opts.data_size <= 0 && cl_opts.level >= 0 && cl_opts.level <= 9 ) + { + if( cl_opts.level == 0 ) cl_opts.data_size = 1 << 20; + else cl_opts.data_size = 2 * option_mapping[cl_opts.level].dictionary_size; + } if( cl_opts.num_workers < 0 ) // 0 disables multi-threading cl_opts.num_workers = std::min( num_online, max_workers ); switch( cl_opts.program_mode ) { - case m_none: show_error( "Missing operation.", 0, true ); return 1; + case m_none: show_error( "Missing operation.", 0, true ); return 1; case m_append: case m_create: return encode( cl_opts ); case m_compress: return compress( cl_opts ); @@ -57,7 +57,7 @@ inline void init_tar_header( Tar_header header ) // set magic and version } inline void print_octal( uint8_t * const buf, int size, unsigned long long num ) - { while( --size >= 0 ) { buf[size] = '0' + ( num % 8 ); num /= 8; } } + { while( --size >= 0 ) { buf[size] = num % 8 + '0'; num /= 8; } } // Round "size" to the next multiple of header size (512). @@ -70,6 +70,14 @@ inline unsigned long long round_up( const unsigned long long size ) } +inline int decimal_digits( unsigned long long value ) + { + int digits = 1; + while( value >= 10 ) { value /= 10; ++digits; } + return digits; + } + + inline bool dotdot_at_i( const char * const filename, const int i ) { return ( filename[i] == '.' && filename[i+1] == '.' 
&& @@ -119,12 +127,45 @@ public: }; -class Extended // stores metadata from/for extended records +inline bool uid_in_ustar_range( const long long uid ) // also for gid + { return uid >= 0 && uid < 1 << 21; } + +inline bool time_in_ustar_range( const long long seconds ) + { return seconds >= 0 && seconds < 1LL << 33; } + + +/* The sign of the seconds field applies to the whole time value. + A nanoseconds value out of range means an invalid time. */ +class Etime // time since (or before) the epoch + { + long long sec_; + int nsec_; // range [0, 999_999_999] + +public: + Etime() : sec_( 0 ), nsec_( -1 ) {} + void reset() { sec_ = 0; nsec_ = -1; } + void set( const long long s ) { sec_ = s; nsec_ = 0; } + long long sec() const { return sec_; } + int nsec() const { return nsec_; } + bool isvalid() const { return nsec_ >= 0 && nsec_ <= 999999999; } + bool out_of_ustar_range() const + { return isvalid() && !time_in_ustar_range( sec_ ); } + + unsigned decimal_size() const; + unsigned print( char * const buf ) const; + bool parse( const char * const ptr, const char ** const tailp, + const long long size ); + }; + + +class Extended // stores metadata from/for extended records { static std::vector< std::string > unknown_keywords; // already diagnosed std::string linkpath_; // these are the real metadata std::string path_; long long file_size_; // >= 0 && <= max_file_size + long long uid_, gid_; // may not fit in unsigned int + Etime atime_, mtime_; // cached sizes; if full_size_ < 0 they must be recalculated mutable long long edsize_; // extended data size @@ -133,6 +174,10 @@ class Extended // stores metadata from/for extended records mutable long long linkpath_recsize_; mutable long long path_recsize_; mutable int file_size_recsize_; + mutable int uid_recsize_; + mutable int gid_recsize_; + mutable int atime_recsize_; + mutable int mtime_recsize_; // true if CRC present in parsed or formatted records mutable bool crc_present_; @@ -143,29 +188,47 @@ class Extended // stores 
metadata from/for extended records public: static const std::string crc_record; + std::string removed_prefix; Extended() - : file_size_( 0 ), edsize_( 0 ), padded_edsize_( 0 ), full_size_( 0 ), - linkpath_recsize_( 0 ), path_recsize_( 0 ), file_size_recsize_( 0 ), + : file_size_( 0 ), uid_( -1 ), gid_( -1 ), edsize_( 0 ), + padded_edsize_( 0 ), full_size_( 0 ), linkpath_recsize_( 0 ), + path_recsize_( 0 ), file_size_recsize_( 0 ), uid_recsize_( 0 ), + gid_recsize_( 0 ), atime_recsize_( 0 ), mtime_recsize_( 0 ), crc_present_( false ) {} void reset() - { linkpath_.clear(); path_.clear(); file_size_ = 0; edsize_ = 0; - padded_edsize_ = 0; full_size_ = 0; linkpath_recsize_ = 0; - path_recsize_ = 0; file_size_recsize_ = 0; crc_present_ = false; } + { linkpath_.clear(); path_.clear(); file_size_ = 0; uid_ = -1; gid_ = -1; + atime_.reset(); mtime_.reset(); edsize_ = 0; padded_edsize_ = 0; + full_size_ = 0; linkpath_recsize_ = 0; path_recsize_ = 0; + file_size_recsize_ = 0; uid_recsize_ = 0; gid_recsize_ = 0; + atime_recsize_ = 0; mtime_recsize_ = 0; crc_present_ = false; + removed_prefix.clear(); } bool empty() const - { return linkpath_.empty() && path_.empty() && file_size_ == 0; } + { return linkpath_.empty() && path_.empty() && file_size_ == 0 && + uid_ < 0 && gid_ < 0 && + !atime_.out_of_ustar_range() && !mtime_.out_of_ustar_range(); } const std::string & linkpath() const { return linkpath_; } const std::string & path() const { return path_; } long long file_size() const { return file_size_; } long long get_file_size_and_reset( const Tar_header header ); + long long get_uid() const { return uid_; } + long long get_gid() const { return gid_; } + const Etime & atime() const { return atime_; } + const Etime & mtime() const { return mtime_; } void linkpath( const char * const lp ) { linkpath_ = lp; full_size_ = -1; } void path( const char * const p ) { path_ = p; full_size_ = -1; } void file_size( const long long fs ) { full_size_ = -1; file_size_ = ( fs >= 0 && fs <= 
max_file_size ) ? fs : 0; } + bool set_uid( const long long id ) + { if( id >= 0 ) { uid_ = id; full_size_ = -1; } return id >= 0; } + bool set_gid( const long long id ) + { if( id >= 0 ) { gid_ = id; full_size_ = -1; } return id >= 0; } + void set_atime( const long long s ) { atime_.set( s ); full_size_ = -1; } + void set_mtime( const long long s ) { mtime_.set( s ); full_size_ = -1; } long long full_size() const { if( full_size_ < 0 ) calculate_sizes(); return full_size_; } @@ -269,10 +332,10 @@ const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP" struct Lzip_header { - uint8_t data[6]; // 0-3 magic bytes + enum { size = 6 }; + uint8_t data[size]; // 0-3 magic bytes // 4 version // 5 coded dictionary size - enum { size = 6 }; bool verify_magic() const { return ( std::memcmp( data, lzip_magic, 4 ) == 0 ); } @@ -283,6 +346,7 @@ struct Lzip_header if( data[i] != lzip_magic[i] ) return false; return ( sz > 0 ); } + bool verify_corrupt() const // detect corrupt header { int matches = 0; @@ -310,10 +374,10 @@ struct Lzip_header struct Lzip_trailer { - uint8_t data[20]; // 0-3 CRC32 of the uncompressed data + enum { size = 20 }; + uint8_t data[size]; // 0-3 CRC32 of the uncompressed data // 4-11 size of the uncompressed data // 12-19 member size including header and trailer - enum { size = 20 }; unsigned data_crc() const { @@ -356,12 +420,15 @@ enum Program_mode { m_none, m_append, m_compress, m_concatenate, m_create, m_delete, m_diff, m_extract, m_list }; enum Solidity { no_solid, bsolid, dsolid, asolid, solid }; class Arg_parser; + struct Cl_options // command line options { const Arg_parser & parser; std::string archive_name; std::string output_filename; long long mtime; + long long uid; + long long gid; Program_mode program_mode; Solidity solidity; int data_size; @@ -370,29 +437,28 @@ struct Cl_options // command line options int num_files; int num_workers; // start this many worker threads int out_slots; - int owner; - int group; bool dereference; 
bool filenames_given; bool ignore_ids; + bool ignore_overflow; bool keep_damaged; bool missing_crc; + bool mtime_set; bool permissive; bool preserve_permissions; bool warn_newer; Cl_options( const Arg_parser & ap ) - : parser( ap ), mtime( -1 ), program_mode( m_none ), solidity( bsolid ), - data_size( 0 ), debug_level( 0 ), level( 6 ), num_files( 0 ), - num_workers( -1 ), out_slots( 64 ), owner( -1 ), group( -1 ), - dereference( false ), filenames_given( false ), ignore_ids( false ), - keep_damaged( false ), missing_crc( false ), permissive( false ), - preserve_permissions( false ), warn_newer( false ) {} + : parser( ap ), mtime( 0 ), uid( -1 ), gid( -1 ), program_mode( m_none ), + solidity( bsolid ), data_size( 0 ), debug_level( 0 ), level( 6 ), + num_files( 0 ), num_workers( -1 ), out_slots( 64 ), dereference( false ), + filenames_given( false ), ignore_ids( false ), ignore_overflow( false ), + keep_damaged( false ), missing_crc( false ), mtime_set( false ), + permissive( false ), preserve_permissions( false ), warn_newer( false ) {} bool to_stdout() const { return output_filename == "-"; } }; - inline void set_retval( int & retval, const int new_val ) { if( retval < new_val ) retval = new_val; } @@ -403,15 +469,23 @@ const char * const trailing_msg = "Trailing data not allowed."; const char * const bad_hdr_msg = "Corrupt or invalid tar header."; const char * const gblrec_msg = "Error in global extended records."; const char * const extrec_msg = "Error in extended records."; -const char * const mcrc_msg = "Missing CRC in extended records."; +const char * const miscrc_msg = "Missing CRC in extended records."; +const char * const misrec_msg = "Missing extended records."; +const char * const longrec_msg = "Extended records are too long."; const char * const end_msg = "Archive ends unexpectedly."; const char * const mem_msg = "Not enough memory."; const char * const mem_msg2 = "Not enough memory. 
Try a lower compression level."; -const char * const fv_msg1 = "Format violation: extended header followed by EOF blocks."; +const char * const fv_msg1 = "Format violation: extended header followed by EOA blocks."; const char * const fv_msg2 = "Format violation: extended header followed by global header."; const char * const fv_msg3 = "Format violation: consecutive extended headers found."; const char * const posix_msg = "This does not look like a POSIX tar archive."; const char * const posix_lz_msg = "This does not look like a POSIX tar.lz archive."; +const char * const eclosa_msg = "Error closing archive"; +const char * const eclosf_msg = "Error closing file"; +const char * const nfound_msg = "Not found in archive."; +const char * const seek_msg = "Seek error"; +const char * const werr_msg = "Write error"; +const char * const chdir_msg = "Error changing working directory"; // defined in common.cc void xinit_mutex( pthread_mutex_t * const mutex ); @@ -441,15 +515,17 @@ mode_t get_umask(); bool make_path( const std::string & name ); // defined in compress.cc -int compress( Cl_options & cl_opts ); +int compress( const Cl_options & cl_opts ); // defined in create.cc bool copy_file( const int infd, const int outfd, const long long max_size = -1 ); bool writeblock_wrapper( const int outfd, const uint8_t * const buffer, const int size ); -bool write_eof_records( const int outfd, const bool compressed ); +bool write_eoa_records( const int outfd, const bool compressed ); const char * remove_leading_dotslash( const char * const filename, - const bool dotdot = false ); + std::string * const removed_prefixp, const bool dotdot = false ); +bool print_removed_prefix( const std::string & prefix, + std::string * const msgp = 0 ); bool fill_headers( const char * const filename, Extended & extended, Tar_header header, long long & file_size, const int flag ); bool block_is_full( const long long extended_size, @@ -462,11 +538,10 @@ unsigned ustar_chksum( const Tar_header header ); 
bool verify_ustar_chksum( const Tar_header header ); bool has_lz_ext( const std::string & name ); int concatenate( const Cl_options & cl_opts ); -int encode( Cl_options & cl_opts ); +int encode( const Cl_options & cl_opts ); // defined in create_lz.cc int encode_lz( const Cl_options & cl_opts, const char * const archive_namep, - const int dictionary_size, const int match_len_limit, const int outfd ); // defined in decode.cc @@ -522,8 +597,13 @@ int open_outstream( const std::string & name, const bool create = true, void exit_fail_mt( const int retval = 1 ); // terminate the program void show_error( const char * const msg, const int errcode = 0, const bool help = false ); +bool format_error( Resizable_buffer & rbuf, const int errcode, + const char * const format, ... ); +void print_error( const int errcode, const char * const format, ... ); void format_file_error( std::string & estr, const char * const filename, const char * const msg, const int errcode = 0 ); +bool format_file_error( Resizable_buffer & rbuf, const char * const filename, + const char * const msg, const int errcode = 0 ); void show_file_error( const char * const filename, const char * const msg, const int errcode = 0 ); void internal_error( const char * const msg ); diff --git a/testsuite/check.sh b/testsuite/check.sh index 56aafdb..348e447 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -54,8 +54,8 @@ bad3_lz="${testdir}"/test3_bad3.tar.lz bad4_lz="${testdir}"/test3_bad4.tar.lz bad5_lz="${testdir}"/test3_bad5.tar.lz bad6_lz="${testdir}"/test3_bad6.tar.lz -eof="${testdir}"/eof.tar -eof_lz="${testdir}"/eof.tar.lz +eoa="${testdir}"/eoa_blocks.tar +eoa_lz="${testdir}"/eoa_blocks.tar.lz fail=0 lwarnc=0 test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } @@ -65,12 +65,12 @@ cyg_symlink() { [ ${lwarnc} = 0 ] && # Description of test files for tarlz: # test.txt.tar.lz: 1 member (test.txt). 
-# t155.tar[.lz]: directory + 3 links + file + eof, all with 155 char names +# t155.tar[.lz]: directory + 3 links + file + EOA, all with 155 char names # t155_fv?.tar[.lz]: like t155.tar but with 3 kinds of format violations -# t155_fv1.tar[.lz]: extended header followed by EOF blocks -# t155_fv2.tar[.lz]: extended header followed by global header -# t155_fv3.tar[.lz]: consecutive extended headers -# t155_fv[456].tar.lz: like t155_fv[123].tar.lz but violation starts member +# t155_fv1.tar[.lz]: extra extended header before EOA blocks +# t155_fv2.tar[.lz]: first extended header followed by global header +# t155_fv3.tar[.lz]: consecutive extended headers in last member +# t155_fv[456].tar.lz: like t155_fv[123].tar.lz but violation alone in member # tar_in_tlz1.tar.lz: 2 members (test.txt.tar test3.tar) 3 lzip members # tar_in_tlz2.tar.lz: 2 members (test.txt.tar test3.tar) 5 lzip members # ts_in_link.tar.lz: 4 symbolic links (link[1-4]) to / /dir/ dir/ dir(107/) @@ -90,21 +90,21 @@ cyg_symlink() { [ ${lwarnc} = 0 ] && # test3_bad2.tar.lz: byte at offset 49 changed from 0x49 to 0x69 (mid stream) # test3_bad3.tar.lz: byte at offset 176 changed from 0x7D to 0x6D (mid stream) # test3_bad4.tar.lz: combined damage of test3_bad2.tar.lz and test3_bad3.tar.lz -# test3_bad5.tar.lz: [71-134] --> zeroed (first trailer + seconf header) +# test3_bad5.tar.lz: [71-134] --> zeroed (first trailer + second header) # test3_bad6.tar.lz: 510 zeros prepended to test3.tar.lz (header in two blocks) -# test3_eof?.tar: like test3_eof?.tar.lz but uncompressed -# test3_eof1.tar.lz: test3.tar.lz without eof blocks -# test3_eof2.tar.lz: test3.tar.lz with only one eof block -# test3_eof3.tar.lz: test3.tar.lz with one zeroed block between foo and bar -# test3_eof4.tar.lz: test3.tar.lz ended by extended header without eof blocks -# test3_eof5.tar.lz: test3.tar.lz split ext first member, without eof blocks +# test3_eoa?.tar: like test3_eoa?.tar.lz but uncompressed +# test3_eoa1.tar.lz: test3.tar.lz 
without EOA blocks +# test3_eoa2.tar.lz: test3.tar.lz with only one EOA block +# test3_eoa3.tar.lz: test3.tar.lz with one zeroed block between foo and bar +# test3_eoa4.tar.lz: test3.tar.lz ended by extended header without EOA blocks +# test3_eoa5.tar.lz: test3.tar.lz split extended bar member, without EOA blocks # test3_em?.tar.lz: test3.tar.lz with one empty lzip member at each position # test3_em6.tar.lz: test3.tar.lz preceded by four empty lzip members # test3_gh?.tar: test3.tar with global header at each position # test3_gh?.tar.lz: test3.tar.lz with global before bar split in 4 ways # test3_gh5.tar.lz: test3.tar.lz with global in lzip member before foo # test3_gh6.tar.lz: test3.tar.lz with global before foo in same member -# test3_nn.tar[.lz]: test3.tar[.lz] with no name in bar member +# test3_nn.tar[.lz]: test3.tar[.lz] with zeroed name (no name) in bar member # test3_sm?.tar.lz: test3.tar.lz with extended bar member split in 4 ways # tlz_in_tar1.tar: 1 member (test3.tar.lz) first magic damaged # tlz_in_tar2.tar: 2 members (foo test3.tar.lz) first magic damaged @@ -179,7 +179,9 @@ done [ $? = 1 ] || test_failed $LINENO "${TARLZ}" -q -z . [ $? = 1 ] || test_failed $LINENO -"${TARLZ}" -q -tf "${in_tar_lz}" "" +"${TARLZ}" -z -o - --uncompressed "${test3}" > /dev/null 2>&1 +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -tf "${in_tar_lz}" "" # empty non-option argument [ $? = 1 ] || test_failed $LINENO "${TARLZ}" --help > /dev/null || test_failed $LINENO "${TARLZ}" -V > /dev/null || test_failed $LINENO @@ -187,25 +189,32 @@ done [ $? = 1 ] || test_failed $LINENO "${TARLZ}" -tf 2> /dev/null [ $? = 1 ] || test_failed $LINENO -"${TARLZ}" --owner=invalid_oner_name -tf "${test3_lz}" 2> /dev/null +bad_dates='@-9223372036854775809 @9223372036854775808 + -2147481749-01-01T00:00:00 2147483648-01-01T00:00:00 + 2017-10-01T 2017-10 ./nx_file' +for i in ${bad_dates} ; do + "${TARLZ}" -c --mtime="$i" "${in}" > /dev/null 2>&1 + [ $? 
= 1 ] || test_failed $LINENO "$i" +done +"${TARLZ}" --owner=invalid_owner_name -tf "${test3_lz}" 2> /dev/null [ $? = 1 ] || test_failed $LINENO -"${TARLZ}" --group=invalid_goup_name -tf "${test3_lz}" 2> /dev/null +"${TARLZ}" --group=invalid_group_name -tf "${test3_lz}" 2> /dev/null [ $? = 1 ] || test_failed $LINENO printf "\ntesting --list and --extract..." # test --list and --extract -"${TARLZ}" -tf "${eof_lz}" --missing-crc || test_failed $LINENO -"${TARLZ}" -xf "${eof_lz}" --missing-crc || test_failed $LINENO +"${TARLZ}" -tf "${eoa_lz}" --missing-crc || test_failed $LINENO +"${TARLZ}" -xf "${eoa_lz}" --missing-crc || test_failed $LINENO "${TARLZ}" -C nx_dir -tf "${in_tar}" > /dev/null || test_failed $LINENO "${TARLZ}" -xf "${in_tar}" --missing-crc || test_failed $LINENO cmp "${in}" test.txt || test_failed $LINENO rm -f test.txt || framework_failure "${TARLZ}" -tf "${in_tar_lz}" --missing-crc > /dev/null || test_failed $LINENO for i in 0 2 6 ; do - "${TARLZ}" -n$i -xf "${in_tar_lz}" --missing-crc || test_failed $LINENO $i - cmp "${in}" test.txt || test_failed $LINENO $i - rm -f test.txt || framework_failure + "${TARLZ}" -n$i -xf "${in_tar_lz}" --missing-crc || test_failed $LINENO $i + cmp "${in}" test.txt || test_failed $LINENO $i + rm -f test.txt || framework_failure done # test3 reference files for -t and -tv (list3, vlist3) @@ -323,51 +332,51 @@ for i in 0 2 6 ; do rm -rf dir || framework_failure done -# test --list and --extract eof -"${TARLZ}" -tvf "${testdir}"/test3_eof1.tar > out 2> /dev/null +# test --list and --extract EOA +"${TARLZ}" -tvf "${testdir}"/test3_eoa1.tar > out 2> /dev/null [ $? 
= 2 ] || test_failed $LINENO diff -u vlist3 out || test_failed $LINENO -"${TARLZ}" -tvf "${testdir}"/test3_eof2.tar > out || test_failed $LINENO +"${TARLZ}" -tvf "${testdir}"/test3_eoa2.tar > out || test_failed $LINENO diff -u vlist3 out || test_failed $LINENO -"${TARLZ}" -q -tf "${testdir}"/test3_eof3.tar || test_failed $LINENO -"${TARLZ}" -tvf "${testdir}"/test3_eof4.tar > out 2> /dev/null +"${TARLZ}" -q -tf "${testdir}"/test3_eoa3.tar || test_failed $LINENO +"${TARLZ}" -tvf "${testdir}"/test3_eoa4.tar > out 2> /dev/null [ $? = 2 ] || test_failed $LINENO diff -u vlist3 out || test_failed $LINENO for i in 0 2 6 ; do - "${TARLZ}" -n$i -tvf "${testdir}"/test3_eof1.tar.lz > out 2> /dev/null + "${TARLZ}" -n$i -tvf "${testdir}"/test3_eoa1.tar.lz > out 2> /dev/null [ $? = 2 ] || test_failed $LINENO $i - diff -u vlist3 out || test_failed $LINENO - "${TARLZ}" -n$i -tvf "${testdir}"/test3_eof2.tar.lz > out || + diff -u vlist3 out || test_failed $LINENO $i + "${TARLZ}" -n$i -tvf "${testdir}"/test3_eoa2.tar.lz > out || test_failed $LINENO $i - diff -u vlist3 out || test_failed $LINENO - "${TARLZ}" -q -n$i -tf "${testdir}"/test3_eof3.tar.lz || + diff -u vlist3 out || test_failed $LINENO $i + "${TARLZ}" -q -n$i -tf "${testdir}"/test3_eoa3.tar.lz || test_failed $LINENO $i - "${TARLZ}" -n$i -tvf "${testdir}"/test3_eof4.tar.lz > out 2> /dev/null + "${TARLZ}" -n$i -tvf "${testdir}"/test3_eoa4.tar.lz > out 2> /dev/null [ $? = 2 ] || test_failed $LINENO $i - diff -u vlist3 out || test_failed $LINENO - "${TARLZ}" -n$i -tvf "${testdir}"/test3_eof5.tar.lz > out 2> /dev/null + diff -u vlist3 out || test_failed $LINENO $i + "${TARLZ}" -n$i -tvf "${testdir}"/test3_eoa5.tar.lz > out 2> /dev/null [ $? = 2 ] || test_failed $LINENO $i - diff -u vlist3 out || test_failed $LINENO + diff -u vlist3 out || test_failed $LINENO $i done rm -f out || framework_failure # -"${TARLZ}" -q -xf "${testdir}"/test3_eof1.tar +"${TARLZ}" -q -xf "${testdir}"/test3_eoa1.tar [ $? 
= 2 ] || test_failed $LINENO cmp cfoo foo || test_failed $LINENO cmp cbar bar || test_failed $LINENO cmp cbaz baz || test_failed $LINENO rm -f foo bar baz || framework_failure -"${TARLZ}" -xf "${testdir}"/test3_eof2.tar || test_failed $LINENO +"${TARLZ}" -xf "${testdir}"/test3_eoa2.tar || test_failed $LINENO cmp cfoo foo || test_failed $LINENO cmp cbar bar || test_failed $LINENO cmp cbaz baz || test_failed $LINENO rm -f foo bar baz || framework_failure -"${TARLZ}" -xf "${testdir}"/test3_eof3.tar || test_failed $LINENO +"${TARLZ}" -xf "${testdir}"/test3_eoa3.tar || test_failed $LINENO cmp cfoo foo || test_failed $LINENO [ ! -e bar ] || test_failed $LINENO [ ! -e baz ] || test_failed $LINENO rm -f foo bar baz || framework_failure -"${TARLZ}" -q -xf "${testdir}"/test3_eof4.tar +"${TARLZ}" -q -xf "${testdir}"/test3_eoa4.tar [ $? = 2 ] || test_failed $LINENO cmp cfoo foo || test_failed $LINENO cmp cbar bar || test_failed $LINENO @@ -375,32 +384,31 @@ cmp cbaz baz || test_failed $LINENO rm -f foo bar baz || framework_failure # for i in 0 2 6 ; do - "${TARLZ}" -q -n$i -xf "${testdir}"/test3_eof1.tar.lz - [ $? = 2 ] || test_failed $LINENO $i - cmp cfoo foo || test_failed $LINENO $i - cmp cbar bar || test_failed $LINENO $i - cmp cbaz baz || test_failed $LINENO $i - rm -f foo bar baz || framework_failure - "${TARLZ}" -n$i -xf "${testdir}"/test3_eof2.tar.lz || - test_failed $LINENO $i - cmp cfoo foo || test_failed $LINENO $i - cmp cbar bar || test_failed $LINENO $i - cmp cbaz baz || test_failed $LINENO $i - rm -f foo bar baz || framework_failure - "${TARLZ}" -q -n$i -xf "${testdir}"/test3_eof4.tar.lz - [ $? = 2 ] || test_failed $LINENO $i - cmp cfoo foo || test_failed $LINENO $i - cmp cbar bar || test_failed $LINENO $i - cmp cbaz baz || test_failed $LINENO $i - rm -f foo bar baz || framework_failure - "${TARLZ}" -q -n$i -xf "${testdir}"/test3_eof5.tar.lz - [ $? 
= 2 ] || test_failed $LINENO $i - cmp cfoo foo || test_failed $LINENO $i - cmp cbar bar || test_failed $LINENO $i - cmp cbaz baz || test_failed $LINENO $i - rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n$i -xf "${testdir}"/test3_eoa1.tar.lz + [ $? = 2 ] || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -n$i -xf "${testdir}"/test3_eoa2.tar.lz || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n$i -xf "${testdir}"/test3_eoa4.tar.lz + [ $? = 2 ] || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n$i -xf "${testdir}"/test3_eoa5.tar.lz + [ $? = 2 ] || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure done -"${TARLZ}" -n0 -xf "${testdir}"/test3_eof3.tar.lz || test_failed $LINENO +"${TARLZ}" -n0 -xf "${testdir}"/test3_eoa3.tar.lz || test_failed $LINENO cmp cfoo foo || test_failed $LINENO $i [ ! -e bar ] || test_failed $LINENO $i [ ! 
-e baz ] || test_failed $LINENO $i @@ -431,17 +439,15 @@ done # test --list and --extract with global headers uncompressed for i in gh1 gh2 gh3 gh4 ; do - "${TARLZ}" -tf "${testdir}"/test3_${i}.tar > out || - test_failed $LINENO $i - diff -u list3 out || test_failed $LINENO $i - "${TARLZ}" -tvf "${testdir}"/test3_${i}.tar > out || - test_failed $LINENO $i - diff -u vlist3 out || test_failed $LINENO $i - "${TARLZ}" -xf "${testdir}"/test3_${i}.tar || test_failed $LINENO $i - cmp cfoo foo || test_failed $LINENO $i - cmp cbar bar || test_failed $LINENO $i - cmp cbaz baz || test_failed $LINENO $i - rm -f foo bar baz out || framework_failure + "${TARLZ}" -tf "${testdir}"/test3_${i}.tar > out || test_failed $LINENO $i + diff -u list3 out || test_failed $LINENO $i + "${TARLZ}" -tvf "${testdir}"/test3_${i}.tar > out || test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO $i + "${TARLZ}" -xf "${testdir}"/test3_${i}.tar || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz out || framework_failure done # test --list and --extract with empty lzip members, global headers and @@ -495,7 +501,7 @@ cmp out.tar.lz aout.tar.lz || test_failed $LINENO cmp "${in_tar_lz}" aout.tar.lz || test_failed $LINENO "${TARLZ}" -A "${in_tar_lz}" "${test3_lz}" > aout.tar.lz || test_failed $LINENO cmp out.tar.lz aout.tar.lz || test_failed $LINENO -cat "${eof_lz}" > aout.tar.lz || framework_failure +cat "${eoa_lz}" > aout.tar.lz || framework_failure "${TARLZ}" -Aqf aout.tar.lz "${in_tar}" # concatenate to empty archive [ $? 
= 2 ] || test_failed $LINENO "${TARLZ}" -Af aout.tar.lz "${in_tar_lz}" "${test3_lz}" || test_failed $LINENO @@ -539,7 +545,7 @@ cmp out.tar aout.tar || test_failed $LINENO cmp "${in_tar}" aout.tar || test_failed $LINENO "${TARLZ}" -A "${in_tar}" "${test3}" > aout.tar || test_failed $LINENO cmp out.tar aout.tar || test_failed $LINENO -cat "${eof}" > aout.tar || framework_failure # concatenate to empty archive +cat "${eoa}" > aout.tar || framework_failure # concatenate to empty archive "${TARLZ}" -Aqf aout.tar "${in_tar_lz}" [ $? = 2 ] || test_failed $LINENO "${TARLZ}" -Af aout.tar "${in_tar}" "${test3}" || test_failed $LINENO @@ -672,6 +678,42 @@ cmp cfoo foo || test_failed $LINENO [ ! -e baz ] || test_failed $LINENO rm -f out.tar foo bar baz || framework_failure +# test --create --mtime +dates='@-9223372036854775808 @-9223372036854775807 + -2147481748-12-31T23:59:59 -1970-01-01T00:00:00 + 0000-01-01T00:00:00 0000-01-01T00:00:01 0000-01-02T00:00:00 + 1697-10-17T11:03:27 1697-10-17T11:03:28 1697-10-17T11:03:29 + 1833-11-24T17:31:43 1833-11-24T17:31:44 1833-11-24T17:31:45 + 1901-12-13T20:45:51 1901-12-13T20:45:52 1901-12-13T20:45:53 + 1901-12-14T20:45:51 + 1969-12-31T23:59:58 1969-12-31T23:59:59 + 1970-01-01T00:00:00 1970-01-01T00:00:01 @0 + 2038-01-18T03:14:07 2038-01-19T03:14:07 2038-01-19T03:14:08 + 2106-02-07T06:28:15 2106-02-07T06:28:16 + 2242-03-16T12:56:31 2242-03-16T12:56:32 @8589934591 @8589934592 + 9999-12-31T23:59:58 9999-12-31T23:59:59 + 2147483647-12-31T23:59:59 @9223372036854775807' +touch -d 2022-01-05T12:22:13 bar || framework_failure +for i in ${dates} @-8Ei '2017-10-01 09:00:00' '2017-10-1 9:0:0' \ + '2017-10-01 09:00' '2017-10-01 09' 2017-10-01 ./bar ; do + touch foo || framework_failure + "${TARLZ}" --un -cf out.tar --mtime="$i" foo || test_failed $LINENO "$i" + "${TARLZ}" -q -df out.tar && test_failed $LINENO "$i" + "${TARLZ}" -xf out.tar || test_failed $LINENO "$i" + "${TARLZ}" -df out.tar --ignore-overflow || test_failed $LINENO "$i" +done +rm 
-f out.tar foo bar || framework_failure + +mkdir dir || framework_failure +for i in ${dates} ; do + # Skip a time stamp $i if it's out of range for this platform, + # of if it uses a notation that this platform does not recognize. + touch -d $i dir/f$i >/dev/null 2>&1 || continue +done +"${TARLZ}" --uncompressed -cf out.tar dir || test_failed $LINENO +"${TARLZ}" -df out.tar || test_failed $LINENO +rm -rf out.tar dir || framework_failure + printf "\ntesting --diff..." # test --diff @@ -682,7 +724,7 @@ if cmp out.tar aout.tar > /dev/null ; then printf "\nwarning: '--diff' test can't be run as root.\n" else for i in 0 2 6 ; do - "${TARLZ}" -n$i -xf "${test3_lz}" || test_failed $LINENO + "${TARLZ}" -n$i -xf "${test3_lz}" || test_failed $LINENO $i "${TARLZ}" -n$i -df "${test3_lz}" > out$i [ $? = 1 ] || test_failed $LINENO $i "${TARLZ}" -n$i -df "${test3_lz}" --ignore-ids || test_failed $LINENO $i @@ -712,56 +754,68 @@ rm -f out.tar aout.tar foo bar baz || framework_failure printf "\ntesting --delete..." # test --delete +cat "${in}" > out.tar || framework_failure # invalid tar +"${TARLZ}" -q -f out.tar --delete foo +[ $? = 2 ] || test_failed $LINENO +rm -f out.tar || framework_failure +cat "${in}" > out.tar.lz || framework_failure # invalid tar.lz +"${TARLZ}" -q -f out.tar.lz --delete foo +[ $? = 2 ] || test_failed $LINENO +cat "${in_lz}" > out.tar.lz || framework_failure # invalid tar.lz +"${TARLZ}" -q -f out.tar.lz --delete foo +[ $? = 2 ] || test_failed $LINENO +rm -f out.tar.lz || framework_failure + for e in "" .lz ; do "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e "${TARLZ}" -f out.tar$e --delete test.txt || test_failed $LINENO $e cmp "${test3}"$e out.tar$e || test_failed $LINENO $e "${TARLZ}" -f out.tar$e --delete || test_failed $LINENO $e # delete nothing cmp "${test3}"$e out.tar$e || test_failed $LINENO $e - "${TARLZ}" -qf out.tar$e --delete nx_file + "${TARLZ}" -q -f out.tar$e --delete nx_file [ $? 
= 1 ] || test_failed $LINENO $e cmp "${test3}"$e out.tar$e || test_failed $LINENO $e "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e - "${TARLZ}" -qf out.tar$e --delete test.txt || test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --delete test.txt || test_failed $LINENO $e cmp "${test3dir}"$e out.tar$e || test_failed $LINENO $e "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e - "${TARLZ}" -qf out.tar$e --delete dir || test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --delete dir || test_failed $LINENO $e cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e - "${TARLZ}" -qf out.tar$e --del dir/foo dir/bar dir/baz || test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --del dir/foo dir/bar dir/baz || test_failed $LINENO $e cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e - "${TARLZ}" -qf out.tar$e --del dir/foo dir/baz || test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --del dir/foo dir/baz || test_failed $LINENO $e cmp "${in_tar}"$e out.tar$e > /dev/null && test_failed $LINENO $e - "${TARLZ}" -qf out.tar$e --del dir/bar || test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --del dir/bar || test_failed $LINENO $e cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e "${TARLZ}" -f out.tar$e --delete foo bar baz || test_failed $LINENO $e cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e "${TARLZ}" -f out.tar$e --del test.txt foo bar baz || test_failed $LINENO $e - cmp "${eof}"$e out.tar$e || test_failed $LINENO $e + cmp "${eoa}"$e out.tar$e || test_failed $LINENO $e "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e for i in test.txt foo 
bar baz ; do "${TARLZ}" -f out.tar$e --delete $i || test_failed $LINENO "$e $i" done - cmp "${eof}"$e out.tar$e || test_failed $LINENO $e + cmp "${eoa}"$e out.tar$e || test_failed $LINENO $e "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e for i in baz bar foo test.txt ; do "${TARLZ}" -f out.tar$e --delete $i || test_failed $LINENO "$e $i" done - cmp "${eof}"$e out.tar$e || test_failed $LINENO $e + cmp "${eoa}"$e out.tar$e || test_failed $LINENO $e "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e for i in foo bar test.txt baz ; do "${TARLZ}" -f out.tar$e --delete $i || test_failed $LINENO "$e $i" done - cmp "${eof}"$e out.tar$e || test_failed $LINENO $e + cmp "${eoa}"$e out.tar$e || test_failed $LINENO $e "${TARLZ}" -A "${in_tar}"$e "${t155}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e "${TARLZ}" -f out.tar$e --del baz foo test.txt bar || test_failed $LINENO $e cmp "${t155}"$e out.tar$e || test_failed $LINENO $e "${TARLZ}" -f out.tar$e --delete link || test_failed $LINENO $e - "${TARLZ}" -q -tf out.tar$e || test_failed $LINENO + "${TARLZ}" -q -tf out.tar$e || test_failed $LINENO $e cmp "${t155}"$e out.tar$e > /dev/null && test_failed $LINENO $e rm -f out.tar$e || framework_failure done @@ -775,8 +829,8 @@ cat "${in}" > test.txt || framework_failure "${TARLZ}" -0 -rf out.tar.lz test.txt || test_failed $LINENO rm -f foo bar baz test.txt || framework_failure for i in foo bar baz ; do - "${TARLZ}" -qf out.tar.lz --delete $i - [ $? = 2 ] || test_failed $LINENO + "${TARLZ}" -q -f out.tar.lz --delete $i + [ $? 
= 2 ] || test_failed $LINENO $i done "${TARLZ}" -f out.tar.lz --delete test.txt || test_failed $LINENO "${TARLZ}" -xf out.tar.lz || test_failed $LINENO @@ -788,11 +842,11 @@ rm -f out.tar.lz foo bar baz test.txt || framework_failure # test --delete with empty lzip member, global header for i in 1 2 3 4 5 6 ; do - cat "${testdir}"/test3_em${i}.tar.lz > out.tar.lz || framework_failure - for j in foo bar baz ; do - "${TARLZ}" -f out.tar.lz --delete $j || test_failed $LINENO "$i $j" - done - rm -f out.tar.lz || framework_failure + cat "${testdir}"/test3_em${i}.tar.lz > out.tar.lz || framework_failure + for j in foo bar baz ; do + "${TARLZ}" -f out.tar.lz --delete $j || test_failed $LINENO "$i $j" + done + rm -f out.tar.lz || framework_failure done cat "${testdir}"/test3_gh5.tar.lz > out.tar.lz || framework_failure for i in foo bar baz ; do @@ -882,7 +936,7 @@ cmp out.tar.lz aout.tar.lz || test_failed $LINENO "${TARLZ}" --un -q -rf aout.tar.lz foo bar baz # wrong extension archive [ $? = 2 ] || test_failed $LINENO cmp out.tar.lz aout.tar.lz || test_failed $LINENO -cat "${eof_lz}" > aout.tar.lz || framework_failure # append to empty archive +cat "${eoa_lz}" > aout.tar.lz || framework_failure # append to empty archive "${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO cmp out.tar.lz aout.tar.lz || test_failed $LINENO "${TARLZ}" --un -q -rf aout.tar.lz foo bar baz # wrong extension empty archive @@ -918,7 +972,7 @@ cmp out.tar aout.tar || test_failed $LINENO "${TARLZ}" -0 -q -rf aout.tar foo bar baz # wrong extension archive [ $? 
= 2 ] || test_failed $LINENO cmp out.tar aout.tar || test_failed $LINENO -cat "${eof}" > aout.tar || framework_failure # append to empty archive +cat "${eoa}" > aout.tar || framework_failure # append to empty archive "${TARLZ}" --un -rf aout.tar foo bar baz || test_failed $LINENO cmp out.tar aout.tar || test_failed $LINENO "${TARLZ}" -0 -q -rf aout.tar foo bar baz # wrong extension empty archive @@ -937,18 +991,16 @@ for i in --asolid --bsolid --dsolid --solid -0 ; do done rm -f out.tar.lz aout.tar.lz || framework_failure for i in --asolid --bsolid --dsolid -0 ; do - for j in --asolid --bsolid --dsolid --solid -0 ; do - "${TARLZ}" $i -0 -cf out.tar.lz foo || - test_failed $LINENO "$i $j" - "${TARLZ}" $j -0 -rf out.tar.lz bar baz || - test_failed $LINENO "$i $j" - rm -f foo bar baz || framework_failure - "${TARLZ}" -xf out.tar.lz || test_failed $LINENO "$i $j" - cmp cfoo foo || test_failed $LINENO "$i $j" - cmp cbar bar || test_failed $LINENO "$i $j" - cmp cbaz baz || test_failed $LINENO "$i $j" - rm -f out.tar.lz || framework_failure - done + for j in --asolid --bsolid --dsolid --solid -0 ; do + "${TARLZ}" $i -0 -cf out.tar.lz foo || test_failed $LINENO "$i $j" + "${TARLZ}" $j -0 -rf out.tar.lz bar baz || test_failed $LINENO "$i $j" + rm -f foo bar baz || framework_failure + "${TARLZ}" -xf out.tar.lz || test_failed $LINENO "$i $j" + cmp cfoo foo || test_failed $LINENO "$i $j" + cmp cbar bar || test_failed $LINENO "$i $j" + cmp cbaz baz || test_failed $LINENO "$i $j" + rm -f out.tar.lz || framework_failure + done done rm -f foo bar baz || framework_failure @@ -1006,10 +1058,13 @@ if [ "${ln_works}" = yes ] ; then "${TARLZ}" -0 -q -c ../tmp/dir1 | "${TARLZ}" -x || test_failed $LINENO diff -ru tmp/dir1 dir1 || test_failed $LINENO rm -rf tmp dir1 || framework_failure + # test -c -d -x on dangling (broken) symlinks with trailing slashes "${TARLZ}" -xf "${testdir}"/ts_in_link.tar.lz || test_failed $LINENO "${TARLZ}" -df "${testdir}"/ts_in_link.tar.lz --ignore-ids || 
test_failed $LINENO - rm -f link1 link2 link3 link4 || framework_failure + "${TARLZ}" -0 -cf out.tar.lz link1 link2 link3 link4 || test_failed $LINENO + "${TARLZ}" -df out.tar.lz || test_failed $LINENO + rm -f out.tar.lz link1 link2 link3 link4 || framework_failure fi printf "\ntesting long names..." @@ -1066,7 +1121,7 @@ cat out3.tar > out3z.tar || framework_failure cat outz.tar.lz > out || test_failed $LINENO cat out3z.tar.lz > out3 || test_failed $LINENO rm -f out3z.tar.lz || framework_failure -"${TARLZ}" -q -0 -z outz.tar out3z.tar +"${TARLZ}" -q -0 -z outz.tar out3z.tar # outz.tar.lz exists [ $? = 1 ] || test_failed $LINENO cmp out outz.tar.lz || test_failed $LINENO cmp out3 out3z.tar.lz || test_failed $LINENO @@ -1108,24 +1163,40 @@ for i in --asolid --bsolid --dsolid ; do cmp out3.tar.lz out3z.tar.lz || test_failed $LINENO $i rm -f out outz.tar.lz out3z.tar.lz || framework_failure done -rm -f foo bar baz test.txt out.tar.lz out3.tar.lz out.tar outz.tar out3z.tar || - framework_failure +# concatenate and compress +"${TARLZ}" --un -cf foo.tar foo || test_failed $LINENO +"${TARLZ}" --un -cf bar.tar bar || test_failed $LINENO +"${TARLZ}" --un -cf baz.tar baz || test_failed $LINENO +"${TARLZ}" -A foo.tar bar.tar baz.tar | "${TARLZ}" -0 -z -o foobarbaz.tar.lz || + test_failed $LINENO +cmp out3.tar.lz foobarbaz.tar.lz || test_failed $LINENO +# compress and concatenate +"${TARLZ}" -0 -z foo.tar bar.tar baz.tar || test_failed $LINENO +"${TARLZ}" -A foo.tar.lz bar.tar.lz baz.tar.lz > foobarbaz.tar.lz || + test_failed $LINENO +"${TARLZ}" -0 -n0 --no-solid -c foo bar baz | cmp foobarbaz.tar.lz - || + test_failed $LINENO +rm -f foo bar baz test.txt out.tar.lz out.tar outz.tar foobarbaz.tar.lz \ + out3.tar out3.tar.lz out3z.tar foo.tar bar.tar baz.tar \ + foo.tar.lz bar.tar.lz baz.tar.lz || framework_failure printf "\ntesting bad input..." # test --extract ".." 
mkdir dir1 || framework_failure cd dir1 || framework_failure -"${TARLZ}" -q -xf "${testdir}"/dotdot1.tar.lz || test_failed $LINENO -[ ! -e ../dir ] || test_failed $LINENO -"${TARLZ}" -q -xf "${testdir}"/dotdot2.tar.lz || test_failed $LINENO -[ ! -e ../dir ] || test_failed $LINENO -"${TARLZ}" -q -xf "${testdir}"/dotdot3.tar.lz || test_failed $LINENO -[ ! -e dir ] || test_failed $LINENO -"${TARLZ}" -q -xf "${testdir}"/dotdot4.tar.lz || test_failed $LINENO -[ ! -e dir ] || test_failed $LINENO -"${TARLZ}" -q -xf "${testdir}"/dotdot5.tar.lz || test_failed $LINENO -[ ! -e dir ] || test_failed $LINENO +for i in 0 2 ; do # try serial and parallel decoders + "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot1.tar.lz || test_failed $LINENO $i + [ ! -e ../dir ] || test_failed $LINENO $i + "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot2.tar.lz || test_failed $LINENO $i + [ ! -e ../dir ] || test_failed $LINENO $i + "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot3.tar.lz || test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i + "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot4.tar.lz || test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i + "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot5.tar.lz || test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i +done cd .. || framework_failure rm -rf dir1 || framework_failure @@ -1140,19 +1211,19 @@ rm -f truncated.tar || framework_failure # test --delete with split 'bar' tar member for i in 1 2 3 4 ; do - cat "${testdir}"/test3_sm${i}.tar.lz > out.tar.lz || framework_failure - for j in bar baz ; do - "${TARLZ}" -q -f out.tar.lz --delete $j - [ $? = 2 ] || test_failed $LINENO "$i $j" - done - cmp "${testdir}"/test3_sm${i}.tar.lz out.tar.lz || test_failed $LINENO $i - "${TARLZ}" -q -f out.tar.lz --delete foo - [ $? = 2 ] || test_failed $LINENO $i - "${TARLZ}" -xf out.tar.lz || test_failed $LINENO - [ ! 
-e foo ] || test_failed $LINENO - cmp cbar bar || test_failed $LINENO - cmp cbaz baz || test_failed $LINENO - rm -f out.tar.lz foo bar baz || framework_failure + cat "${testdir}"/test3_sm${i}.tar.lz > out.tar.lz || framework_failure + for j in bar baz ; do + "${TARLZ}" -q -f out.tar.lz --delete $j + [ $? = 2 ] || test_failed $LINENO "$i $j" + done + cmp "${testdir}"/test3_sm${i}.tar.lz out.tar.lz || test_failed $LINENO $i + "${TARLZ}" -q -f out.tar.lz --delete foo + [ $? = 2 ] || test_failed $LINENO $i + "${TARLZ}" -xf out.tar.lz || test_failed $LINENO $i + [ ! -e foo ] || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f out.tar.lz foo bar baz || framework_failure done # test --list and --extract format violations diff --git a/testsuite/eof.tar b/testsuite/eoa_blocks.tar Binary files differindex 06d7405..06d7405 100644 --- a/testsuite/eof.tar +++ b/testsuite/eoa_blocks.tar diff --git a/testsuite/eof.tar.lz b/testsuite/eoa_blocks.tar.lz Binary files differindex 328273c..328273c 100644 --- a/testsuite/eof.tar.lz +++ b/testsuite/eoa_blocks.tar.lz diff --git a/testsuite/test3_eof1.tar b/testsuite/test3_eoa1.tar Binary files differindex 175b807..175b807 100644 --- a/testsuite/test3_eof1.tar +++ b/testsuite/test3_eoa1.tar diff --git a/testsuite/test3_eof1.tar.lz b/testsuite/test3_eoa1.tar.lz Binary files differindex 0eb86e4..0eb86e4 100644 --- a/testsuite/test3_eof1.tar.lz +++ b/testsuite/test3_eoa1.tar.lz diff --git a/testsuite/test3_eof2.tar b/testsuite/test3_eoa2.tar Binary files differindex 458be1e..458be1e 100644 --- a/testsuite/test3_eof2.tar +++ b/testsuite/test3_eoa2.tar diff --git a/testsuite/test3_eof2.tar.lz b/testsuite/test3_eoa2.tar.lz Binary files differindex 1f47953..1f47953 100644 --- a/testsuite/test3_eof2.tar.lz +++ b/testsuite/test3_eoa2.tar.lz diff --git a/testsuite/test3_eof3.tar b/testsuite/test3_eoa3.tar Binary files differindex 3003a93..3003a93 100644 --- a/testsuite/test3_eof3.tar 
+++ b/testsuite/test3_eoa3.tar diff --git a/testsuite/test3_eof3.tar.lz b/testsuite/test3_eoa3.tar.lz Binary files differindex 20ba9f8..20ba9f8 100644 --- a/testsuite/test3_eof3.tar.lz +++ b/testsuite/test3_eoa3.tar.lz diff --git a/testsuite/test3_eof4.tar b/testsuite/test3_eoa4.tar Binary files differindex 4012fea..4012fea 100644 --- a/testsuite/test3_eof4.tar +++ b/testsuite/test3_eoa4.tar diff --git a/testsuite/test3_eof4.tar.lz b/testsuite/test3_eoa4.tar.lz Binary files differindex 1593feb..1593feb 100644 --- a/testsuite/test3_eof4.tar.lz +++ b/testsuite/test3_eoa4.tar.lz diff --git a/testsuite/test3_eof5.tar.lz b/testsuite/test3_eoa5.tar.lz Binary files differindex 156bd3a..156bd3a 100644 --- a/testsuite/test3_eof5.tar.lz +++ b/testsuite/test3_eoa5.tar.lz |