diff options
-rw-r--r-- | ChangeLog | 18 | ||||
-rw-r--r-- | INSTALL | 4 | ||||
-rw-r--r-- | Makefile.in | 12 | ||||
-rw-r--r-- | NEWS | 23 | ||||
-rw-r--r-- | README | 24 | ||||
-rw-r--r-- | archive_reader.cc | 57 | ||||
-rw-r--r-- | archive_reader.h | 13 | ||||
-rw-r--r-- | arg_parser.cc | 2 | ||||
-rw-r--r-- | arg_parser.h | 2 | ||||
-rw-r--r-- | common.cc | 2 | ||||
-rw-r--r-- | common_decode.cc | 44 | ||||
-rwxr-xr-x | configure | 6 | ||||
-rw-r--r-- | create.cc | 5 | ||||
-rw-r--r-- | create_lz.cc | 2 | ||||
-rw-r--r-- | decode.cc (renamed from extract.cc) | 62 | ||||
-rw-r--r-- | decode_lz.cc | 280 | ||||
-rw-r--r-- | delete.cc | 123 | ||||
-rw-r--r-- | delete_lz.cc | 172 | ||||
-rw-r--r-- | doc/tarlz.1 | 27 | ||||
-rw-r--r-- | doc/tarlz.info | 129 | ||||
-rw-r--r-- | doc/tarlz.texi | 123 | ||||
-rw-r--r-- | exclude.cc | 6 | ||||
-rw-r--r-- | extended.cc | 28 | ||||
-rw-r--r-- | lzip_index.cc | 4 | ||||
-rw-r--r-- | lzip_index.h | 2 | ||||
-rw-r--r-- | main.cc | 83 | ||||
-rw-r--r-- | tarlz.h | 76 | ||||
-rwxr-xr-x | testsuite/check.sh | 213 |
28 files changed, 926 insertions, 616 deletions
@@ -1,3 +1,19 @@ +2021-01-08 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.19 released. + * extended.cc: Print a diagnostic for each unknown keyword found. + * tarlz.h: Add a missing '#include <sys/types.h>'. + +2020-11-21 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.18 released. + * main.cc: New option '--check-lib'. + * Implement multi-threaded '-x, --extract'. + * Don't #include <sys/sysmacros.h> when compiling on OS2. + * delete.cc, delete_lz.cc: Use Archive_reader. + * extract.cc: Rename to decode.cc. + * tarlz.texi: New section 'Limitations of multi-threaded extraction'. + 2020-07-30 Antonio Diaz Diaz <antonio@gnu.org> * Version 0.17 released. @@ -146,7 +162,7 @@ * Version 0.1 released. -Copyright (C) 2013-2020 Antonio Diaz Diaz. +Copyright (C) 2013-2021 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute, and @@ -4,8 +4,10 @@ You will need a C++11 compiler and the compression library lzlib installed. (gcc 3.3.6 or newer is recommended). I use gcc 6.1.0 and 4.1.2, but the code should compile with any standards compliant compiler. + Lzlib must be version 1.8 or newer, but --keep-damaged requires lzlib 1.11 or newer to recover as much data as possible from each damaged member. + Gcc is available at http://gcc.gnu.org. Lzlib is available at http://www.nongnu.org/lzip/lzlib.html. @@ -69,7 +71,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2013-2020 Antonio Diaz Diaz. +Copyright (C) 2013-2021 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. 
diff --git a/Makefile.in b/Makefile.in index 44f0d86..da94701 100644 --- a/Makefile.in +++ b/Makefile.in @@ -9,8 +9,8 @@ SHELL = /bin/sh CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 objs = arg_parser.o lzip_index.o archive_reader.o common.o common_decode.o \ - create.o create_lz.o delete.o delete_lz.o exclude.o extended.o \ - extract.o decode_lz.o main.o + create.o create_lz.o decode.o decode_lz.o delete.o delete_lz.o \ + exclude.o extended.o main.o .PHONY : all install install-bin install-info install-man \ @@ -37,12 +37,12 @@ common.o : arg_parser.h tarlz.h common_decode.o : arg_parser.h tarlz.h create.o : arg_parser.h tarlz.h create_lz.o : arg_parser.h tarlz.h -delete.o : arg_parser.h tarlz.h lzip_index.h -delete_lz.o : arg_parser.h tarlz.h lzip_index.h +decode.o : arg_parser.h tarlz.h lzip_index.h archive_reader.h +decode_lz.o : arg_parser.h tarlz.h lzip_index.h archive_reader.h +delete.o : arg_parser.h tarlz.h lzip_index.h archive_reader.h +delete_lz.o : arg_parser.h tarlz.h lzip_index.h archive_reader.h exclude.o : tarlz.h extended.o : tarlz.h -extract.o : arg_parser.h tarlz.h lzip_index.h archive_reader.h -decode_lz.o : arg_parser.h tarlz.h lzip_index.h archive_reader.h lzip_index.o : tarlz.h lzip_index.h main.o : arg_parser.h tarlz.h @@ -1,20 +1,7 @@ -Changes in version 0.17: +Changes in version 0.19: -The new option '--mtime' has been added. +At verbosity level 1 or higher tarlz now prints a diagnostic for each +unknown extended header keyword found in an archive, once per keyword. -The new option '-p, --preserve-permissions' has been added. - -Multi-threaded '-d, --diff' has been implemented. See chapters 'Internal -structure of tarlz' and 'Limitations of parallel tar decoding' in the manual -for details. - -Tarlz now reports an error if a file name is empty (tarlz -tf foo "") or if -the archive is specified more than once. 
- -Tarlz now reports corruption or truncation of the last header in a -multimenber file specifically instead of showing the generic message "Last -member in input file is truncated or corrupt." - -The header <sys/sysmacros.h> is now not #included when compiling on BSD. - -The new chapter 'Internal structure of tarlz' has been added to the manual. +A missing '#include <sys/types.h>', which made compilation fail on some +systems, has been added. @@ -1,13 +1,15 @@ Description Tarlz is a massively parallel (multi-threaded) combined implementation of -the tar archiver and the lzip compressor. Tarlz creates, lists, and extracts -archives in a simplified and safer variant of the POSIX pax format -compressed with lzip, keeping the alignment between tar members and lzip -members. The resulting multimember tar.lz archive is fully backward -compatible with standard tar tools like GNU tar, which treat it like any -other tar.lz archive. Tarlz can append files to the end of such compressed -archives. +the tar archiver and the lzip compressor. Tarlz uses the compression library +lzlib. + +Tarlz creates tar archives using a simplified and safer variant of the POSIX +pax format compressed in lzip format, keeping the alignment between tar +members and lzip members. The resulting multimember tar.lz archive is fully +backward compatible with standard tar tools like GNU tar, which treat it +like any other tar.lz archive. Tarlz can append files to the end of such +compressed archives. Keeping the alignment between tar members and lzip members has two advantages. It adds an indexed lzip layer on top of the tar archive, making @@ -16,7 +18,7 @@ amount of data lost in case of corruption. Compressing a tar archive with plzip may even double the amount of files lost for each lzip member damaged because it does not keep the members aligned. 
-Tarlz can create tar archives with five levels of compression granularity; +Tarlz can create tar archives with five levels of compression granularity: per file (--no-solid), per block (--bsolid, default), per directory (--dsolid), appendable solid (--asolid), and solid (--solid). It can also create uncompressed tar archives. @@ -40,8 +42,8 @@ archive, but it has the following advantages: lziprecover can be used to recover some of the damaged members. * A multimember tar.lz archive is usually smaller than the corresponding - solidly compressed tar.gz archive, except when compressing files - smaller than about 32 KiB individually. + solidly compressed tar.gz archive, except when individually + compressing files smaller than about 32 KiB. Note that the POSIX pax format has a serious flaw. The metadata stored in pax extended records are not protected by any kind of check sequence. @@ -85,7 +87,7 @@ tar.lz +===============+=================================================+========+ -Copyright (C) 2013-2020 Antonio Diaz Diaz. +Copyright (C) 2013-2021 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. diff --git a/archive_reader.cc b/archive_reader.cc index 496c33b..b7950ef 100644 --- a/archive_reader.cc +++ b/archive_reader.cc @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2020 Antonio Diaz Diaz. + Copyright (C) 2013-2021 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -23,6 +23,7 @@ #include <cstring> #include <string> #include <vector> +#include <pthread.h> // for tarlz.h #include <stdint.h> #include <unistd.h> #include <lzlib.h> @@ -32,6 +33,50 @@ #include "archive_reader.h" +namespace { + +/* Returns the number of bytes really read. + If (returned value < size) and (errno == 0), means EOF was reached. 
+*/ +int preadblock( const int fd, uint8_t * const buf, const int size, + const long long pos ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = pread( fd, buf + sz, size - sz, pos + sz ); + if( n > 0 ) sz += n; + else if( n == 0 ) break; // EOF + else if( errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +/* Returns the number of bytes really written. + If (returned value < size), it is always an error. +*//* +int pwriteblock( const int fd, const uint8_t * const buf, const int size, + const long long pos ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = pwrite( fd, buf + sz, size - sz, pos + sz ); + if( n > 0 ) sz += n; + else if( n < 0 && errno != EINTR ) break; + errno = 0; + } + return sz; + } +*/ + +} // end namespace + + int Archive_reader_base::parse_records( Extended & extended, const Tar_header header, Resizable_buffer & rbuf, @@ -159,7 +204,7 @@ void Archive_reader_i::set_member( const long i ) { LZ_decompress_reset( decoder ); // prepare for new member data_pos_ = ad.lzip_index.dblock( i ).pos(); - mdata_end = ad.lzip_index.dblock( i ).end(); + mdata_end_ = ad.lzip_index.dblock( i ).end(); archive_pos = ad.lzip_index.mblock( i ).pos(); member_id = i; } @@ -175,9 +220,9 @@ int Archive_reader_i::read( uint8_t * const buf, const int size ) { const int rd = LZ_decompress_read( decoder, buf + sz, size - sz ); if( rd < 0 ) - return err( 1, LZ_strerror( LZ_decompress_errno( decoder ) ) ); + return err( 1, LZ_strerror( LZ_decompress_errno( decoder ) ), 0, sz ); if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) - return err( 2, end_msg ); + return err( 2, end_msg, 0, sz ); sz += rd; data_pos_ += rd; if( sz < size && LZ_decompress_write_size( decoder ) > 0 ) { @@ -198,7 +243,7 @@ int Archive_reader_i::read( uint8_t * const buf, const int size ) if( rd < rsize ) { LZ_decompress_finish( decoder ); - if( errno ) return err( 2, "Error reading archive" ); + if( errno ) return err( 2, "Error reading 
archive", 0, sz ); } } } @@ -210,7 +255,7 @@ int Archive_reader_i::read( uint8_t * const buf, const int size ) int Archive_reader_i::skip_member( const Extended & extended ) { long long rest = round_up( extended.file_size() ); // size + padding - if( data_pos_ + rest == mdata_end ) { data_pos_ = mdata_end; return 0; } + if( data_pos_ + rest == mdata_end_ ) { data_pos_ = mdata_end_; return 0; } const int bufsize = 32 * header_size; uint8_t buf[bufsize]; while( rest > 0 ) // skip tar member diff --git a/archive_reader.h b/archive_reader.h index a867c0f..bbd5bcb 100644 --- a/archive_reader.h +++ b/archive_reader.h @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2020 Antonio Diaz Diaz. + Copyright (C) 2013-2021 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ struct Archive_descriptor const int infd; const Lzip_index lzip_index; const bool seekable; - const bool indexed; + const bool indexed; // archive is a compressed regular file Archive_descriptor( const std::string & archive_name ) : name( archive_name ), namep( name.empty() ? 
"(stdin)" : name.c_str() ), @@ -96,15 +96,15 @@ public: */ class Archive_reader_i : public Archive_reader_base // indexed reader { - long long data_pos_; - long long mdata_end; + long long data_pos_; // current decompressed position + long long mdata_end_; // current member decompressed end long long archive_pos; // current position in archive for pread long member_id; // current member unless reading beyond public: Archive_reader_i( const Archive_descriptor & d ) : Archive_reader_base( d ), - data_pos_( 0 ), mdata_end( 0 ), archive_pos( 0 ), member_id( 0 ) + data_pos_( 0 ), mdata_end_( 0 ), archive_pos( 0 ), member_id( 0 ) { decoder = LZ_decompress_open(); if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) @@ -112,7 +112,8 @@ public: } long long data_pos() const { return data_pos_; } - bool at_member_end() const { return data_pos_ == mdata_end; } + long long mdata_end() const { return mdata_end_; } + bool at_member_end() const { return data_pos_ == mdata_end_; } // Resets decoder and sets position to the start of the member. void set_member( const long i ); diff --git a/arg_parser.cc b/arg_parser.cc index b843c09..2e40a13 100644 --- a/arg_parser.cc +++ b/arg_parser.cc @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2020 Antonio Diaz Diaz. + Copyright (C) 2006-2021 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/arg_parser.h b/arg_parser.h index d9a4af0..5629b90 100644 --- a/arg_parser.h +++ b/arg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2020 Antonio Diaz Diaz. + Copyright (C) 2006-2021 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2020 Antonio Diaz Diaz. + Copyright (C) 2013-2021 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/common_decode.cc b/common_decode.cc index 6b108c1..6ff3086 100644 --- a/common_decode.cc +++ b/common_decode.cc @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2020 Antonio Diaz Diaz. + Copyright (C) 2013-2021 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,6 +17,7 @@ #define _FILE_OFFSET_BITS 64 +#include <cerrno> #include <climits> #include <cstdio> #include <cstdlib> @@ -24,7 +25,7 @@ #include <ctime> #include <string> #include <vector> -#include <pthread.h> +#include <pthread.h> // for tarlz.h #include <stdint.h> #include <sys/stat.h> @@ -198,3 +199,42 @@ bool check_skip_filename( const Cl_options & cl_opts, } return skip; } + + +mode_t get_umask() + { + static mode_t mask = 0; // read once, cache the result + static bool first_call = true; + if( first_call ) { first_call = false; mask = umask( 0 ); umask( mask ); + mask &= S_IRWXU | S_IRWXG | S_IRWXO; } + return mask; + } + + +bool make_path( const std::string & name ) + { + const mode_t mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; + unsigned end = name.size(); // first slash before last component + + while( end > 0 && name[end-1] == '/' ) --end; // remove trailing slashes + while( end > 0 && name[end-1] != '/' ) --end; // remove last component + while( end > 0 && name[end-1] == '/' ) --end; // remove more slashes + + unsigned index = 0; + while( index < end ) + { + while( index < end && name[index] == '/' ) ++index; + unsigned first = 
index; + while( index < end && name[index] != '/' ) ++index; + if( first < index ) + { + const std::string partial( name, 0, index ); + struct stat st; + if( stat( partial.c_str(), &st ) == 0 ) + { if( !S_ISDIR( st.st_mode ) ) return false; } + else if( mkdir( partial.c_str(), mode ) != 0 && errno != EEXIST ) + return false; + } + } + return true; + } @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Tarlz - Archiver with multimember lzip compression -# Copyright (C) 2013-2020 Antonio Diaz Diaz. +# Copyright (C) 2013-2021 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute, and modify it. pkgname=tarlz -pkgversion=0.17 +pkgversion=0.19 progname=tarlz srctrigger=doc/${pkgname}.texi @@ -167,7 +167,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Tarlz - Archiver with multimember lzip compression -# Copyright (C) 2013-2020 Antonio Diaz Diaz. +# Copyright (C) 2013-2021 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2020 Antonio Diaz Diaz. + Copyright (C) 2013-2021 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,9 +29,8 @@ #include <stdint.h> #include <unistd.h> #include <sys/stat.h> -#include <sys/types.h> #if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \ - !defined __DragonFly__ && !defined __APPLE__ + !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__ #include <sys/sysmacros.h> // for major, minor #endif #include <ftw.h> diff --git a/create_lz.cc b/create_lz.cc index a6a7146..52efb56 100644 --- a/create_lz.cc +++ b/create_lz.cc @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2020 Antonio Diaz Diaz. + Copyright (C) 2013-2021 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2020 Antonio Diaz Diaz. + Copyright (C) 2013-2021 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,14 +26,13 @@ #include <cstring> #include <string> #include <vector> -#include <pthread.h> +#include <pthread.h> // for tarlz.h #include <stdint.h> #include <unistd.h> #include <utime.h> #include <sys/stat.h> -#include <sys/types.h> #if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \ - !defined __DragonFly__ && !defined __APPLE__ + !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__ #include <sys/sysmacros.h> // for major, minor, makedev #endif #include <lzlib.h> @@ -66,35 +65,6 @@ void read_error( const Archive_reader & ar ) } -bool make_path( const std::string & name ) - { - const mode_t mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; - unsigned end = name.size(); // first slash before last component - - while( end > 0 && name[end-1] == '/' ) --end; // remove trailing slashes - while( end > 0 && name[end-1] != '/' ) --end; // remove last component - while( end > 0 && name[end-1] == '/' ) --end; // remove more slashes - - unsigned index = 0; - while( index < end ) - { - while( index < end && name[index] == '/' ) ++index; - unsigned first = index; - while( index < end && name[index] != '/' ) ++index; - if( first < index ) - { - const std::string partial( name, 0, index ); - struct stat st; - if( stat( partial.c_str(), &st ) == 0 ) - { if( !S_ISDIR( st.st_mode ) ) return false; } - else if( mkdir( partial.c_str(), mode ) != 0 ) - return false; - } - } - return true; - } - - int skip_member( Archive_reader & ar, const Extended & extended ) { const int ret = ar.skip_member( extended ); @@ -139,24 +109,6 @@ int list_member( Archive_reader & ar, } -bool contains_dotdot( const char * const filename ) - { - for( int i = 0; filename[i]; ++i ) - if( dotdot_at_i( filename, i ) ) return true; - return false; - } - - -mode_t get_umask() - { - static mode_t mask = 0; // read once, cache the 
result - static bool first_call = true; - if( first_call ) { first_call = false; mask = umask( 0 ); umask( mask ); - mask &= S_IRWXU | S_IRWXG | S_IRWXO; } - return mask; - } - - int extract_member( const Cl_options & cl_opts, Archive_reader & ar, const Extended & extended, const Tar_header header ) { @@ -303,7 +255,7 @@ bool compare_file_type( std::string & estr, std::string & ostr, struct stat st; bool diff = false, size_differs = false, type_differs = true; if( hstat( filename, &st, cl_opts.dereference ) != 0 ) - format_file_error( estr, filename, "Warning: Can't stat", errno ); + format_file_error( estr, filename, "warning: Can't stat", errno ); else if( ( typeflag == tf_regular || typeflag == tf_hiperf ) && !S_ISREG( st.st_mode ) ) format_file_diff( ostr, filename, "Is not a regular file" ); @@ -440,9 +392,9 @@ int decode( const Cl_options & cl_opts ) } // multi-threaded --list is faster even with 1 thread and 1 file in archive - // (but multi-threaded --diff probably needs at least 2 of each) - if( ( cl_opts.program_mode == m_diff || cl_opts.program_mode == m_list ) && - cl_opts.num_workers > 0 ) + // but multi-threaded --diff and --extract probably need at least 2 of each + if( ( cl_opts.program_mode == m_diff || cl_opts.program_mode == m_list || + cl_opts.program_mode == m_extract ) && cl_opts.num_workers > 0 ) { if( ad.indexed && ad.lzip_index.members() >= 2 ) // one file + eof { diff --git a/decode_lz.cc b/decode_lz.cc index 71c699b..4fc3d80 100644 --- a/decode_lz.cc +++ b/decode_lz.cc @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2020 Antonio Diaz Diaz. + Copyright (C) 2013-2021 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,7 +29,12 @@ #include <pthread.h> #include <stdint.h> #include <unistd.h> +#include <utime.h> #include <sys/stat.h> +#if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \ + !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__ +#include <sys/sysmacros.h> // for major, minor, makedev +#endif #include <lzlib.h> #include "arg_parser.h" @@ -43,47 +48,8 @@ - the other workers return. - the muxer drains the queue and returns. */ -/* Returns the number of bytes really read. - If (returned value < size) and (errno == 0), means EOF was reached. -*/ -int preadblock( const int fd, uint8_t * const buf, const int size, - const long long pos ) - { - int sz = 0; - errno = 0; - while( sz < size ) - { - const int n = pread( fd, buf + sz, size - sz, pos + sz ); - if( n > 0 ) sz += n; - else if( n == 0 ) break; // EOF - else if( errno != EINTR ) break; - errno = 0; - } - return sz; - } - - namespace { -/* Returns the number of bytes really written. - If (returned value < size), it is always an error. 
-*//* -int pwriteblock( const int fd, const uint8_t * const buf, const int size, - const long long pos ) - { - int sz = 0; - errno = 0; - while( sz < size ) - { - const int n = pwrite( fd, buf + sz, size - sz, pos + sz ); - if( n > 0 ) sz += n; - else if( n < 0 && errno != EINTR ) break; - errno = 0; - } - return sz; - } -*/ - const char * const other_msg = "Other worker found an error."; struct Packet // member name and metadata or error message @@ -237,6 +203,35 @@ public: }; +// prevent two threads from extracting the same file at the same time +class Name_monitor + { + std::vector< unsigned > crc_vector; + std::vector< std::string > name_vector; + pthread_mutex_t mutex; + +public: + Name_monitor( const int num_workers ) + : crc_vector( num_workers ), name_vector( num_workers ) + { if( num_workers > 0 ) xinit_mutex( &mutex ); } + + bool reserve_name( const unsigned worker_id, const std::string & filename ) + { + // compare the CRCs of the names, verify collisions comparing the names + const unsigned crc = + crc32c.compute_crc( (const uint8_t *)filename.c_str(), filename.size() ); + xlock( &mutex ); + for( unsigned i = 0; i < crc_vector.size(); ++i ) + if( crc_vector[i] == crc && crc != 0 && i != worker_id && + name_vector[i] == filename ) + { xunlock( &mutex ); return false; } // filename already reserved + crc_vector[worker_id] = crc; name_vector[worker_id] = filename; + xunlock( &mutex ); + return true; + } + }; + + const char * skip_member_lz( Archive_reader_i & ar, Packet_courier & courier, const Extended & extended, const long member_id, const int worker_id ) @@ -310,11 +305,201 @@ const char * list_member_lz( Archive_reader_i & ar, Packet_courier & courier, } +const char * extract_member_lz( const Cl_options & cl_opts, + Archive_reader_i & ar, Packet_courier & courier, + const Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const long member_id, + const int worker_id, Name_monitor & name_monitor ) + { + // skip member if another 
copy is already being extracted by another thread + if( !name_monitor.reserve_name( worker_id, extended.path() ) ) + return skip_member_lz( ar, courier, extended, member_id, worker_id ); + const char * const filename = extended.path().c_str(); + if( contains_dotdot( filename ) ) + { + if( verbosity >= 0 ) + { std::string estr( extended.path() ); + estr += ": Contains a '..' component, skipping."; + if( !courier.collect_packet( member_id, worker_id, estr.c_str(), + Packet::diag ) ) return other_msg; } + return skip_member_lz( ar, courier, extended, member_id, worker_id ); + } + mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits + if( geteuid() != 0 && !cl_opts.preserve_permissions ) mode &= ~get_umask(); + const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); + int outfd = -1; + + if( verbosity < 1 ) rbuf()[0] = 0; + else if( !format_member_name( extended, header, rbuf, verbosity > 1 ) ) + return mem_msg; + if( rbuf()[0] && !courier.collect_packet( member_id, worker_id, rbuf(), + Packet::ok ) ) return other_msg; + if( typeflag != tf_directory ) std::remove( filename ); + if( !make_path( filename ) && verbosity >= 0 ) + { std::string estr( extended.path() ); + estr += ": warning: Failed to create intermediate directory."; + if( !courier.collect_packet( member_id, worker_id, estr.c_str(), + Packet::diag ) ) return other_msg; } + switch( typeflag ) + { + case tf_regular: + case tf_hiperf: + outfd = open_outstream( filename, true, &rbuf ); + if( outfd < 0 ) + { + if( verbosity >= 0 && !courier.collect_packet( member_id, worker_id, + rbuf(), Packet::diag ) ) return other_msg; + set_error_status( 2 ); + return skip_member_lz( ar, courier, extended, member_id, worker_id ); + } + break; + case tf_link: + case tf_symlink: + { + const char * const linkname = extended.linkpath().c_str(); + const bool hard = 
typeflag == tf_link; + if( ( hard && link( linkname, filename ) != 0 ) || + ( !hard && symlink( linkname, filename ) != 0 ) ) + { + if( verbosity >= 0 ) + { + const int saved_errno = errno; + const int size = + snprintf( rbuf(), rbuf.size(), "Can't %slink '%s' to '%s': %s.\n", + hard ? "" : "sym", linkname, filename, + std::strerror( saved_errno ) ); + if( size > 0 && (unsigned)size > rbuf.size() && rbuf.resize( size ) ) + snprintf( rbuf(), rbuf.size(), "Can't %slink '%s' to '%s': %s.\n", + hard ? "" : "sym", linkname, filename, + std::strerror( saved_errno ) ); + if( verbosity >= 0 && !courier.collect_packet( member_id, worker_id, + rbuf(), Packet::diag ) ) return other_msg; + } + set_error_status( 2 ); + } + } break; + case tf_directory: + { + struct stat st; + bool exists = ( stat( filename, &st ) == 0 ); + if( exists && !S_ISDIR( st.st_mode ) ) + { exists = false; std::remove( filename ); } + if( !exists && mkdir( filename, mode ) != 0 && errno != EEXIST ) + { + if( verbosity >= 0 ) + { snprintf( rbuf(), rbuf.size(), "%s: Can't create directory: %s\n", + filename, std::strerror( errno ) ); + if( !courier.collect_packet( member_id, worker_id, rbuf(), + Packet::diag ) ) return other_msg; } + set_error_status( 2 ); + } + } break; + case tf_chardev: + case tf_blockdev: + { + const unsigned dev = + makedev( parse_octal( header + devmajor_o, devmajor_l ), + parse_octal( header + devminor_o, devminor_l ) ); + const int dmode = ( typeflag == tf_chardev ? 
S_IFCHR : S_IFBLK ) | mode; + if( mknod( filename, dmode, dev ) != 0 ) + { + if( verbosity >= 0 ) + { snprintf( rbuf(), rbuf.size(), "%s: Can't create device node: %s\n", + filename, std::strerror( errno ) ); + if( !courier.collect_packet( member_id, worker_id, rbuf(), + Packet::diag ) ) return other_msg; } + set_error_status( 2 ); + } + break; + } + case tf_fifo: + if( mkfifo( filename, mode ) != 0 && errno != EEXIST ) + { + if( verbosity >= 0 ) + { snprintf( rbuf(), rbuf.size(), "%s: Can't create FIFO file: %s\n", + filename, std::strerror( errno ) ); + if( !courier.collect_packet( member_id, worker_id, rbuf(), + Packet::diag ) ) return other_msg; } + set_error_status( 2 ); + } + break; + default: + if( verbosity >= 0 ) + { snprintf( rbuf(), rbuf.size(), + "File type '%c' not supported for file '%s'.\n", + typeflag, filename ); + if( !courier.collect_packet( member_id, worker_id, rbuf(), + Packet::diag ) ) return other_msg; } + set_error_status( 2 ); + } + + const uid_t uid = (uid_t)parse_octal( header + uid_o, uid_l ); + const gid_t gid = (gid_t)parse_octal( header + gid_o, gid_l ); + if( !islink && chown( filename, uid, gid ) != 0 && + errno != EPERM && errno != EINVAL ) + { + if( verbosity >= 0 ) + { snprintf( rbuf(), rbuf.size(), "%s: Can't change file owner: %s\n", + filename, std::strerror( errno ) ); + if( !courier.collect_packet( member_id, worker_id, rbuf(), + Packet::diag ) ) return other_msg; } + set_error_status( 2 ); + } + + if( typeflag == tf_regular || typeflag == tf_directory || + typeflag == tf_hiperf ) fchmod( outfd, mode ); // ignore errors + + const int bufsize = 32 * header_size; + uint8_t buf[bufsize]; + long long rest = extended.file_size(); + const int rem = rest % header_size; + const int padding = rem ? header_size - rem : 0; + while( rest > 0 ) + { + const int rsize = ( rest >= bufsize ) ? 
bufsize : rest + padding; + const int ret = ar.read( buf, rsize ); + if( ret != 0 ) + { + if( outfd >= 0 ) + { + if( cl_opts.keep_damaged ) + { writeblock( outfd, buf, std::min( rest, (long long)ar.e_size() ) ); + close( outfd ); } + else { close( outfd ); std::remove( filename ); } + } + return ar.e_msg(); + } + const int wsize = ( rest >= bufsize ) ? bufsize : rest; + if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize ) + { snprintf( rbuf(), rbuf.size(), "%s: Error writing file: %s\n", + filename, std::strerror( errno ) ); return rbuf(); } + rest -= wsize; + } + if( outfd >= 0 && close( outfd ) != 0 ) + { snprintf( rbuf(), rbuf.size(), "%s: Error closing file: %s\n", + filename, std::strerror( errno ) ); return rbuf(); } + if( !islink ) + { + struct utimbuf t; + t.actime = mtime; + t.modtime = mtime; + utime( filename, &t ); // ignore errors + } + if( ar.at_member_end() && + !courier.collect_packet( member_id, worker_id, "", Packet::member_done ) ) + return other_msg; + return 0; + } + + struct Worker_arg { const Cl_options * cl_opts; const Archive_descriptor * ad; Packet_courier * courier; + Name_monitor * name_monitor; std::vector< char > * name_pending; int worker_id; int num_workers; @@ -330,6 +515,7 @@ extern "C" void * dworker( void * arg ) const Cl_options & cl_opts = *tmp.cl_opts; const Archive_descriptor & ad = *tmp.ad; Packet_courier & courier = *tmp.courier; + Name_monitor & name_monitor = *tmp.name_monitor; std::vector< char > & name_pending = *tmp.name_pending; const int worker_id = tmp.worker_id; const int num_workers = tmp.num_workers; @@ -429,9 +615,12 @@ extern "C" void * dworker( void * arg ) msg = skip_member_lz( ar, courier, extended, i, worker_id ); else if( cl_opts.program_mode == m_list ) msg = list_member_lz( ar, courier, extended, header, rbuf, i, worker_id ); - else msg = compare_member_lz( cl_opts, ar, courier, extended, header, - rbuf, i, worker_id ); - if( msg ) + else if( cl_opts.program_mode == m_diff ) + msg = 
compare_member_lz( cl_opts, ar, courier, extended, header, + rbuf, i, worker_id ); + else msg = extract_member_lz( cl_opts, ar, courier, extended, header, + rbuf, i, worker_id, name_monitor ); + if( msg ) // fatal error { if( courier.request_mastership( i, worker_id ) ) courier.collect_packet( i, worker_id, msg, Packet::error ); goto done; } @@ -481,6 +670,9 @@ int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad, const int out_slots = 65536; // max small files (<=512B) in 64 MiB const int num_workers = // limited to number of members std::min( (long)cl_opts.num_workers, ad.lzip_index.members() ); + if( cl_opts.program_mode == m_extract ) get_umask(); // cache the umask + Name_monitor + name_monitor( ( cl_opts.program_mode == m_extract ) ? num_workers : 0 ); /* If an error happens after any threads have been started, exit must be called before courier goes out of scope. */ @@ -494,6 +686,7 @@ int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad, worker_args[i].cl_opts = &cl_opts; worker_args[i].ad = &ad; worker_args[i].courier = &courier; + worker_args[i].name_monitor = &name_monitor; worker_args[i].name_pending = &name_pending; worker_args[i].worker_id = i; worker_args[i].num_workers = num_workers; @@ -531,6 +724,7 @@ int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad, courier.ocheck_counter, courier.owait_counter ); + Exclude::clear(); // avoid error with gcc 3.3.6 if( !courier.finished() ) internal_error( "courier not finished." ); return final_exit_status( retval, cl_opts.program_mode != m_diff ); } @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2020 Antonio Diaz Diaz. + Copyright (C) 2013-2021 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,7 +25,7 @@ #include <cstring> #include <string> #include <vector> -#include <pthread.h> +#include <pthread.h> // for tarlz.h #include <stdint.h> #include <unistd.h> #include <lzlib.h> @@ -33,24 +33,7 @@ #include "arg_parser.h" #include "tarlz.h" #include "lzip_index.h" - - -namespace { - -bool parse_records( const int infd, Extended & extended, - const Tar_header header, Resizable_buffer & rbuf, - const bool permissive ) - { - const long long edsize = parse_octal( header + size_o, size_l ); - const long long bufsize = round_up( edsize ); - if( edsize <= 0 || edsize >= 1LL << 33 || bufsize >= INT_MAX ) - return false; // overflow or no extended data - if( !rbuf.resize( bufsize ) ) return false; // extended records buffer - return ( readblock( infd, (uint8_t *)rbuf(), bufsize ) == bufsize && - extended.parse( rbuf(), edsize, permissive ) ); - } - -} // end namespace +#include "archive_reader.h" bool safe_seek( const int fd, const long long pos ) @@ -60,34 +43,32 @@ bool safe_seek( const int fd, const long long pos ) } -int tail_copy( const char * const archive_namep, const Arg_parser & parser, - std::vector< char > & name_pending, - const Lzip_index & lzip_index, const long long istream_pos, - const int infd, const int outfd, int retval ) +int tail_copy( const Arg_parser & parser, const Archive_descriptor & ad, + std::vector< char > & name_pending, const long long istream_pos, + const int outfd, int retval ) { - const long long rest = lzip_index.file_size() - istream_pos; + const long long rest = ad.lzip_index.file_size() - istream_pos; if( istream_pos > 0 && rest > 0 && - ( !safe_seek( infd, istream_pos ) || - !copy_file( infd, outfd, rest ) ) ) - { show_file_error( archive_namep, "Error during tail copy." 
); + ( !safe_seek( ad.infd, istream_pos ) || + !copy_file( ad.infd, outfd, rest ) ) ) + { show_file_error( ad.namep, "Error during tail copy." ); return retval ? retval : 1; } const long long ostream_pos = lseek( outfd, 0, SEEK_CUR ); if( ostream_pos < 0 ) { show_error( "Seek error", errno ); retval = 1; } - else if( ostream_pos > 0 && ostream_pos < lzip_index.file_size() ) + else if( ostream_pos > 0 && ostream_pos < ad.lzip_index.file_size() ) { int ret; do ret = ftruncate( outfd, ostream_pos ); while( ret != 0 && errno == EINTR ); if( ret != 0 || lseek( outfd, 0, SEEK_END ) != ostream_pos ) { - show_file_error( archive_namep, "Can't truncate archive", errno ); + show_file_error( ad.namep, "Can't truncate archive", errno ); if( retval < 1 ) retval = 1; } } - if( ( close( outfd ) != 0 || close( infd ) != 0 ) && !retval ) - { show_file_error( archive_namep, "Error closing archive", errno ); - retval = 1; } + if( ( close( outfd ) != 0 || close( ad.infd ) != 0 ) && !retval ) + { show_file_error( ad.namep, "Error closing archive", errno ); retval = 1; } if( retval == 0 ) for( int i = 0; i < parser.arguments(); ++i ) if( nonempty_arg( parser, i ) && name_pending[i] ) @@ -108,11 +89,10 @@ int delete_members( const Cl_options & cl_opts ) { if( verbosity >= 1 ) show_error( "Nothing to delete." ); return 0; } if( cl_opts.archive_name.empty() ) { show_error( "Deleting from stdin not implemented yet." 
); return 1; } - const char * const archive_namep = cl_opts.archive_name.c_str(); - const int infd = open_instream( cl_opts.archive_name ); - if( infd < 0 ) return 1; + const Archive_descriptor ad( cl_opts.archive_name ); + if( ad.infd < 0 ) return 1; const int outfd = open_outstream( cl_opts.archive_name, false ); - if( outfd < 0 ) { close( infd ); return 1; } + if( outfd < 0 ) { close( ad.infd ); return 1; } // mark member names to be deleted std::vector< char > name_pending( cl_opts.parser.arguments(), false ); @@ -121,16 +101,15 @@ int delete_members( const Cl_options & cl_opts ) !Exclude::excluded( cl_opts.parser.argument( i ).c_str() ) ) name_pending[i] = true; - const Lzip_index lzip_index( infd, true, false ); // only regular files - if( lzip_index.retval() == 0 ) // compressed - return delete_members_lz( cl_opts, archive_namep, name_pending, lzip_index, - infd, outfd ); - if( lseek( infd, 0, SEEK_SET ) != 0 ) - { show_file_error( archive_namep, "Archive is not seekable." ); return 1; } - if( lzip_index.file_size() < 3 * header_size ) - { show_file_error( archive_namep, posix_msg ); return 2; } + if( ad.indexed ) // archive is a compressed regular file + return delete_members_lz( cl_opts, ad, name_pending, outfd ); + if( !ad.seekable ) + { show_file_error( ad.namep, "Archive is not seekable." 
); return 1; } + if( ad.lzip_index.file_size() < 3 * header_size ) + { show_file_error( ad.namep, posix_msg ); return 2; } // archive is uncompressed seekable, unless compressed corrupt + Archive_reader ar( ad ); Resizable_buffer rbuf; long long istream_pos = 0; // source of next data move long long member_begin = 0; // first pos of current tar member @@ -139,24 +118,21 @@ int delete_members( const Cl_options & cl_opts ) bool prev_extended = false; // prev header was extended while( true ) // process one tar header per iteration { - if( !prev_extended && ( member_begin = lseek( infd, 0, SEEK_CUR ) ) < 0 ) + if( !prev_extended && ( member_begin = lseek( ad.infd, 0, SEEK_CUR ) ) < 0 ) { show_error( "Seek error", errno ); retval = 1; break; } Tar_header header; - const int rd = readblock( infd, header, header_size ); - if( rd == 0 && errno == 0 ) // missing EOF blocks - { show_file_error( archive_namep, end_msg ); retval = 2; break; } - if( rd != header_size ) - { show_file_error( archive_namep, "Read error", errno ); - retval = 2; break; } + const int ret = ar.read( header, header_size ); + if( ret != 0 ) { show_file_error( ar.ad.namep, ar.e_msg(), ar.e_code() ); + retval = ret; break; } if( !verify_ustar_chksum( header ) ) { if( block_is_zero( header, header_size ) ) // EOF { if( prev_extended && !cl_opts.permissive ) - { show_file_error( archive_namep, fv_msg1 ); retval = 2; } + { show_file_error( ad.namep, fv_msg1 ); retval = 2; } break; } - show_file_error( archive_namep, "Corrupt header in archive." ); + show_file_error( ad.namep, "Corrupt header in archive." 
); retval = 2; break; } @@ -164,20 +140,23 @@ int delete_members( const Cl_options & cl_opts ) if( typeflag == tf_global ) { if( prev_extended && !cl_opts.permissive ) - { show_file_error( archive_namep, fv_msg2 ); retval = 2; break; } + { show_file_error( ad.namep, fv_msg2 ); retval = 2; break; } Extended dummy; // global headers are parsed and ignored - if( !parse_records( infd, dummy, header, rbuf, true ) ) - { show_file_error( archive_namep, gblrec_msg ); retval = 2; break; } + const int ret = ar.parse_records( dummy, header, rbuf, true ); + if( ret != 0 ) + { show_file_error( ad.namep, gblrec_msg ); retval = ret; break; } continue; } if( typeflag == tf_extended ) { if( prev_extended && !cl_opts.permissive ) - { show_file_error( archive_namep, fv_msg3 ); retval = 2; break; } - if( !parse_records( infd, extended, header, rbuf, cl_opts.permissive ) ) - { show_file_error( archive_namep, extrec_msg ); retval = 2; break; } + { show_file_error( ad.namep, fv_msg3 ); retval = 2; break; } + const int ret = ar.parse_records( extended, header, rbuf, + cl_opts.permissive ); + if( ret != 0 ) + { show_file_error( ad.namep, extrec_msg ); retval = ret; break; } else if( !extended.crc_present() && cl_opts.missing_crc ) - { show_file_error( archive_namep, mcrc_msg ); retval = 2; break; } + { show_file_error( ad.namep, mcrc_msg ); retval = 2; break; } prev_extended = true; continue; } @@ -185,11 +164,10 @@ int delete_members( const Cl_options & cl_opts ) extended.fill_from_ustar( header ); // copy metadata from header - { // skip member - long long rest = round_up( extended.file_size() ); // size + padding - if( lseek( infd, rest, SEEK_CUR ) <= 0 ) - { show_file_error( archive_namep, "Seek error", errno ); - retval = 1; break; } + { + const int ret = ar.skip_member( extended ); + if( ret != 0 ) + { show_file_error( ad.namep, "Seek error", errno ); retval = ret; break; } } // delete tar member @@ -197,24 +175,23 @@ int delete_members( const Cl_options & cl_opts ) { if( 
!show_member_name( extended, header, 1, rbuf ) ) { retval = 1; break; } - const long long pos = lseek( infd, 0, SEEK_CUR ); + const long long pos = lseek( ad.infd, 0, SEEK_CUR ); if( pos <= 0 || pos <= member_begin || member_begin < istream_pos ) - { show_file_error( archive_namep, "Seek error", errno ); + { show_file_error( ad.namep, "Seek error", errno ); retval = 1; break; } const long long size = member_begin - istream_pos; if( size > 0 ) // move pending data each time a member is deleted { if( istream_pos == 0 ) { if( !safe_seek( outfd, size ) ) { retval = 1; break; } } - else if( !safe_seek( infd, istream_pos ) || - !copy_file( infd, outfd, size ) || - !safe_seek( infd, pos ) ) { retval = 1; break; } + else if( !safe_seek( ad.infd, istream_pos ) || + !copy_file( ad.infd, outfd, size ) || + !safe_seek( ad.infd, pos ) ) { retval = 1; break; } } istream_pos = pos; } extended.reset(); } - return tail_copy( archive_namep, cl_opts.parser, name_pending, lzip_index, - istream_pos, infd, outfd, retval ); + return tail_copy( cl_opts.parser, ad, name_pending, istream_pos, outfd, retval ); } diff --git a/delete_lz.cc b/delete_lz.cc index 961645f..c566c00 100644 --- a/delete_lz.cc +++ b/delete_lz.cc @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2020 Antonio Diaz Diaz. + Copyright (C) 2013-2021 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,7 +25,7 @@ #include <cstring> #include <string> #include <vector> -#include <pthread.h> +#include <pthread.h> // for tarlz.h #include <stdint.h> #include <unistd.h> #include <lzlib.h> @@ -33,150 +33,52 @@ #include "arg_parser.h" #include "tarlz.h" #include "lzip_index.h" - - -/* Read 'size' decompressed bytes from the archive. - Return value: 0 = OK, 1 = damaged member, 2 = fatal error. 
*/ -int archive_read_lz( LZ_Decoder * const decoder, const int infd, - long long & file_pos, const long long member_end, - const long long cdata_size, uint8_t * const buf, - const int size, const char ** msg ) - { - int sz = 0; - - while( sz < size ) - { - const int rd = LZ_decompress_read( decoder, buf + sz, size - sz ); - if( rd < 0 ) - { *msg = LZ_strerror( LZ_decompress_errno( decoder ) ); return 1; } - if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) - { *msg = end_msg; return 2; } - sz += rd; - if( sz < size && LZ_decompress_write_size( decoder ) > 0 ) - { - const long long ibuf_size = 16384; - uint8_t ibuf[ibuf_size]; - const long long rest = ( file_pos < member_end ) ? - member_end - file_pos : cdata_size - file_pos; - const int rsize = std::min( LZ_decompress_write_size( decoder ), - (int)std::min( ibuf_size, rest ) ); - if( rsize <= 0 ) LZ_decompress_finish( decoder ); - else - { - const int rd = preadblock( infd, ibuf, rsize, file_pos ); - if( LZ_decompress_write( decoder, ibuf, rd ) != rd ) - internal_error( "library error (LZ_decompress_write)." 
); - file_pos += rd; - if( rd < rsize ) - { - LZ_decompress_finish( decoder ); - if( errno ) { *msg = "Error reading archive"; return 2; } - } - } - } - } - return 0; - } - - -int parse_records_lz( LZ_Decoder * const decoder, const int infd, - long long & file_pos, const long long member_end, - const long long cdata_size, long long & data_pos, - Extended & extended, const Tar_header header, - Resizable_buffer & rbuf, const char ** msg, - const bool permissive ) - { - const long long edsize = parse_octal( header + size_o, size_l ); - const long long bufsize = round_up( edsize ); - if( edsize <= 0 || edsize >= 1LL << 33 || bufsize >= INT_MAX ) - return 1; // overflow or no extended data - if( !rbuf.resize( bufsize ) ) return 1; // extended records buffer - int retval = archive_read_lz( decoder, infd, file_pos, member_end, - cdata_size, (uint8_t *)rbuf(), bufsize, msg ); - if( retval == 0 ) - { if( extended.parse( rbuf(), edsize, permissive ) ) data_pos += bufsize; - else retval = 2; } - return retval; - } - - -int skip_member_lz( LZ_Decoder * const decoder, const int infd, - long long & file_pos, const long long member_end, - const long long cdata_size, long long & data_pos, - long long rest, const char ** msg ) - { - const int bufsize = 32 * header_size; - uint8_t buf[bufsize]; - while( rest > 0 ) // skip tar member - { - const int rsize = ( rest >= bufsize ) ? bufsize : rest; - const int ret = archive_read_lz( decoder, infd, file_pos, member_end, - cdata_size, buf, rsize, msg ); - if( ret != 0 ) return ret; - data_pos += rsize; - rest -= rsize; - } - return 0; - } +#include "archive_reader.h" /* Deleting from a corrupt archive must not worsen the corruption. Stop and tail-copy as soon as corruption is found. 
*/ int delete_members_lz( const Cl_options & cl_opts, - const char * const archive_namep, + const Archive_descriptor & ad, std::vector< char > & name_pending, - const Lzip_index & lzip_index, - const int infd, const int outfd ) + const int outfd ) { Resizable_buffer rbuf; - LZ_Decoder * const decoder = LZ_decompress_open(); - if( !rbuf.size() || !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) - { show_error( mem_msg ); return 1; } + Archive_reader_i ar( ad ); // indexed reader + if( !rbuf.size() || ar.fatal() ) { show_error( mem_msg ); return 1; } long long istream_pos = 0; // source of next data move - const long long cdata_size = lzip_index.cdata_size(); int retval = 0, retval2 = 0; - for( long i = 0; i < lzip_index.members(); ++i ) + for( long i = 0; i < ad.lzip_index.members(); ++i ) { - const long long mdata_pos = lzip_index.dblock( i ).pos(); - long long data_pos = mdata_pos; - const long long mdata_end = lzip_index.dblock( i ).end(); - if( data_pos >= mdata_end ) continue; // empty lzip member - const long long member_pos = lzip_index.mblock( i ).pos(); - long long file_pos = member_pos; - const long long member_end = lzip_index.mblock( i ).end(); - + if( ad.lzip_index.dblock( i ).size() == 0 ) continue; // empty lzip member long long member_begin = 0; // first pos of current tar member Extended extended; // metadata from extended records bool prev_extended = false; // prev header was extended - LZ_decompress_reset( decoder ); // prepare for new member - if( !safe_seek( infd, member_pos ) ) { retval = 1; break; } + ar.set_member( i ); // prepare for new member while( true ) // process one tar header per iteration { - if( data_pos >= mdata_end ) + if( ar.data_pos() >= ar.mdata_end() ) { - if( data_pos == mdata_end && !prev_extended ) break; + if( ar.at_member_end() && !prev_extended ) break; // member end exceeded or ends in extended - show_file_error( archive_namep, "Member misalignment found." 
); + show_file_error( ad.namep, "Member misalignment found." ); retval = 2; goto done; } - if( !prev_extended ) member_begin = data_pos; + if( !prev_extended ) member_begin = ar.data_pos(); Tar_header header; - const char * msg = 0; - retval = archive_read_lz( decoder, infd, file_pos, member_end, - cdata_size, header, header_size, &msg ); - if( retval != 0 ) { show_file_error( archive_namep, msg ); goto done; } - data_pos += header_size; + retval = ar.read( header, header_size ); + if( retval != 0 ) { show_file_error( ad.namep, ar.e_msg() ); goto done; } if( !verify_ustar_chksum( header ) ) { if( block_is_zero( header, header_size ) ) // EOF { if( prev_extended && !cl_opts.permissive ) - { show_file_error( archive_namep, fv_msg1 ); retval = 2; } + { show_file_error( ad.namep, fv_msg1 ); retval = 2; } goto done; } - show_file_error( archive_namep, ( data_pos > header_size ) ? + show_file_error( ad.namep, ( ar.data_pos() > header_size ) ? bad_hdr_msg : posix_lz_msg ); retval = 2; goto done; @@ -186,67 +88,57 @@ int delete_members_lz( const Cl_options & cl_opts, if( typeflag == tf_global ) { if( prev_extended && !cl_opts.permissive ) - { show_file_error( archive_namep, fv_msg2 ); retval = 2; goto done; } + { show_file_error( ad.namep, fv_msg2 ); retval = 2; goto done; } Extended dummy; // global headers are parsed and ignored - retval = parse_records_lz( decoder, infd, file_pos, member_end, - cdata_size, data_pos, dummy, header, - rbuf, &msg, true ); + retval = ar.parse_records( dummy, header, rbuf, true ); if( retval == 0 ) continue; - show_file_error( archive_namep, gblrec_msg ); + show_file_error( ad.namep, gblrec_msg ); goto done; } if( typeflag == tf_extended ) { + const char * msg = 0; if( prev_extended && !cl_opts.permissive ) { msg = fv_msg3; retval = 2; } - else retval = parse_records_lz( decoder, infd, file_pos, member_end, - cdata_size, data_pos, extended, header, - rbuf, &msg, cl_opts.permissive ); + else retval = ar.parse_records( extended, header, rbuf, 
+ cl_opts.permissive ); if( retval == 0 && !extended.crc_present() && cl_opts.missing_crc ) { msg = mcrc_msg; retval = 2; } if( retval == 0 ) { prev_extended = true; continue; } - show_file_error( archive_namep, extrec_msg ); + show_file_error( ad.namep, msg ? msg : extrec_msg ); goto done; } prev_extended = false; extended.fill_from_ustar( header ); // copy metadata from header - long long rest = round_up( extended.file_size() ); // size + padding - if( data_pos + rest >= mdata_end ) data_pos += rest; - else // skip tar member - if( ( retval = skip_member_lz( decoder, infd, file_pos, member_end, - cdata_size, data_pos, rest, &msg ) ) != 0 ) - goto done; + if( ( retval = ar.skip_member( extended ) ) != 0 ) goto done; // delete tar member if( !check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) ) { // verify that members match - if( member_begin != mdata_pos || data_pos != mdata_end ) + if( member_begin != ad.lzip_index.dblock( i ).pos() || !ar.at_member_end() ) { show_file_error( extended.path().c_str(), "Can't delete: not compressed individually." ); retval2 = 2; extended.reset(); continue; } if( !show_member_name( extended, header, 1, rbuf ) ) { retval = 1; goto done; } - const long long size = member_pos - istream_pos; + const long long size = ad.lzip_index.mblock( i ).pos() - istream_pos; if( size > 0 ) // move pending data each time a member is deleted { if( istream_pos == 0 ) { if( !safe_seek( outfd, size ) ) { retval = 1; goto done; } } - else if( !safe_seek( infd, istream_pos ) || - !copy_file( infd, outfd, size ) ) { retval = 1; goto done; } + else if( !safe_seek( ad.infd, istream_pos ) || + !copy_file( ad.infd, outfd, size ) ) { retval = 1; goto done; } } - istream_pos = member_end; + istream_pos = ad.lzip_index.mblock( i ).end(); // member end } extended.reset(); } } done: if( retval < retval2 ) retval = retval2; - if( LZ_decompress_close( decoder ) < 0 && !retval ) - { show_error( "LZ_decompress_close failed." 
); retval = 1; } // tail copy keeps trailing data - return tail_copy( archive_namep, cl_opts.parser, name_pending, lzip_index, - istream_pos, infd, outfd, retval ); + return tail_copy( cl_opts.parser, ad, name_pending, istream_pos, outfd, retval ); } diff --git a/doc/tarlz.1 b/doc/tarlz.1 index cf0f659..e2ed3de 100644 --- a/doc/tarlz.1 +++ b/doc/tarlz.1 @@ -1,5 +1,5 @@ -.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH TARLZ "1" "July 2020" "tarlz 0.17" "User Commands" +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. +.TH TARLZ "1" "January 2021" "tarlz 0.19" "User Commands" .SH NAME tarlz \- creates tar archives with multimember lzip compression .SH SYNOPSIS @@ -7,13 +7,15 @@ tarlz \- creates tar archives with multimember lzip compression [\fI\,options\/\fR] [\fI\,files\/\fR] .SH DESCRIPTION Tarlz is a massively parallel (multi\-threaded) combined implementation of -the tar archiver and the lzip compressor. Tarlz creates, lists and extracts -archives in a simplified and safer variant of the POSIX pax format -compressed with lzip, keeping the alignment between tar members and lzip -members. The resulting multimember tar.lz archive is fully backward -compatible with standard tar tools like GNU tar, which treat it like any -other tar.lz archive. Tarlz can append files to the end of such compressed -archives. +the tar archiver and the lzip compressor. Tarlz uses the compression library +lzlib. +.PP +Tarlz creates, lists, and extracts archives in a simplified and safer +variant of the POSIX pax format compressed in lzip format, keeping the +alignment between tar members and lzip members. The resulting multimember +tar.lz archive is fully backward compatible with standard tar tools like GNU +tar, which treat it like any other tar.lz archive. Tarlz can append files to +the end of such compressed archives. .PP Keeping the alignment between tar members and lzip members has two advantages. 
It adds an indexed lzip layer on top of the tar archive, making @@ -126,6 +128,9 @@ exit with error status if missing extended CRC .TP \fB\-\-out\-slots=\fR<n> number of 1 MiB output packets buffered [64] +.TP +\fB\-\-check\-lib\fR +compare version of lzlib.h with liblz.{a,so} .PP Exit status: 0 for a normal exit, 1 for environmental problems (file not found, files differ, invalid flags, I/O errors, etc), 2 to indicate a @@ -136,8 +141,8 @@ Report bugs to lzip\-bug@nongnu.org .br Tarlz home page: http://www.nongnu.org/lzip/tarlz.html .SH COPYRIGHT -Copyright \(co 2020 Antonio Diaz Diaz. -Using lzlib 1.12\-rc1a +Copyright \(co 2021 Antonio Diaz Diaz. +Using lzlib 1.12 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. diff --git a/doc/tarlz.info b/doc/tarlz.info index e2c61db..d287697 100644 --- a/doc/tarlz.info +++ b/doc/tarlz.info @@ -11,7 +11,7 @@ File: tarlz.info, Node: Top, Next: Introduction, Up: (dir) Tarlz Manual ************ -This manual is for Tarlz (version 0.17, 30 July 2020). +This manual is for Tarlz (version 0.19, 8 January 2021). * Menu: @@ -28,10 +28,10 @@ This manual is for Tarlz (version 0.17, 30 July 2020). * Concept index:: Index of concepts - Copyright (C) 2013-2020 Antonio Diaz Diaz. + Copyright (C) 2013-2021 Antonio Diaz Diaz. - This manual is free documentation: you have unlimited permission to -copy, distribute, and modify it. + This manual is free documentation: you have unlimited permission to copy, +distribute, and modify it. File: tarlz.info, Node: Introduction, Next: Invoking tarlz, Prev: Top, Up: Top @@ -40,13 +40,15 @@ File: tarlz.info, Node: Introduction, Next: Invoking tarlz, Prev: Top, Up: T ************** Tarlz is a massively parallel (multi-threaded) combined implementation of -the tar archiver and the lzip compressor. 
Tarlz creates, lists and extracts -archives in a simplified and safer variant of the POSIX pax format -compressed with lzip, keeping the alignment between tar members and lzip -members. The resulting multimember tar.lz archive is fully backward -compatible with standard tar tools like GNU tar, which treat it like any -other tar.lz archive. Tarlz can append files to the end of such compressed -archives. +the tar archiver and the lzip compressor. Tarlz uses the compression +library lzlib. + + Tarlz creates tar archives using a simplified and safer variant of the +POSIX pax format compressed in lzip format, keeping the alignment between +tar members and lzip members. The resulting multimember tar.lz archive is +fully backward compatible with standard tar tools like GNU tar, which treat +it like any other tar.lz archive. Tarlz can append files to the end of such +compressed archives. Keeping the alignment between tar members and lzip members has two advantages. It adds an indexed lzip layer on top of the tar archive, making @@ -56,7 +58,7 @@ plzip may even double the amount of files lost for each lzip member damaged because it does not keep the members aligned. Tarlz can create tar archives with five levels of compression -granularity; per file (--no-solid), per block (--bsolid, default), per +granularity: per file (--no-solid), per block (--bsolid, default), per directory (--dsolid), appendable solid (--asolid), and solid (--solid). It can also create uncompressed tar archives. @@ -79,8 +81,8 @@ archive, but it has the following advantages: lziprecover can be used to recover some of the damaged members. * A multimember tar.lz archive is usually smaller than the corresponding - solidly compressed tar.gz archive, except when compressing files - smaller than about 32 KiB individually. + solidly compressed tar.gz archive, except when individually + compressing files smaller than about 32 KiB. 
Tarlz protects the extended records with a Cyclic Redundancy Check (CRC) in a way compatible with standard tar tools. *Note crc32::. @@ -240,8 +242,7 @@ to '-1 --solid' not used, tarlz tries to detect the number of processors in the system and use it as default value. 'tarlz --help' shows the system's default value. See the note about multi-threaded archive creation in the - option '-C' above. Multi-threaded extraction of files from an archive - is not yet implemented. *Note Multi-threaded decoding::. + option '-C' above. Note that the number of usable threads is limited during compression to ceil( uncompressed_size / data_size ) (*note Minimum archive sizes::), @@ -281,7 +282,8 @@ to '-1 --solid' '-v' '--verbose' - Verbosely list files processed. + Verbosely list files processed. Further -v's (up to 4) increase the + verbosity level. '-x' '--extract' @@ -376,7 +378,8 @@ to '-1 --solid' Don't delete partially extracted files. If a decompression error happens while extracting a file, keep the partial data extracted. Use this option to recover as much data as possible from each damaged - member. + member. It is recommended to run tarlz in single-threaded mode + (-threads=0) when using this option. '--missing-crc' Exit with error status 2 if the CRC of the extended records is missing. @@ -396,6 +399,15 @@ to '-1 --solid' more memory. Valid values range from 1 to 1024. The default value is 64. +'--check-lib' + Compare the version of lzlib used to compile tarlz with the version + actually being used and exit. Report any differences found. Exit with + error status 1 if differences are found. A mismatch may indicate that + lzlib is not correctly installed or that a different version of lzlib + has been installed after compiling tarlz. 'tarlz -v --check-lib' shows + the version of lzlib being used and the value of 'LZ_API_VERSION' (if + defined). *Note Library version: (lzlib)Library version. 
+ Exit status: 0 for a normal exit, 1 for environmental problems (file not found, files differ, invalid flags, I/O errors, etc), 2 to indicate a @@ -546,6 +558,10 @@ space, equal-sign, and newline. the swapping of two bytes. + At verbosity level 1 or higher tarlz prints a diagnostic for each unknown +extended header keyword found in an archive, once per keyword. + + 4.2 Ustar header block ====================== @@ -770,11 +786,12 @@ interesting parts described here are those related to Multi-threaded processing. The structure of the part of tarlz performing Multi-threaded archive -creation is somewhat similar to that of plzip with the added complication of -the solidity levels. A grouper thread and several worker threads are -created, acting the main thread as muxer (multiplexer) thread. A "packet -courier" takes care of data transfers among threads and limits the maximum -number of data blocks (packets) being processed simultaneously. +creation is somewhat similar to that of plzip with the added complication +of the solidity levels. *Note Program design: (plzip)Program design. A +grouper thread and several worker threads are created, acting the main +thread as muxer (multiplexer) thread. A "packet courier" takes care of data +transfers among threads and limits the maximum number of data blocks +(packets) being processed simultaneously. The grouper traverses the directory tree, groups together the metadata of the files to be archived in each lzip member, and distributes them to the @@ -805,8 +822,7 @@ the archive. ,--------, | file |<---> data to/from each worker below | system | -`--------' - ,------------, +`--------' ,------------, ,-->| worker 0 |--, | `------------' | ,---------, | ,------------, | ,-------, ,--------, @@ -870,8 +886,7 @@ possible decoding it safely in parallel. Tarlz is able to automatically decode aligned and unaligned multimember tar.lz archives, keeping backwards compatibility. 
If tarlz finds a member misalignment during multi-threaded decoding, it switches to single-threaded -mode and continues decoding the archive. Currently only the options -'--diff' and '--list' are able to do multi-threaded decoding. +mode and continues decoding the archive. If the files in the archive are large, multi-threaded '--list' on a regular (seekable) tar.lz archive can be hundreds of times faster than @@ -886,7 +901,33 @@ example listing the Silesia corpus on a dual core machine: On the other hand, multi-threaded '--list' won't detect corruption in the tar member data because it only decodes the part of each lzip member -corresponding to the tar member header. +corresponding to the tar member header. This is another reason why the tar +headers must provide its own integrity checking. + + +7.1 Limitations of multi-threaded extraction +============================================ + +Multi-threaded extraction may produce different output than single-threaded +extraction in some cases: + + During multi-threaded extraction, several independent processes are +simultaneously reading the archive and creating files in the file system. +The archive is not read sequentially. As a consequence, any error or +weirdness in the archive (like a corrupt member or an EOF block in the +middle of the archive) won't be usually detected until part of the archive +beyond that point has been processed. + + If the archive contains two or more tar members with the same name, +single-threaded extraction extracts the members in the order they appear in +the archive and leaves in the file system the last version of the file. But +multi-threaded extraction may extract the members in any order and leave in +the file system any version of the file nondeterministically. It is +unspecified which of the tar members is extracted. + + If the same file is extracted through several paths (different member +names resolve to the same file in the file system), the result is undefined. 
+(Probably the resulting file will be mangled). File: tarlz.info, Node: Minimum archive sizes, Next: Examples, Prev: Multi-threaded decoding, Up: Top @@ -1028,22 +1069,22 @@ Concept index Tag Table: Node: Top223 -Node: Introduction1212 -Node: Invoking tarlz3982 -Ref: --data-size6193 -Ref: --bsolid14608 -Node: Portable character set18244 -Node: File format18887 -Ref: key_crc3223812 -Node: Amendments to pax format29271 -Ref: crc3229935 -Ref: flawed-compat31220 -Node: Program design33865 -Node: Multi-threaded decoding37756 -Node: Minimum archive sizes40492 -Node: Examples42630 -Node: Problems44345 -Node: Concept index44873 +Node: Introduction1214 +Node: Invoking tarlz4022 +Ref: --data-size6233 +Ref: --bsolid14593 +Node: Portable character set18852 +Node: File format19495 +Ref: key_crc3224420 +Node: Amendments to pax format30021 +Ref: crc3230685 +Ref: flawed-compat31970 +Node: Program design34615 +Node: Multi-threaded decoding38540 +Node: Minimum archive sizes42482 +Node: Examples44620 +Node: Problems46335 +Node: Concept index46863 End Tag Table diff --git a/doc/tarlz.texi b/doc/tarlz.texi index 00116ee..c6e7e89 100644 --- a/doc/tarlz.texi +++ b/doc/tarlz.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 30 July 2020 -@set VERSION 0.17 +@set UPDATED 8 January 2021 +@set VERSION 0.19 @dircategory Data Compression @direntry @@ -29,6 +29,7 @@ @contents @end ifnothtml +@ifnottex @node Top @top @@ -49,10 +50,11 @@ This manual is for Tarlz (version @value{VERSION}, @value{UPDATED}). @end menu @sp 1 -Copyright @copyright{} 2013-2020 Antonio Diaz Diaz. +Copyright @copyright{} 2013-2021 Antonio Diaz Diaz. -This manual is free documentation: you have unlimited permission -to copy, distribute, and modify it. +This manual is free documentation: you have unlimited permission to copy, +distribute, and modify it. +@end ifnottex @node Introduction @@ -61,13 +63,15 @@ to copy, distribute, and modify it. 
@uref{http://www.nongnu.org/lzip/tarlz.html,,Tarlz} is a massively parallel (multi-threaded) combined implementation of the tar archiver and the -@uref{http://www.nongnu.org/lzip/lzip.html,,lzip} compressor. Tarlz creates, -lists and extracts archives in a simplified and safer variant of the POSIX -pax format compressed with lzip, keeping the alignment between tar members -and lzip members. The resulting multimember tar.lz archive is fully backward -compatible with standard tar tools like GNU tar, which treat it like any -other tar.lz archive. Tarlz can append files to the end of such compressed -archives. +@uref{http://www.nongnu.org/lzip/lzip.html,,lzip} compressor. Tarlz uses the +compression library @uref{http://www.nongnu.org/lzip/lzlib.html,,lzlib}. + +Tarlz creates tar archives using a simplified and safer variant of the POSIX +pax format compressed in lzip format, keeping the alignment between tar +members and lzip members. The resulting multimember tar.lz archive is fully +backward compatible with standard tar tools like GNU tar, which treat it +like any other tar.lz archive. Tarlz can append files to the end of such +compressed archives. Keeping the alignment between tar members and lzip members has two advantages. It adds an indexed lzip layer on top of the tar archive, making @@ -76,7 +80,7 @@ amount of data lost in case of corruption. Compressing a tar archive with plzip may even double the amount of files lost for each lzip member damaged because it does not keep the members aligned. -Tarlz can create tar archives with five levels of compression granularity; +Tarlz can create tar archives with five levels of compression granularity: per file (---no-solid), per block (---bsolid, default), per directory (---dsolid), appendable solid (---asolid), and solid (---solid). It can also create uncompressed tar archives. @@ -97,17 +101,17 @@ member), and unwanted members can be deleted from the archive. Just like an uncompressed tar archive. 
@item -It is a safe POSIX-style backup format. In case of corruption, -tarlz can extract all the undamaged members from the tar.lz -archive, skipping over the damaged members, just like the standard -(uncompressed) tar. Moreover, the option @samp{--keep-damaged} can be -used to recover as much data as possible from each damaged member, -and lziprecover can be used to recover some of the damaged members. +It is a safe POSIX-style backup format. In case of corruption, tarlz +can extract all the undamaged members from the tar.lz archive, +skipping over the damaged members, just like the standard +(uncompressed) tar. Moreover, the option @samp{--keep-damaged} can be used +to recover as much data as possible from each damaged member, and +lziprecover can be used to recover some of the damaged members. @item -A multimember tar.lz archive is usually smaller than the -corresponding solidly compressed tar.gz archive, except when -compressing files smaller than about 32 KiB individually. +A multimember tar.lz archive is usually smaller than the corresponding +solidly compressed tar.gz archive, except when individually +compressing files smaller than about 32 KiB. @end itemize Tarlz protects the extended records with a Cyclic Redundancy Check (CRC) in @@ -275,8 +279,6 @@ of 0 disables threads entirely. If this option is not used, tarlz tries to detect the number of processors in the system and use it as default value. @w{@samp{tarlz --help}} shows the system's default value. See the note about multi-threaded archive creation in the option @samp{-C} above. -Multi-threaded extraction of files from an archive is not yet implemented. -@xref{Multi-threaded decoding}. Note that the number of usable threads is limited during compression to @w{ceil( uncompressed_size / data_size )} (@pxref{Minimum archive sizes}), @@ -316,7 +318,8 @@ List the contents of an archive. If @var{files} are given, list only the @item -v @itemx --verbose -Verbosely list files processed. 
+Verbosely list files processed. Further -v's (up to 4) increase the +verbosity level. @item -x @itemx --extract @@ -409,8 +412,9 @@ decimal numeric group ID. @item --keep-damaged Don't delete partially extracted files. If a decompression error happens -while extracting a file, keep the partial data extracted. Use this -option to recover as much data as possible from each damaged member. +while extracting a file, keep the partial data extracted. Use this option to +recover as much data as possible from each damaged member. It is recommended +to run tarlz in single-threaded mode (--threads=0) when using this option. @item --missing-crc Exit with error status 2 if the CRC of the extended records is missing. @@ -429,6 +433,19 @@ number of packets may increase compression speed if the files being archived are larger than @w{64 MiB} compressed, but requires more memory. Valid values range from 1 to 1024. The default value is 64. +@item --check-lib +Compare the +@uref{http://www.nongnu.org/lzip/manual/lzlib_manual.html#Library-version,,version of lzlib} +used to compile tarlz with the version actually being used and exit. Report +any differences found. Exit with error status 1 if differences are found. A +mismatch may indicate that lzlib is not correctly installed or that a +different version of lzlib has been installed after compiling tarlz. +@w{@samp{tarlz -v --check-lib}} shows the version of lzlib being used and +the value of @samp{LZ_API_VERSION} (if defined). +@ifnothtml +@xref{Library version,,,lzlib}. +@end ifnothtml + @ignore @item --permissive Allow some violations of the archive format, like consecutive extended @@ -613,8 +630,12 @@ protected by the CRC to guarantee that corruption is always detected (except in case of CRC collision). A CRC was chosen because a checksum is too weak for a potentially large list of variable sized records. A checksum can't detect simple errors like the swapping of two bytes. 
+ @end table +At verbosity level 1 or higher tarlz prints a diagnostic for each unknown +extended header keyword found in an archive, once per keyword. + @sp 1 @section Ustar header block @@ -839,11 +860,16 @@ or less similar to any other tar and won't be described here. The interesting parts described here are those related to Multi-threaded processing. The structure of the part of tarlz performing Multi-threaded archive -creation is somewhat similar to that of plzip with the added complication of -the solidity levels. A grouper thread and several worker threads are -created, acting the main thread as muxer (multiplexer) thread. A "packet -courier" takes care of data transfers among threads and limits the maximum -number of data blocks (packets) being processed simultaneously. +creation is somewhat similar to that of +@uref{http://www.nongnu.org/lzip/plzip.html#Program-design,,plzip} with the +added complication of the solidity levels. +@ifnothtml +@xref{Program design,,,plzip}. +@end ifnothtml +A grouper thread and several worker threads are created, acting the main +thread as muxer (multiplexer) thread. A "packet courier" takes care of data +transfers among threads and limits the maximum number of data blocks +(packets) being processed simultaneously. The grouper traverses the directory tree, groups together the metadata of the files to be archived in each lzip member, and distributes them to the @@ -876,8 +902,7 @@ access files in the file system either to read them (diff) or write them ,--------, | file |<---> data to/from each worker below | system | -`--------' - ,------------, +`--------' ,------------, ,-->| worker 0 |--, | `------------' | ,---------, | ,------------, | ,-------, ,--------, @@ -941,8 +966,7 @@ decoding it safely in parallel. Tarlz is able to automatically decode aligned and unaligned multimember tar.lz archives, keeping backwards compatibility. 
If tarlz finds a member misalignment during multi-threaded decoding, it switches to single-threaded -mode and continues decoding the archive. Currently only the options -@samp{--diff} and @samp{--list} are able to do multi-threaded decoding. +mode and continues decoding the archive. If the files in the archive are large, multi-threaded @samp{--list} on a regular (seekable) tar.lz archive can be hundreds of times faster than @@ -959,7 +983,32 @@ time tarlz -tf silesia.tar.lz (0.020s) On the other hand, multi-threaded @samp{--list} won't detect corruption in the tar member data because it only decodes the part of each lzip member -corresponding to the tar member header. +corresponding to the tar member header. This is another reason why the tar +headers must provide their own integrity checking. + +@sp 1 +@section Limitations of multi-threaded extraction + +Multi-threaded extraction may produce different output than single-threaded +extraction in some cases: + +During multi-threaded extraction, several independent processes are +simultaneously reading the archive and creating files in the file system. The +archive is not read sequentially. As a consequence, any error or weirdness +in the archive (like a corrupt member or an EOF block in the middle of the +archive) usually won't be detected until part of the archive beyond that +point has been processed. + +If the archive contains two or more tar members with the same name, +single-threaded extraction extracts the members in the order they appear in +the archive and leaves in the file system the last version of the file. But +multi-threaded extraction may extract the members in any order and leave in +the file system any version of the file nondeterministically. It is +unspecified which of the tar members is extracted. + +If the same file is extracted through several paths (different member names +resolve to the same file in the file system), the result is undefined. +(Probably the resulting file will be mangled). 
@node Minimum archive sizes @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2020 Antonio Diaz Diaz. + Copyright (C) 2013-2021 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -23,7 +23,7 @@ #include <string> #include <vector> #include <fnmatch.h> -#include <pthread.h> +#include <pthread.h> // for tarlz.h #include <stdint.h> #include "tarlz.h" @@ -39,6 +39,8 @@ std::vector< std::string > patterns; // list of patterns void Exclude::add_pattern( const std::string & arg ) { patterns.push_back( arg ); } +void Exclude::clear() { patterns.clear(); } + bool Exclude::excluded( const char * const filename ) { diff --git a/extended.cc b/extended.cc index d03494f..1057142 100644 --- a/extended.cc +++ b/extended.cc @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2020 Antonio Diaz Diaz. + Copyright (C) 2013-2021 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,16 +24,17 @@ #include <cstring> #include <string> #include <vector> -#include <pthread.h> +#include <pthread.h> // for tarlz.h #include <stdint.h> #include "tarlz.h" -namespace { - const CRC32 crc32c( true ); + +namespace { + unsigned decimal_digits( unsigned long long value ) { unsigned digits = 1; @@ -132,6 +133,7 @@ bool print_record( char * const buf, const int size, } // end namespace +std::vector< std::string > Extended::unknown_keywords; const std::string Extended::crc_record( "22 GNU.crc32=00000000\n" ); void Extended::calculate_sizes() const @@ -147,6 +149,22 @@ void Extended::calculate_sizes() const } +// print a diagnostic for each unknown keyword once per keyword +void Extended::unknown_keyword( const char * const buf, + const unsigned long long size ) const + { + unsigned long long eq_pos = 0; // position of '=' in buf + while( eq_pos < size && buf[eq_pos] != '=' ) ++eq_pos; + const std::string keyword( buf, eq_pos ); + for( unsigned i = 0; i < unknown_keywords.size(); ++i ) + if( keyword == unknown_keywords[i] ) return; + unknown_keywords.push_back( keyword ); + std::string msg( "Ignoring unknown extended header keyword '" ); + msg += keyword; msg += '\''; + show_error( msg.c_str() ); + } + + // Returns the extended block size, or -1 if error. long long Extended::format_block( Resizable_buffer & rbuf ) const { @@ -239,6 +257,8 @@ bool Extended::parse( const char * const buf, const unsigned long long edsize, return false; } } + else if( ( rest < 8 || std::memcmp( tail, "comment=", 8 ) != 0 ) && + verbosity >= 1 ) unknown_keyword( tail, rest ); pos += rsize; } return true; diff --git a/lzip_index.cc b/lzip_index.cc index 015bef5..8df379f 100644 --- a/lzip_index.cc +++ b/lzip_index.cc @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2020 Antonio Diaz Diaz. 
+ Copyright (C) 2013-2021 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,7 +24,7 @@ #include <cstring> #include <string> #include <vector> -#include <pthread.h> +#include <pthread.h> // for tarlz.h #include <stdint.h> #include <unistd.h> diff --git a/lzip_index.h b/lzip_index.h index 77d2a00..f47792f 100644 --- a/lzip_index.h +++ b/lzip_index.h @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2020 Antonio Diaz Diaz. + Copyright (C) 2013-2021 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2020 Antonio Diaz Diaz. + Copyright (C) 2013-2021 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -60,20 +60,21 @@ int verbosity = 0; namespace { const char * const program_name = "tarlz"; -const char * const program_year = "2020"; +const char * const program_year = "2021"; const char * invocation_name = program_name; // default value void show_help( const long num_online ) { std::printf( "Tarlz is a massively parallel (multi-threaded) combined implementation of\n" - "the tar archiver and the lzip compressor. Tarlz creates, lists and extracts\n" - "archives in a simplified and safer variant of the POSIX pax format\n" - "compressed with lzip, keeping the alignment between tar members and lzip\n" - "members. The resulting multimember tar.lz archive is fully backward\n" - "compatible with standard tar tools like GNU tar, which treat it like any\n" - "other tar.lz archive. Tarlz can append files to the end of such compressed\n" - "archives.\n" + "the tar archiver and the lzip compressor. 
Tarlz uses the compression library\n" + "lzlib.\n" + "\nTarlz creates, lists, and extracts archives in a simplified and safer\n" + "variant of the POSIX pax format compressed in lzip format, keeping the\n" + "alignment between tar members and lzip members. The resulting multimember\n" + "tar.lz archive is fully backward compatible with standard tar tools like GNU\n" + "tar, which treat it like any other tar.lz archive. Tarlz can append files to\n" + "the end of such compressed archives.\n" "\nKeeping the alignment between tar members and lzip members has two\n" "advantages. It adds an indexed lzip layer on top of the tar archive, making\n" "it possible to decode the archive safely in parallel. It also minimizes the\n" @@ -119,6 +120,7 @@ void show_help( const long num_online ) " --keep-damaged don't delete partially extracted files\n" " --missing-crc exit with error status if missing extended CRC\n" " --out-slots=<n> number of 1 MiB output packets buffered [64]\n" + " --check-lib compare version of lzlib.h with liblz.{a,so}\n" /* " --permissive allow repeated extended headers and records\n"*/, num_online ); if( verbosity >= 1 ) @@ -145,6 +147,37 @@ void show_version() } +int check_lib() + { + bool warning = false; + if( std::strcmp( LZ_version_string, LZ_version() ) != 0 ) + { warning = true; + if( verbosity >= 0 ) + std::printf( "warning: LZ_version_string != LZ_version() (%s vs %s)\n", + LZ_version_string, LZ_version() ); } +#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012 + if( LZ_API_VERSION != LZ_api_version() ) + { warning = true; + if( verbosity >= 0 ) + std::printf( "warning: LZ_API_VERSION != LZ_api_version() (%u vs %u)\n", + LZ_API_VERSION, LZ_api_version() ); } +#endif + if( verbosity >= 1 ) + { + std::printf( "Using lzlib %s\n", LZ_version() ); +#if !defined LZ_API_VERSION + std::fputs( "LZ_API_VERSION is not defined.\n", stdout ); +#elif LZ_API_VERSION >= 1012 + std::printf( "Using LZ_API_VERSION = %u\n", LZ_api_version() ); +#else + std::printf( 
"Compiled with LZ_API_VERSION = %u. " + "Using an unknown LZ_API_VERSION\n", LZ_API_VERSION ); +#endif + } + return warning; + } + + unsigned long long getnum( const char * const ptr, const unsigned long long llimit, const unsigned long long ulimit ) @@ -281,15 +314,21 @@ int open_instream( const std::string & name ) } -int open_outstream( const std::string & name, const bool create ) +int open_outstream( const std::string & name, const bool create, + Resizable_buffer * const rbufp ) { const int flags = (create ? O_CREAT | O_WRONLY | O_TRUNC : O_RDWR) | O_BINARY; const mode_t outfd_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; const int outfd = open( name.c_str(), flags, outfd_mode ); if( outfd < 0 ) - show_file_error( name.c_str(), create ? - "Can't create file" : "Error opening file", errno ); + { + const char * msg = create ? "Can't create file" : "Error opening file"; + if( !rbufp ) show_file_error( name.c_str(), msg, errno ); + else + snprintf( (*rbufp)(), (*rbufp).size(), "%s: %s: %s\n", name.c_str(), + msg, std::strerror( errno ) ); + } return outfd; } @@ -354,17 +393,9 @@ int main( const int argc, const char * const argv[] ) { if( argc > 0 ) invocation_name = argv[0]; -#if !defined LZ_API_VERSION || LZ_API_VERSION < 1 // compile-time test -#error "lzlib 1.8 or newer needed." -#elif LZ_API_VERSION >= 2 - if( LZ_api_version() < 1 ) // runtime test - { show_error( "Wrong library version. At least lzlib 1.8 is required." 
); - return 1; } -#endif - - enum { opt_ano = 256, opt_aso, opt_bso, opt_crc, opt_dbg, opt_del, opt_dso, - opt_exc, opt_grp, opt_hlp, opt_id, opt_kd, opt_mti, opt_nso, opt_out, - opt_own, opt_per, opt_sol, opt_un }; + enum { opt_ano = 256, opt_aso, opt_bso, opt_chk, opt_crc, opt_dbg, opt_del, + opt_dso, opt_exc, opt_grp, opt_hlp, opt_id, opt_kd, opt_mti, opt_nso, + opt_out, opt_own, opt_per, opt_sol, opt_un }; const Arg_parser::Option options[] = { { '0', 0, Arg_parser::no }, @@ -396,6 +427,7 @@ int main( const int argc, const char * const argv[] ) { opt_ano, "anonymous", Arg_parser::no }, { opt_aso, "asolid", Arg_parser::no }, { opt_bso, "bsolid", Arg_parser::no }, + { opt_chk, "check-lib", Arg_parser::no }, { opt_dbg, "debug", Arg_parser::yes }, { opt_del, "delete", Arg_parser::no }, { opt_dso, "dsolid", Arg_parser::no }, @@ -462,6 +494,7 @@ int main( const int argc, const char * const argv[] ) case opt_aso: cl_opts.solidity = asolid; break; case opt_bso: cl_opts.solidity = bsolid; break; case opt_crc: cl_opts.missing_crc = true; break; + case opt_chk: return check_lib(); case opt_dbg: cl_opts.debug_level = getnum( arg, 0, 3 ); break; case opt_del: set_mode( cl_opts.program_mode, m_delete ); break; case opt_dso: cl_opts.solidity = dsolid; break; @@ -481,6 +514,10 @@ int main( const int argc, const char * const argv[] ) } } // end process options +#if !defined LZ_API_VERSION || LZ_API_VERSION < 1 // compile-time test +#error "lzlib 1.8 or newer needed." +#endif + #if defined(__MSVCRT__) || defined(__OS2__) setmode( STDIN_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY ); @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2020 Antonio Diaz Diaz. + Copyright (C) 2013-2021 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,6 +15,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ +#include <sys/types.h> + #define max_file_size ( LLONG_MAX - header_size ) enum { header_size = 512 }; typedef uint8_t Tar_header[header_size]; @@ -71,6 +73,14 @@ inline bool dotdot_at_i( const char * const filename, const int i ) } +inline bool contains_dotdot( const char * const filename ) + { + for( int i = 0; filename[i]; ++i ) + if( dotdot_at_i( filename, i ) ) return true; + return false; + } + + class Resizable_buffer { char * p; @@ -103,6 +113,7 @@ public: class Extended // stores metadata from/for extended records { + static std::vector< std::string > unknown_keywords; // already diagnosed std::string linkpath_; // these are the real metadata std::string path_; long long file_size_; // >= 0 && <= max_file_size @@ -119,6 +130,8 @@ class Extended // stores metadata from/for extended records mutable bool crc_present_; void calculate_sizes() const; + void unknown_keyword( const char * const buf, + const unsigned long long size ) const; public: static const std::string crc_record; @@ -189,6 +202,14 @@ public: crc = c; } + uint32_t compute_crc( const uint8_t * const buffer, const int size ) const + { + uint32_t crc = 0xFFFFFFFFU; + for( int i = 0; i < size; ++i ) + crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 ); + return crc ^ 0xFFFFFFFFU; + } + // Calculates the crc of size bytes except a window of 8 bytes at pos uint32_t windowed_crc( const uint8_t * const buffer, const int pos, const int size ) const @@ -380,6 +401,8 @@ bool show_member_name( const Extended & extended, const Tar_header header, bool check_skip_filename( const Cl_options & cl_opts, std::vector< char > & name_pending, const char * const filename ); +mode_t get_umask(); +bool make_path( const std::string & name ); // defined in create.cc bool copy_file( const int infd, const int outfd, const long long max_size = -1 ); @@ -406,29 +429,7 @@ int encode_lz( const Cl_options & cl_opts, const char * const archive_namep, const int dictionary_size, const int match_len_limit, const int outfd ); -// 
defined in delete.cc -class Lzip_index; -bool safe_seek( const int fd, const long long pos ); -int tail_copy( const char * const archive_namep, const Arg_parser & parser, - std::vector< char > & name_pending, - const Lzip_index & lzip_index, const long long istream_pos, - const int infd, const int outfd, int retval ); -int delete_members( const Cl_options & cl_opts ); - -// defined in delete_lz.cc -int delete_members_lz( const Cl_options & cl_opts, - const char * const archive_namep, - std::vector< char > & name_pending, - const Lzip_index & lzip_index, - const int infd, const int outfd ); - -// defined in exclude.cc -namespace Exclude { -void add_pattern( const std::string & arg ); -bool excluded( const char * const filename ); -} // end namespace Exclude - -// defined in extract.cc +// defined in decode.cc bool compare_file_type( std::string & estr, std::string & ostr, const Cl_options & cl_opts, const Extended & extended, const Tar_header header ); @@ -439,12 +440,32 @@ bool compare_file_contents( std::string & estr, std::string & ostr, int decode( const Cl_options & cl_opts ); // defined in decode_lz.cc -int preadblock( const int fd, uint8_t * const buf, const int size, - const long long pos ); struct Archive_descriptor; int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad, std::vector< char > & name_pending ); +// defined in delete.cc +bool safe_seek( const int fd, const long long pos ); +int tail_copy( const Arg_parser & parser, const Archive_descriptor & ad, + std::vector< char > & name_pending, const long long istream_pos, + const int outfd, int retval ); +int delete_members( const Cl_options & cl_opts ); + +// defined in delete_lz.cc +int delete_members_lz( const Cl_options & cl_opts, + const Archive_descriptor & ad, + std::vector< char > & name_pending, const int outfd ); + +// defined in exclude.cc +namespace Exclude { +void add_pattern( const std::string & arg ); +void clear(); +bool excluded( const char * const filename ); +} // end 
namespace Exclude + +// defined in extended.cc +extern const CRC32 crc32c; + // defined in lzip_index.cc int seek_read( const int fd, uint8_t * const buf, const int size, const long long pos ); @@ -455,7 +476,8 @@ struct stat; int hstat( const char * const filename, struct stat * const st, const bool dereference ); int open_instream( const std::string & name ); -int open_outstream( const std::string & name, const bool create = true ); +int open_outstream( const std::string & name, const bool create = true, + Resizable_buffer * const rbufp = 0 ); void cleanup_and_fail( const int retval = 1 ); // terminate the program void show_error( const char * const msg, const int errcode = 0, const bool help = false ); diff --git a/testsuite/check.sh b/testsuite/check.sh index 74e8549..13ef132 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Tarlz - Archiver with multimember lzip compression -# Copyright (C) 2013-2020 Antonio Diaz Diaz. +# Copyright (C) 2013-2021 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute, and modify it. @@ -39,8 +39,6 @@ test3_lz="${testdir}"/test3.tar.lz test3dir="${testdir}"/test3_dir.tar test3dir_lz="${testdir}"/test3_dir.tar.lz test3dot_lz="${testdir}"/test3_dot.tar.lz -tarint1_lz="${testdir}"/tar_in_tlz1.tar.lz -tarint2_lz="${testdir}"/tar_in_tlz2.tar.lz t155="${testdir}"/t155.tar t155_lz="${testdir}"/t155.tar.lz tlzit1="${testdir}"/tlz_in_tar1.tar @@ -115,6 +113,7 @@ cyg_symlink() { [ ${lwarnc} = 0 ] && # Note that multi-threaded --list succeeds with test_bad2.txt.tar.lz and # test3_bad3.tar.lz because their headers are intact. +"${TARLZ}" --check-lib # just print warning printf "testing tarlz-%s..." "$2" "${TARLZ}" -q -tf "${in}" @@ -181,25 +180,31 @@ rm -f empty.tar.lz empty.tlz || framework_failure "${TARLZ}" --group=invalid_goup_name -tf "${test3_lz}" 2> /dev/null [ $? 
= 1 ] || test_failed $LINENO +printf "\ntesting --list and --extract..." + # test --list and --extract "${TARLZ}" -tf "${eof_lz}" --missing-crc || test_failed $LINENO "${TARLZ}" -xf "${eof_lz}" --missing-crc || test_failed $LINENO -"${TARLZ}" -tf "${in_tar_lz}" --missing-crc > /dev/null || test_failed $LINENO -"${TARLZ}" -xf "${in_tar_lz}" --missing-crc || test_failed $LINENO -cmp "${in}" test.txt || test_failed $LINENO -rm -f test.txt || framework_failure "${TARLZ}" -C nx_dir -tf "${in_tar}" > /dev/null || test_failed $LINENO "${TARLZ}" -xf "${in_tar}" --missing-crc || test_failed $LINENO cmp "${in}" test.txt || test_failed $LINENO rm -f test.txt || framework_failure +"${TARLZ}" -tf "${in_tar_lz}" --missing-crc > /dev/null || test_failed $LINENO +for i in 0 2 6 ; do + "${TARLZ}" -n$i -xf "${in_tar_lz}" --missing-crc || test_failed $LINENO $i + cmp "${in}" test.txt || test_failed $LINENO $i + rm -f test.txt || framework_failure +done # test3 reference files for -t and -tv (list3, vlist3) "${TARLZ}" -tf "${test3}" > list3 || test_failed $LINENO "${TARLZ}" -tvf "${test3}" > vlist3 || test_failed $LINENO -"${TARLZ}" -tf "${test3_lz}" > out || test_failed $LINENO -diff -u list3 out || test_failed $LINENO -"${TARLZ}" -tvf "${test3_lz}" > out || test_failed $LINENO -diff -u vlist3 out || test_failed $LINENO +for i in 0 2 6 ; do + "${TARLZ}" -n$i -tf "${test3_lz}" > out || test_failed $LINENO $i + diff -u list3 out || test_failed $LINENO $i + "${TARLZ}" -n$i -tvf "${test3_lz}" > out || test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO $i +done rm -f out || framework_failure # test3 reference files for cmp @@ -209,41 +214,43 @@ cat "${testdir}"/rbaz > cbaz || framework_failure # test --list and --extract test3 rm -f foo bar baz || framework_failure -"${TARLZ}" -xf "${test3_lz}" --missing-crc || test_failed $LINENO -cmp cfoo foo || test_failed $LINENO -cmp cbar bar || test_failed $LINENO -cmp cbaz baz || test_failed $LINENO -rm -f foo bar baz || 
framework_failure -"${TARLZ}" -tvf "${test3_lz}" ./foo ./bar ./baz > out 2> /dev/null || - test_failed $LINENO -diff -u vlist3 out || test_failed $LINENO -rm -f out || framework_failure -"${TARLZ}" -q -xf "${test3_lz}" ./foo ./bar ./baz || test_failed $LINENO -cmp cfoo foo || test_failed $LINENO -cmp cbar bar || test_failed $LINENO -cmp cbaz baz || test_failed $LINENO -rm -f foo bar baz || framework_failure -"${TARLZ}" -xf "${test3_lz}" foo/ bar// baz/// || test_failed $LINENO -cmp cfoo foo || test_failed $LINENO -cmp cbar bar || test_failed $LINENO -cmp cbaz baz || test_failed $LINENO -rm -f foo bar baz || framework_failure "${TARLZ}" -xf "${test3}" --missing-crc || test_failed $LINENO cmp cfoo foo || test_failed $LINENO cmp cbar bar || test_failed $LINENO cmp cbaz baz || test_failed $LINENO rm -f foo bar baz || framework_failure -"${TARLZ}" -q -xf "${test3dot_lz}" --missing-crc || test_failed $LINENO -cmp cfoo foo || test_failed $LINENO -cmp cbar bar || test_failed $LINENO -cmp cbaz baz || test_failed $LINENO -rm -f foo bar baz || framework_failure -"${TARLZ}" -q -tf "${test3dot_lz}" foo bar baz || test_failed $LINENO -"${TARLZ}" -q -xf "${test3dot_lz}" foo bar baz || test_failed $LINENO -cmp cfoo foo || test_failed $LINENO -cmp cbar bar || test_failed $LINENO -cmp cbaz baz || test_failed $LINENO -rm -f foo bar baz || framework_failure +for i in 0 2 6 ; do + "${TARLZ}" -n$i -xf "${test3_lz}" --missing-crc || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -n$i -tvf "${test3_lz}" ./foo ./bar ./baz > out 2> /dev/null || + test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO $i + rm -f out || framework_failure + "${TARLZ}" -q -n$i -xf "${test3_lz}" ./foo ./bar ./baz || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed 
$LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -n$i -xf "${test3_lz}" foo/ bar// baz/// || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dot_lz}" --missing-crc || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n$i -tf "${test3dot_lz}" foo bar baz || test_failed $LINENO $i + "${TARLZ}" -q -n$i -xf "${test3dot_lz}" foo bar baz || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure +done for i in "${test3dir}" "${test3dir_lz}" ; do "${TARLZ}" -q -tf "$i" --missing-crc || test_failed $LINENO "$i" @@ -272,34 +279,37 @@ done cmp cbar bar || test_failed $LINENO [ ! -e baz ] || test_failed $LINENO rm -f foo bar baz || framework_failure -"${TARLZ}" -xf "${test3_lz}" --exclude=bar || test_failed $LINENO -cmp cfoo foo || test_failed $LINENO -[ ! -e bar ] || test_failed $LINENO -cmp cbaz baz || test_failed $LINENO -rm -f foo bar baz || framework_failure -"${TARLZ}" -q -xf "${test3dir_lz}" --exclude='?ar' || test_failed $LINENO -cmp cfoo dir/foo || test_failed $LINENO -[ ! -e dir/bar ] || test_failed $LINENO -cmp cbaz dir/baz || test_failed $LINENO -rm -rf dir || framework_failure -"${TARLZ}" -q -xf "${test3dir_lz}" --exclude=dir/bar || test_failed $LINENO -cmp cfoo dir/foo || test_failed $LINENO -[ ! -e dir/bar ] || test_failed $LINENO -cmp cbaz dir/baz || test_failed $LINENO -rm -rf dir || framework_failure -"${TARLZ}" -q -xf "${test3dir_lz}" --exclude=dir || test_failed $LINENO -[ ! 
-e dir ] || test_failed $LINENO -rm -rf dir || framework_failure -"${TARLZ}" -q -xf "${test3dir_lz}" --exclude='dir/*' || test_failed $LINENO -[ ! -e dir ] || test_failed $LINENO -rm -rf dir || framework_failure -"${TARLZ}" -q -xf "${test3dir_lz}" --exclude='[bf][ao][orz]' || - test_failed $LINENO -[ ! -e dir ] || test_failed $LINENO -rm -rf dir || framework_failure -"${TARLZ}" -q -xf "${test3dir_lz}" --exclude='*o' dir/foo || test_failed $LINENO -[ ! -e dir ] || test_failed $LINENO -rm -rf dir || framework_failure +for i in 0 2 6 ; do + "${TARLZ}" -n$i -xf "${test3_lz}" --exclude=bar || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + [ ! -e bar ] || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude='?ar' || test_failed $LINENO $i + cmp cfoo dir/foo || test_failed $LINENO $i + [ ! -e dir/bar ] || test_failed $LINENO $i + cmp cbaz dir/baz || test_failed $LINENO $i + rm -rf dir || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude=dir/bar || test_failed $LINENO $i + cmp cfoo dir/foo || test_failed $LINENO $i + [ ! -e dir/bar ] || test_failed $LINENO $i + cmp cbaz dir/baz || test_failed $LINENO $i + rm -rf dir || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude=dir || test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i + rm -rf dir || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude='dir/*' || test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i + rm -rf dir || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude='[bf][ao][orz]' || + test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i + rm -rf dir || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude='*o' dir/foo || + test_failed $LINENO $i + [ ! 
-e dir ] || test_failed $LINENO $i + rm -rf dir || framework_failure +done # test --list and --extract eof "${TARLZ}" -tvf "${testdir}"/test3_eof1.tar > out 2> /dev/null @@ -365,12 +375,6 @@ for i in 0 2 6 ; do cmp cbar bar || test_failed $LINENO $i cmp cbaz baz || test_failed $LINENO $i rm -f foo bar baz || framework_failure - "${TARLZ}" -n$i -xf "${testdir}"/test3_eof3.tar.lz || - test_failed $LINENO $i - cmp cfoo foo || test_failed $LINENO $i - [ ! -e bar ] || test_failed $LINENO $i - [ ! -e baz ] || test_failed $LINENO $i - rm -f foo bar baz || framework_failure "${TARLZ}" -q -n$i -xf "${testdir}"/test3_eof4.tar.lz [ $? = 2 ] || test_failed $LINENO $i cmp cfoo foo || test_failed $LINENO $i @@ -384,9 +388,14 @@ for i in 0 2 6 ; do cmp cbaz baz || test_failed $LINENO $i rm -f foo bar baz || framework_failure done +"${TARLZ}" -n0 -xf "${testdir}"/test3_eof3.tar.lz || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO $i +[ ! -e bar ] || test_failed $LINENO $i +[ ! -e baz ] || test_failed $LINENO $i +rm -f foo bar baz || framework_failure # test --list and --extract tar in tar.lz -for i in "${tarint1_lz}" "${tarint2_lz}" ; do +for i in "${testdir}"/tar_in_tlz1.tar.lz "${testdir}"/tar_in_tlz2.tar.lz ; do for j in 0 2 6 ; do "${TARLZ}" -tf "$i" -n$j > out$j || test_failed $LINENO "$i $j" @@ -400,10 +409,12 @@ for i in "${tarint1_lz}" "${tarint2_lz}" ; do diff -u outv0 outv6 || test_failed $LINENO "$i" diff -u outv2 outv6 || test_failed $LINENO "$i" rm -f out0 out2 out6 outv0 outv2 outv6 || framework_failure - "${TARLZ}" -xf "$i" || test_failed $LINENO "$i" - cmp "${in_tar}" test.txt.tar || test_failed $LINENO "$i" - cmp "${test3}" test3.tar || test_failed $LINENO "$i" - rm -f test.txt.tar test3.tar || framework_failure + for j in 0 2 6 ; do + "${TARLZ}" -xf "$i" -n$j || test_failed $LINENO "$i $j" + cmp "${in_tar}" test.txt.tar || test_failed $LINENO "$i $j" + cmp "${test3}" test3.tar || test_failed $LINENO "$i $j" + rm -f test.txt.tar test3.tar || 
framework_failure + done done # test --list and --extract with global headers uncompressed @@ -444,6 +455,8 @@ for i in em1 em2 em3 em4 em5 em6 gh1 gh2 gh3 gh4 gh5 gh6 sm1 sm2 sm3 sm4 ; do done rm -f list3 vlist3 || framework_failure +printf "\ntesting --concatenate..." + # test --concatenate compressed cat "${in}" > out.tar.lz || framework_failure # invalid tar.lz "${TARLZ}" -Aqf out.tar.lz "${test3_lz}" @@ -532,6 +545,8 @@ touch aout.tar || framework_failure # --exclude cmp out.tar aout.tar || test_failed $LINENO rm -f out.tar aout.tar || framework_failure +printf "\ntesting --create..." + # test --create cat "${in}" > test.txt || framework_failure "${TARLZ}" -0 -cf out.tar.lz test.txt || test_failed $LINENO @@ -645,14 +660,16 @@ cmp cfoo foo || test_failed $LINENO [ ! -e baz ] || test_failed $LINENO rm -f out.tar foo bar baz || framework_failure +printf "\ntesting --diff..." + # test --diff "${TARLZ}" -xf "${test3_lz}" || test_failed $LINENO "${TARLZ}" --uncompressed -cf out.tar foo || test_failed $LINENO "${TARLZ}" --uncompressed -cf aout.tar foo --anonymous || test_failed $LINENO if cmp out.tar aout.tar > /dev/null ; then - printf "\nwarning: '--diff' test can't be run as root." + printf "\nwarning: '--diff' test can't be run as root.\n" else - for i in 0 2 ; do + for i in 0 2 6 ; do "${TARLZ}" -n$i -xf "${test3_lz}" || test_failed $LINENO "${TARLZ}" -n$i -df "${test3_lz}" > out$i [ $? = 1 ] || test_failed $LINENO $i @@ -678,6 +695,8 @@ fi cmp out0 out2 || test_failed $LINENO rm -f out0 out2 out.tar aout.tar foo bar baz || framework_failure +printf "\ntesting --delete..." + # test --delete for e in "" .lz ; do "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e @@ -774,13 +793,15 @@ for i in 1 2 3 4 ; do rm -f out.tar || framework_failure done +printf "\ntesting --dereference..." 
+ # test --dereference touch dummy_file || framework_failure if ln dummy_file dummy_link 2> /dev/null && ln -s dummy_file dummy_slink 2> /dev/null ; then ln_works=yes else - printf "\nwarning: skipping link test: 'ln' does not work on your system." + printf "\nwarning: skipping link test: 'ln' does not work on your system.\n" fi rm -f dummy_slink dummy_link dummy_file || framework_failure # @@ -814,6 +835,8 @@ if [ "${ln_works}" = yes ] ; then done fi +printf "\ntesting --append..." + # test --append compressed cat cfoo > foo || framework_failure cat cbar > bar || framework_failure @@ -915,6 +938,8 @@ for i in --asolid --bsolid --dsolid -0 ; do done rm -f foo bar baz || framework_failure +printf "\ntesting dirs and links..." + # test -c -d -x on directories and links mkdir dir1 || framework_failure "${TARLZ}" -0 -cf out.tar.lz dir1 || test_failed $LINENO @@ -1089,6 +1114,8 @@ if [ "${ln_works}" = yes ] ; then rm -rf dir1 || framework_failure fi +printf "\ntesting --keep-damaged..." + # test --extract and --keep-damaged compressed rm -f test.txt || framework_failure for i in "${inbad1}" "${inbad2}" ; do @@ -1096,7 +1123,7 @@ for i in "${inbad1}" "${inbad2}" ; do [ $? = 2 ] || test_failed $LINENO "$i" [ ! -e test.txt ] || test_failed $LINENO "$i" rm -f test.txt || framework_failure - "${TARLZ}" -q -xf "${i}.tar.lz" --keep-damaged + "${TARLZ}" -q -n0 -xf "${i}.tar.lz" --keep-damaged [ $? = 2 ] || test_failed $LINENO "$i" [ -e test.txt ] || test_failed $LINENO "$i" cmp "$i" test.txt 2> /dev/null || lzlib_1_11 "$LINENO $i" @@ -1104,55 +1131,55 @@ for i in "${inbad1}" "${inbad2}" ; do done # rm -f foo bar baz || framework_failure -"${TARLZ}" -q -xf "${bad1_lz}" +"${TARLZ}" -q -n0 -xf "${bad1_lz}" [ $? = 2 ] || test_failed $LINENO [ ! -e foo ] || test_failed $LINENO cmp cbar bar || test_failed $LINENO cmp cbaz baz || test_failed $LINENO rm -f foo bar baz || framework_failure -"${TARLZ}" -q -xf "${bad2_lz}" +"${TARLZ}" -q -n0 -xf "${bad2_lz}" [ $? 
= 2 ] || test_failed $LINENO [ ! -e foo ] || test_failed $LINENO cmp cbar bar || test_failed $LINENO cmp cbaz baz || test_failed $LINENO rm -f foo bar baz || framework_failure -"${TARLZ}" -q -xf "${bad3_lz}" +"${TARLZ}" -q -n0 -xf "${bad3_lz}" [ $? = 2 ] || test_failed $LINENO cmp cfoo foo || test_failed $LINENO [ ! -e bar ] || test_failed $LINENO cmp cbaz baz || test_failed $LINENO rm -f foo bar baz || framework_failure -"${TARLZ}" -q -xf "${bad3_lz}" --keep-damaged +"${TARLZ}" -q -n0 -xf "${bad3_lz}" --keep-damaged [ $? = 2 ] || test_failed $LINENO cmp cfoo foo || test_failed $LINENO cmp cbar bar 2> /dev/null || lzlib_1_11 $LINENO cmp cbaz baz || test_failed $LINENO rm -f foo bar baz || framework_failure -"${TARLZ}" -q -xf "${bad4_lz}" +"${TARLZ}" -q -n0 -xf "${bad4_lz}" [ $? = 2 ] || test_failed $LINENO [ ! -e foo ] || test_failed $LINENO [ ! -e bar ] || test_failed $LINENO cmp cbaz baz || test_failed $LINENO rm -f foo bar baz || framework_failure -"${TARLZ}" -q -xf "${bad4_lz}" --keep-damaged +"${TARLZ}" -q -n0 -xf "${bad4_lz}" --keep-damaged [ $? = 2 ] || test_failed $LINENO [ ! -e foo ] || test_failed $LINENO cmp cbar bar 2> /dev/null || lzlib_1_11 $LINENO cmp cbaz baz || test_failed $LINENO rm -f foo bar baz || framework_failure -"${TARLZ}" -q -xf "${bad5_lz}" +"${TARLZ}" -q -n0 -xf "${bad5_lz}" [ $? = 2 ] || test_failed $LINENO [ ! -e foo ] || test_failed $LINENO [ ! -e bar ] || test_failed $LINENO cmp cbaz baz || test_failed $LINENO rm -f foo bar baz || framework_failure -"${TARLZ}" -q -xf "${bad5_lz}" --keep-damaged +"${TARLZ}" -q -n0 -xf "${bad5_lz}" --keep-damaged [ $? = 2 ] || test_failed $LINENO cmp cfoo foo 2> /dev/null || lzlib_1_11 $LINENO [ ! -e bar ] || test_failed $LINENO cmp cbaz baz || test_failed $LINENO rm -f foo bar baz || framework_failure -"${TARLZ}" -q -xf "${bad6_lz}" +"${TARLZ}" -q -n0 -xf "${bad6_lz}" [ $? = 2 ] || test_failed $LINENO cmp cfoo foo || test_failed $LINENO cmp cbar bar || test_failed $LINENO |