summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2019-03-16 06:46:00 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2019-03-16 06:46:00 +0000
commit d25dcde027d4b19e3d7728f96138210862ed0aa0 (patch)
tree 30c61903dae6754752ffa50918d78a05b58dce90
parent Releasing debian version 0.13-1. (diff)
download tarlz-d25dcde027d4b19e3d7728f96138210862ed0aa0.tar.xz
tarlz-d25dcde027d4b19e3d7728f96138210862ed0aa0.zip
Merging upstream version 0.14.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
-rw-r--r-- ChangeLog 10
-rw-r--r-- Makefile.in 6
-rw-r--r-- NEWS 22
-rw-r--r-- README 3
-rwxr-xr-x configure 2
-rw-r--r-- create.cc 245
-rw-r--r-- create_lz.cc 31
-rw-r--r-- doc/tarlz.1 13
-rw-r--r-- doc/tarlz.info 127
-rw-r--r-- doc/tarlz.texi 96
-rw-r--r-- exclude.cc 55
-rw-r--r-- extended.cc 20
-rw-r--r-- extract.cc 94
-rw-r--r-- list_lz.cc 28
-rw-r--r-- main.cc 31
-rw-r--r-- tarlz.h 29
-rwxr-xr-x testsuite/check.sh 246
-rw-r--r-- testsuite/eof.tar bin 0 -> 1024 bytes
-rw-r--r-- testsuite/test.txt.tar.lz bin 7495 -> 7495 bytes
-rw-r--r-- testsuite/test_bad1.txt.tar.lz bin 6000 -> 6000 bytes
-rw-r--r-- testsuite/test_bad2.txt.tar.lz bin 7495 -> 7495 bytes
21 files changed, 794 insertions, 264 deletions
diff --git a/ChangeLog b/ChangeLog
index 6c6884a..4845761 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2019-03-12 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.14 released.
+ * Added new option '--exclude'.
+ * Added new option '-h, --dereference'.
+ * Short option name '-h' no longer means '--help'.
+ * create.cc: Implemented '-A, --concatenate', '-r, --append' to
+ uncompressed archives and to standard output.
+ * main.cc: Ported option '--out-slots' from plzip.
+
2019-02-27 Antonio Diaz Diaz <antonio@gnu.org>
* Version 0.13 released.
diff --git a/Makefile.in b/Makefile.in
index cf5519d..bdd5fe1 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -8,8 +8,8 @@ LIBS = -llz -lpthread
SHELL = /bin/sh
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
-objs = arg_parser.o lzip_index.o create.o create_lz.o extended.o extract.o \
- list_lz.o main.o
+objs = arg_parser.o lzip_index.o create.o create_lz.o exclude.o extended.o \
+ extract.o list_lz.o main.o
.PHONY : all install install-bin install-info install-man \
@@ -33,6 +33,7 @@ $(objs) : Makefile
arg_parser.o : arg_parser.h
create.o : arg_parser.h tarlz.h
create_lz.o : arg_parser.h tarlz.h
+exclude.o : tarlz.h
extended.o : tarlz.h
extract.o : arg_parser.h lzip_index.h tarlz.h
list_lz.o : arg_parser.h lzip_index.h tarlz.h
@@ -131,6 +132,7 @@ dist : doc
$(DISTNAME)/testsuite/rbaz \
$(DISTNAME)/testsuite/test3.tar \
$(DISTNAME)/testsuite/test3_bad[1-5].tar \
+ $(DISTNAME)/testsuite/eof.tar \
$(DISTNAME)/testsuite/test.txt.lz \
$(DISTNAME)/testsuite/test.txt.tar.lz \
$(DISTNAME)/testsuite/test_bad[12].txt.tar.lz \
diff --git a/NEWS b/NEWS
index 07f4272..2b736ee 100644
--- a/NEWS
+++ b/NEWS
@@ -1,11 +1,17 @@
-Changes in version 0.13:
+Changes in version 0.14:
-Skipping of unreadable files during multi-threaded archive creation with
-per-file compression has been fixed. Tarlz did produce empty lzip members,
-and sometines left the last files out of the archive.
+The new option '--exclude', which excludes files matching a shell pattern,
+has been added.
-Multi-threaded listing of tar.lz archives containing empty lzip members has
-been fixed. It listed members out of order and sometimes hung.
+The new option '-h, --dereference', which instructs tarlz to follow symbolic
+links during archive creation, appending or comparison, has been added.
+(The short option name '-h' no longer means '--help'.)
-When creating an archive, negative modification times are now stored as cero
-(1970-01-01 00:00:00 UTC). Negative times are not portable.
+Concatenation and appending to uncompressed archives and to standard output
+have been implemented.
+
+The new option '--out-slots', setting the number of output packets buffered
+per worker thread during multi-threaded creation and appending to compressed
+archives, has been added. Increasing the number of packets may increase
+compression speed if the files being archived are larger than 64 MiB
+compressed, but requires more memory.
diff --git a/README b/README
index 3ada513..f1149a2 100644
--- a/README
+++ b/README
@@ -11,7 +11,8 @@ it like any other tar.lz archive. Tarlz can append files to the end of such
compressed archives.
Tarlz can create tar archives with five levels of compression granularity;
-per file, per block (default), per directory, appendable solid, and solid.
+per file (--no-solid), per block (--bsolid, default), per directory
+(--dsolid), appendable solid (--asolid), and solid (--solid).
Of course, compressing each file (or each directory) individually can't
achieve a compression ratio as high as compressing solidly the whole tar
diff --git a/configure b/configure
index 2d701da..75442cf 100755
--- a/configure
+++ b/configure
@@ -6,7 +6,7 @@
# to copy, distribute and modify it.
pkgname=tarlz
-pkgversion=0.13
+pkgversion=0.14
progname=tarlz
srctrigger=doc/${pkgname}.texi
diff --git a/create.cc b/create.cc
index cba638a..f70b99f 100644
--- a/create.cc
+++ b/create.cc
@@ -54,7 +54,7 @@ namespace {
LZ_Encoder * encoder = 0; // local vars needed by add_member
const char * archive_namep = 0;
unsigned long long partial_data_size = 0; // size of current block
-Resizable_buffer grbuf( 2 * header_size ); // extended header + data
+Resizable_buffer grbuf; // extended header + data
int goutfd = -1;
int error_status = 0;
@@ -123,56 +123,108 @@ bool copy_file( const int infd, const int outfd, const long long max_size = -1 )
}
-/* Check archive type. If success, leave fd file pos at 0.
- If remove_eof, leave fd file pos at beginning of the EOF blocks. */
-bool check_appendable( const int fd, const bool remove_eof )
+/* Check archive type. Return position of EOF blocks or -1 if failure.
+ If remove_eof, leave fd file pos at beginning of the EOF blocks.
+ Else, leave fd file pos at 0. */
+long long check_appendable( const int fd, const bool remove_eof )
{
- struct stat st;
- if( fstat( fd, &st ) != 0 || !S_ISREG( st.st_mode ) ) return false;
- if( lseek( fd, 0, SEEK_SET ) != 0 ) return false;
+ struct stat st; // fd must be regular
+ if( fstat( fd, &st ) != 0 || !S_ISREG( st.st_mode ) ) return -1;
+ if( lseek( fd, 0, SEEK_SET ) != 0 ) return -1;
enum { bufsize = header_size + ( header_size / 8 ) };
uint8_t buf[bufsize];
- int rd = readblock( fd, buf, bufsize );
- if( rd == 0 && errno == 0 ) return true; // append to empty archive
- if( rd < min_member_size || ( rd != bufsize && errno ) ) return false;
+ const int rd = readblock( fd, buf, bufsize );
+ if( rd == 0 && errno == 0 ) return 0; // append to empty archive
+ if( rd < min_member_size || ( rd != bufsize && errno ) ) return -1;
const Lzip_header * const p = (const Lzip_header *)buf; // shut up gcc
- if( !p->verify_magic() || !p->verify_version() ) return false;
+ if( !p->verify_magic() || !p->verify_version() ) return -1;
LZ_Decoder * decoder = LZ_decompress_open(); // decompress first header
if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ||
LZ_decompress_write( decoder, buf, rd ) != rd ||
- ( rd = LZ_decompress_read( decoder, buf, header_size ) ) != header_size )
- { LZ_decompress_close( decoder ); return false; }
+ LZ_decompress_read( decoder, buf, header_size ) != header_size )
+ { LZ_decompress_close( decoder ); return -1; }
LZ_decompress_close( decoder );
- const bool maybe_eof = ( buf[0] == 0 );
- if( !verify_ustar_chksum( buf ) && !maybe_eof ) return false;
+ const bool maybe_eof = block_is_zero( buf, header_size );
+ if( !verify_ustar_chksum( buf ) && !maybe_eof ) return -1;
const long long end = lseek( fd, 0, SEEK_END );
- if( end < min_member_size ) return false;
+ if( end < min_member_size ) return -1;
Lzip_trailer trailer;
if( seek_read( fd, trailer.data, Lzip_trailer::size,
- end - Lzip_trailer::size ) != Lzip_trailer::size )
- return false;
+ end - Lzip_trailer::size ) != Lzip_trailer::size ) return -1;
const long long member_size = trailer.member_size();
if( member_size < min_member_size || member_size > end ||
- ( maybe_eof && member_size != end ) ) return false;
+ ( maybe_eof && member_size != end ) ) return -1;
Lzip_header header;
if( seek_read( fd, header.data, Lzip_header::size,
- end - member_size ) != Lzip_header::size ) return false;
+ end - member_size ) != Lzip_header::size ) return -1;
if( !header.verify_magic() || !header.verify_version() ||
- !isvalid_ds( header.dictionary_size() ) ) return false;
+ !isvalid_ds( header.dictionary_size() ) ) return -1;
const unsigned long long data_size = trailer.data_size();
- if( data_size < header_size || data_size > 32256 ) return false;
+ if( data_size < header_size || data_size > 32256 ) return -1;
const unsigned data_crc = trailer.data_crc();
const CRC32 crc32;
uint32_t crc = 0xFFFFFFFFU;
for( unsigned i = 0; i < data_size; ++i ) crc32.update_byte( crc, 0 );
crc ^= 0xFFFFFFFFU;
- if( crc != data_crc ) return false;
+ if( crc != data_crc ) return -1;
const long long pos = remove_eof ? end - member_size : 0;
- return ( lseek( fd, pos, SEEK_SET ) == pos );
+ if( lseek( fd, pos, SEEK_SET ) != pos ) return -1;
+ return end - member_size;
+ }
+
+
+/* Skip all tar headers. Return position of EOF blocks or -1 if failure.
+ If remove_eof, leave fd file pos at beginning of the EOF blocks.
+ Else, leave fd file pos at 0. */
+long long check_uncompressed_appendable( const int fd, const bool remove_eof )
+ {
+ struct stat st; // fd must be regular
+ if( fstat( fd, &st ) != 0 || !S_ISREG( st.st_mode ) ) return -1;
+ if( lseek( fd, 0, SEEK_SET ) != 0 ) return -1;
+ if( st.st_size == 0 ) return 0; // append to empty archive
+ long long eof_pos = 0;
+ Extended extended; // metadata from extended records
+ Resizable_buffer rbuf; // extended records buffer
+ bool prev_extended = false; // prev header was extended
+ while( true ) // process one tar member per iteration
+ {
+ Tar_header header;
+ const int rd = readblock( fd, header, header_size );
+ if( rd == 0 && errno == 0 ) break; // missing EOF blocks
+ if( rd != header_size ) return -1;
+ if( !verify_ustar_chksum( header ) ) // maybe EOF
+ { if( block_is_zero( header, header_size ) ) break; else return -1; }
+ const Typeflag typeflag = (Typeflag)header[typeflag_o];
+ if( typeflag == tf_extended || typeflag == tf_global )
+ {
+ if( prev_extended ) return -1;
+ const unsigned long long edsize = parse_octal( header + size_o, size_l );
+ const unsigned long long bufsize = round_up( edsize );
+ if( edsize == 0 || edsize >= 1ULL << 33 || bufsize >= INT_MAX )
+ return -1; // overflow or no extended data
+ if( !rbuf.resize( bufsize ) ) return -1;
+ if( readblock( fd, (uint8_t *)rbuf(), bufsize ) != (int)bufsize )
+ return -1;
+ if( typeflag == tf_extended )
+ { if( !extended.parse( rbuf(), edsize, false ) ) return -1;
+ prev_extended = true; }
+ continue;
+ }
+ prev_extended = false;
+
+ eof_pos = lseek( fd, round_up( extended.get_file_size_and_reset( header ) ),
+ SEEK_CUR );
+ if( eof_pos <= 0 ) return -1;
+ }
+
+ if( prev_extended ) return -1;
+ const long long pos = remove_eof ? eof_pos : 0;
+ if( lseek( fd, pos, SEEK_SET ) != pos ) return -1;
+ return eof_pos;
}
@@ -251,6 +303,7 @@ bool store_name( const char * const filename, Extended & extended,
int add_member( const char * const filename, const struct stat *,
const int flag, struct FTW * )
{
+ if( Exclude::excluded( filename ) ) return 0; // skip excluded
unsigned long long file_size = 0;
Extended extended; // metadata for extended records
Tar_header header;
@@ -310,6 +363,26 @@ bool writeblock_wrapper( const int outfd, const uint8_t * const buffer,
}
+// write End-Of-Archive records
+bool write_eof_records( const int outfd, const bool compressed )
+ {
+ if( compressed )
+ {
+ enum { eof_member_size = 44 };
+ const uint8_t eof_member[eof_member_size] = {
+ 0x4C, 0x5A, 0x49, 0x50, 0x01, 0x0C, 0x00, 0x00, 0x6F, 0xFD, 0xFF, 0xFF,
+ 0xA3, 0xB7, 0x80, 0x0C, 0x82, 0xDB, 0xFF, 0xFF, 0x9F, 0xF0, 0x00, 0x00,
+ 0x2E, 0xAF, 0xB5, 0xEF, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x2C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+ return writeblock_wrapper( outfd, eof_member, eof_member_size );
+ }
+ enum { bufsize = 2 * header_size };
+ uint8_t buf[bufsize];
+ std::memset( buf, 0, bufsize );
+ return writeblock_wrapper( outfd, buf, bufsize );
+ }
+
+
/* Removes any amount of leading "./" and '/' strings from filename.
Optionally also removes prefixes containing a ".." component. */
const char * remove_leading_dotslash( const char * const filename,
@@ -348,7 +421,7 @@ bool fill_headers( const char * const filename, Extended & extended,
const int flag )
{
struct stat st;
- if( lstat( filename, &st ) != 0 )
+ if( hstat( filename, &st ) != 0 )
{ show_file_error( filename, "Can't stat input file", errno );
set_error_status( 1 ); return false; }
if( file_is_the_archive( st ) )
@@ -492,52 +565,95 @@ bool verify_ustar_chksum( const uint8_t * const header )
ustar_chksum( header ) == parse_octal( header + chksum_o, chksum_l ) ); }
-int concatenate( const std::string & archive_name, const Arg_parser & parser,
+bool has_lz_ext( const std::string & name )
+ {
+ return ( name.size() > 3 &&
+ name.compare( name.size() - 3, 3, ".lz" ) == 0 ) ||
+ ( name.size() > 4 &&
+ name.compare( name.size() - 4, 4, ".tlz" ) == 0 );
+ }
+
+
+int concatenate( std::string archive_name, const Arg_parser & parser,
const int filenames )
{
if( !filenames )
{ if( verbosity >= 1 ) show_error( "Nothing to concatenate." ); return 0; }
- if( archive_name.empty() )
- { show_error( "'--concatenate' is incompatible with '-f -'.", 0, true );
- return 1; }
- const int outfd = open_outstream( archive_name, false );
+ const bool to_stdout = archive_name.empty();
+ const int outfd =
+ to_stdout ? STDOUT_FILENO : open_outstream( archive_name, false );
if( outfd < 0 ) return 1;
- if( !file_is_the_archive.init( outfd ) )
+ if( to_stdout ) archive_name = "(stdout)";
+ else if( !file_is_the_archive.init( outfd ) )
{ show_file_error( archive_name.c_str(), "Can't stat", errno ); return 1; }
+ int compressed; // tri-state bool
+ if( to_stdout ) compressed = -1; // unknown
+ else
+ {
+ compressed = has_lz_ext( archive_name ); // default value
+ long long pos = check_appendable( outfd, true );
+ if( pos > 0 ) compressed = true;
+ else if( pos < 0 )
+ {
+ pos = check_uncompressed_appendable( outfd, true );
+ if( pos > 0 ) compressed = false;
+ else if( pos < 0 )
+ { show_file_error( archive_name.c_str(), compressed ?
+ "This does not look like an appendable tar.lz archive." :
+ "This does not look like an appendable tar archive." );
+ return 2; }
+ }
+ }
int retval = 0;
+ bool eof_pending = false;
for( int i = 0; i < parser.arguments(); ++i ) // copy archives
{
if( parser.code( i ) ) continue; // skip options
if( parser.argument( i ).empty() ) continue; // skip empty names
const char * const filename = parser.argument( i ).c_str();
+ if( Exclude::excluded( filename ) ) continue; // skip excluded
const int infd = open_instream( filename );
if( infd < 0 ) { retval = 1; break; }
- if( !check_appendable( infd, false ) )
- { show_file_error( filename, "Not an appendable tar.lz archive." );
- close( infd ); retval = 2; break; }
struct stat st;
- if( fstat( infd, &st ) == 0 && file_is_the_archive( st ) )
+ if( !to_stdout && fstat( infd, &st ) == 0 && file_is_the_archive( st ) )
{ show_file_error( filename, "File is the archive; not concatenated." );
close( infd ); continue; }
- if( !check_appendable( outfd, true ) )
- { show_error( "This does not look like an appendable tar.lz archive." );
+ long long size;
+ if( compressed < 0 ) // not initialized yet
+ {
+ if( ( size = check_appendable( infd, false ) ) > 0 ) compressed = true;
+ else if( ( size = check_uncompressed_appendable( infd, false ) ) > 0 )
+ compressed = false;
+ else { size = -1 ; compressed = has_lz_ext( filename ); }
+ }
+ else size = compressed ? check_appendable( infd, false ) :
+ check_uncompressed_appendable( infd, false );
+ if( size < 0 )
+ { show_file_error( filename, compressed ?
+ "Not an appendable tar.lz archive." :
+ "Not an appendable tar archive." );
close( infd ); retval = 2; break; }
- if( !copy_file( infd, outfd ) || close( infd ) != 0 )
+ if( !copy_file( infd, outfd, size ) || close( infd ) != 0 )
{ show_file_error( filename, "Error copying archive", errno );
- retval = 1; break; }
+ eof_pending = false; retval = 1; break; }
+ eof_pending = true;
if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
}
+ if( eof_pending && !write_eof_records( outfd, compressed ) && !retval )
+ retval = 1;
if( close( outfd ) != 0 && !retval )
- { show_error( "Error closing archive", errno ); retval = 1; }
+ { show_file_error( archive_name.c_str(), "Error closing archive", errno );
+ retval = 1; }
return retval;
}
int encode( const std::string & archive_name, const Arg_parser & parser,
const int filenames, const int level, const int num_workers,
- const int debug_level, const bool append )
+ const int out_slots, const int debug_level, const bool append,
+ const bool dereference )
{
struct Lzma_options
{
@@ -558,28 +674,32 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
{ 1 << 25, 273 } }; // -9
const bool compressed = ( level >= 0 && level <= 9 );
- if( !append )
+ if( archive_name.size() && !compressed && has_lz_ext( archive_name ) )
+ { show_file_error( archive_name.c_str(),
+ "Uncompressed mode incompatible with .lz extension." ); return 2; }
+
+ if( !filenames )
{
- if( !filenames )
+ if( !append && archive_name.size() ) // create archive
{ show_error( "Cowardly refusing to create an empty archive.", 0, true );
return 1; }
- if( archive_name.empty() ) goutfd = STDOUT_FILENO;
- else if( ( goutfd = open_outstream( archive_name ) ) < 0 ) return 1;
+ else // create/append to stdout or append to archive
+ { if( verbosity >= 1 ) show_error( "Nothing to append." ); return 0; }
}
- else
+
+ if( archive_name.empty() ) // create/append to stdout
+ goutfd = STDOUT_FILENO;
+ else if( !append ) // create archive
+ { if( ( goutfd = open_outstream( archive_name ) ) < 0 ) return 1; }
+ else // append to archive
{
- if( !filenames )
- { if( verbosity >= 1 ) show_error( "Nothing to append." ); return 0; }
- if( archive_name.empty() )
- { show_error( "'--append' is incompatible with '-f -'.", 0, true );
- return 1; }
- if( !compressed )
- { show_error( "'--append' is incompatible with '--uncompressed'.", 0, true );
- return 1; }
if( ( goutfd = open_outstream( archive_name, false ) ) < 0 ) return 1;
- if( !check_appendable( goutfd, true ) )
- { show_error( "This does not look like an appendable tar.lz archive." );
- return 2; }
+ if( compressed && check_appendable( goutfd, true ) < 0 )
+ { show_file_error( archive_name.c_str(),
+ "This does not look like an appendable tar.lz archive." ); return 2; }
+ if( !compressed && check_uncompressed_appendable( goutfd, true ) < 0 )
+ { show_file_error( archive_name.c_str(),
+ "This does not look like an appendable tar archive." ); return 2; }
}
archive_namep = archive_name.size() ? archive_name.c_str() : "(stdout)";
@@ -602,7 +722,7 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
// show_file_error( archive_namep, "Multi-threaded --create" );
return encode_lz( parser, dictionary_size,
option_mapping[level].match_len_limit, num_workers,
- goutfd, debug_level );
+ goutfd, out_slots, debug_level, dereference );
}
encoder = LZ_compress_open( dictionary_size,
option_mapping[level].match_len_limit, LLONG_MAX );
@@ -632,14 +752,16 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
while( len > 1 && arg[len-1] == '/' ) --len;
if( len < arg.size() )
{ deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); }
+ if( Exclude::excluded( filename ) ) continue; // skip excluded
struct stat st;
if( lstat( filename, &st ) != 0 ) // filename from command line
{ show_file_error( filename, "Can't stat input file", errno );
set_error_status( 1 ); }
- else if( ( retval = nftw( filename, add_member, 16, FTW_PHYS ) ) != 0 )
+ else if( ( retval = nftw( filename, add_member, 16,
+ dereference ? 0 : FTW_PHYS ) ) != 0 )
break; // write error
else if( encoder && solidity == dsolid && !archive_write( 0, 0 ) )
- retval = 1;
+ { retval = 1; break; }
}
if( !retval ) // write End-Of-Archive records
@@ -656,6 +778,7 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
if( encoder && LZ_compress_close( encoder ) < 0 )
{ show_error( "LZ_compress_close failed." ); retval = 1; }
if( close( goutfd ) != 0 && !retval )
- { show_error( "Error closing archive", errno ); retval = 1; }
+ { show_file_error( archive_name.c_str(), "Error closing archive", errno );
+ retval = 1; }
return final_exit_status( retval );
}
diff --git a/create_lz.cc b/create_lz.cc
index e2aaf3d..e72839e 100644
--- a/create_lz.cc
+++ b/create_lz.cc
@@ -41,7 +41,7 @@ namespace {
enum { max_packet_size = 1 << 20 };
class Packet_courier;
-Packet_courier * courierp = 0; // local vars needed by add_member
+Packet_courier * courierp = 0; // local vars needed by add_member_lz
unsigned long long partial_data_size = 0; // size of current block
@@ -257,9 +257,10 @@ public:
// send one ipacket with tar member metadata to courier
-int add_member( const char * const filename, const struct stat *,
- const int flag, struct FTW * )
+int add_member_lz( const char * const filename, const struct stat *,
+ const int flag, struct FTW * )
{
+ if( Exclude::excluded( filename ) ) return 0; // skip excluded
unsigned long long file_size = 0;
// metadata for extended records
Extended * const extended = new( std::nothrow ) Extended;
@@ -286,6 +287,7 @@ struct Grouper_arg
{
Packet_courier * courier;
const Arg_parser * parser;
+ bool dereference;
};
@@ -296,6 +298,7 @@ extern "C" void * grouper( void * arg )
const Grouper_arg & tmp = *(const Grouper_arg *)arg;
Packet_courier & courier = *tmp.courier;
const Arg_parser & parser = *tmp.parser;
+ const bool dereference = tmp.dereference;
for( int i = 0; i < parser.arguments(); ++i ) // parse command line
{
@@ -312,11 +315,13 @@ extern "C" void * grouper( void * arg )
while( len > 1 && arg[len-1] == '/' ) --len;
if( len < arg.size() )
{ deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); }
+ if( Exclude::excluded( filename ) ) continue; // skip excluded
struct stat st;
if( lstat( filename, &st ) != 0 ) // filename from command line
{ show_file_error( filename, "Can't stat input file", errno );
set_error_status( 1 ); }
- else if( nftw( filename, add_member, 16, FTW_PHYS ) != 0 )
+ else if( nftw( filename, add_member_lz, 16,
+ dereference ? 0 : FTW_PHYS ) != 0 )
cleanup_and_fail(); // write error or oom
else if( solidity == dsolid ) // end of group
courier.receive_packet( new Ipacket );
@@ -401,7 +406,7 @@ extern "C" void * cworker( void * arg )
LZ_Encoder * encoder = 0;
uint8_t * data = 0;
- Resizable_buffer rbuf( 2 * header_size ); // extended header + data
+ Resizable_buffer rbuf; // extended header + data
if( !rbuf.size() ) { show_error( mem_msg2 ); cleanup_and_fail(); }
int opos = 0;
@@ -518,21 +523,22 @@ void muxer( Packet_courier & courier, const int outfd )
// init the courier, then start the grouper and the workers and call the muxer
int encode_lz( const Arg_parser & parser, const int dictionary_size,
const int match_len_limit, const int num_workers,
- const int outfd, const int debug_level )
+ const int outfd, const int out_slots, const int debug_level,
+ const bool dereference )
{
const int in_slots = 65536; // max small files (<=512B) in 64 MiB
const int total_in_slots = ( INT_MAX / num_workers >= in_slots ) ?
num_workers * in_slots : INT_MAX;
- const int out_slots = 64;
/* If an error happens after any threads have been started, exit must be
called before courier goes out of scope. */
Packet_courier courier( num_workers, total_in_slots, out_slots );
- courierp = &courier; // needed by add_member
+ courierp = &courier; // needed by add_member_lz
Grouper_arg grouper_arg;
grouper_arg.courier = &courier;
grouper_arg.parser = &parser;
+ grouper_arg.dereference = dereference;
pthread_t grouper_thread;
int errcode = pthread_create( &grouper_thread, 0, grouper, &grouper_arg );
@@ -570,14 +576,7 @@ int encode_lz( const Arg_parser & parser, const int dictionary_size,
{ show_error( "Can't join grouper thread", errcode ); cleanup_and_fail(); }
// write End-Of-Archive records
- int retval = 0;
- enum { eof_member_size = 44 };
- const uint8_t eof_member[eof_member_size] = {
- 0x4C, 0x5A, 0x49, 0x50, 0x01, 0x0C, 0x00, 0x00, 0x6F, 0xFD, 0xFF, 0xFF,
- 0xA3, 0xB7, 0x80, 0x0C, 0x82, 0xDB, 0xFF, 0xFF, 0x9F, 0xF0, 0x00, 0x00,
- 0x2E, 0xAF, 0xB5, 0xEF, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x2C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
- if( !writeblock_wrapper( outfd, eof_member, eof_member_size ) ) retval = 1;
+ int retval = !write_eof_records( outfd, true );
if( close( outfd ) != 0 && !retval )
{ show_error( "Error closing archive", errno ); retval = 1; }
diff --git a/doc/tarlz.1 b/doc/tarlz.1
index 47be9a8..a17e58d 100644
--- a/doc/tarlz.1
+++ b/doc/tarlz.1
@@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
-.TH TARLZ "1" "February 2019" "tarlz 0.13" "User Commands"
+.TH TARLZ "1" "March 2019" "tarlz 0.14" "User Commands"
.SH NAME
tarlz \- creates tar archives with multimember lzip compression
.SH SYNOPSIS
@@ -24,7 +24,7 @@ recover as much data as possible from each damaged member, and lziprecover
can be used to recover some of the damaged members.
.SH OPTIONS
.TP
-\fB\-h\fR, \fB\-\-help\fR
+\fB\-\-help\fR
display this help and exit
.TP
\fB\-V\fR, \fB\-\-version\fR
@@ -48,9 +48,15 @@ find differences between archive and file system
\fB\-\-ignore\-ids\fR
ignore differences in owner and group IDs
.TP
+\fB\-\-exclude=\fR<pattern>
+exclude files matching a shell pattern
+.TP
\fB\-f\fR, \fB\-\-file=\fR<archive>
use archive file <archive>
.TP
+\fB\-h\fR, \fB\-\-dereference\fR
+follow symlinks; archive the files they point to
+.TP
\fB\-n\fR, \fB\-\-threads=\fR<n>
set number of (de)compression threads [2]
.TP
@@ -104,6 +110,9 @@ don't delete partially extracted files
.TP
\fB\-\-missing\-crc\fR
exit with error status if missing extended CRC
+.TP
+\fB\-\-out\-slots=\fR<n>
+number of 1 MiB output packets buffered [64]
.PP
Exit status: 0 for a normal exit, 1 for environmental problems (file
not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
diff --git a/doc/tarlz.info b/doc/tarlz.info
index fa8666c..fc1f092 100644
--- a/doc/tarlz.info
+++ b/doc/tarlz.info
@@ -11,7 +11,7 @@ File: tarlz.info, Node: Top, Next: Introduction, Up: (dir)
Tarlz Manual
************
-This manual is for Tarlz (version 0.13, 27 February 2019).
+This manual is for Tarlz (version 0.14, 12 March 2019).
* Menu:
@@ -48,8 +48,8 @@ tar tools like GNU tar, which treat it like any other tar.lz archive.
Tarlz can append files to the end of such compressed archives.
Tarlz can create tar archives with five levels of compression
-granularity; per file, per block (default), per directory, appendable
-solid, and solid.
+granularity; per file (--no-solid), per block (--bsolid, default), per
+directory (--dsolid), appendable solid (--asolid), and solid (--solid).
Of course, compressing each file (or each directory) individually can't
achieve a compression ratio as high as compressing solidly the whole tar
@@ -107,7 +107,6 @@ equivalent to '-1 --solid'
tarlz supports the following options:
-'-h'
'--help'
Print an informative help message describing the options and exit.
@@ -118,14 +117,17 @@ equivalent to '-1 --solid'
'-A'
'--concatenate'
- Append tar.lz archives to the end of a tar.lz archive. All the
- archives involved must be regular (seekable) files compressed as
- multimember lzip files, and the two end-of-file blocks plus any
- zero padding must be contained in the last lzip member of each
- archive. The intermediate end-of-file blocks are removed as each
- new archive is concatenated. Exit with status 0 without modifying
- the archive if no FILES have been specified. Tarlz can't
- concatenate uncompressed tar archives.
+ Append one or more archives to the end of an archive. All the
+ archives involved must be regular (seekable) files, and must be
+ either all compressed or all uncompressed. Compressed and
+ uncompressed archives can't be mixed. Compressed archives must be
+ multimember lzip files with the two end-of-file blocks plus any
+ zero padding contained in the last lzip member of each archive.
+ The intermediate end-of-file blocks are removed as each new archive
+ is concatenated. If the archive is uncompressed, tarlz parses and
+ skips tar headers until it finds the end-of-file blocks. Exit with
+ status 0 without modifying the archive if no FILES have been
+ specified.
'-B BYTES'
'--data-size=BYTES'
@@ -158,21 +160,38 @@ equivalent to '-1 --solid'
'--diff'
Find differences between archive and file system. For each tar
member in the archive, verify that the corresponding file exists
- and is of the same type (regular file, directory, etc). Report the
- differences found in type, mode (permissions), owner and group
- IDs, modification time, file size, file contents (of regular
- files), target (of symlinks) and device number (of block/character
- special files).
+ and is of the same type (regular file, directory, etc). Report on
+ standard output the differences found in type, mode (permissions),
+ owner and group IDs, modification time, file size, file contents
+ (of regular files), target (of symlinks) and device number (of
+ block/character special files).
+
+ As tarlz removes leading slashes from member names, the '-C'
+ option may be used in combination with '--diff' when absolute
+ filenames were used on archive creation: 'tarlz -C / -d'.
+ Alternatively, tarlz may be run from the root directory to perform
+ the comparison.
'--ignore-ids'
Make '--diff' ignore differences in owner and group IDs. This
option is useful when comparing an '--anonymous' archive.
+'--exclude=PATTERN'
+ Exclude files matching a shell pattern like '*.o'. A file is
+ considered to match if any component of the filename matches. For
+ example, '*.o' matches 'foo.o', 'foo.o/bar' and 'foo/bar.o'.
+
'-f ARCHIVE'
'--file=ARCHIVE'
Use archive file ARCHIVE. '-' used as an ARCHIVE argument reads
from standard input or writes to standard output.
+'-h'
+'--dereference'
+ Follow symbolic links during archive creation, appending or
+ comparison. Archive or compare the files they point to instead of
+ the links themselves.
+
'-n N'
'--threads=N'
Set the number of (de)compression threads, overriding the system's
@@ -197,14 +216,18 @@ equivalent to '-1 --solid'
'-r'
'--append'
- Append files to the end of a tar.lz archive. The archive must be a
- regular (seekable) file compressed as a multimember lzip file, and
- the two end-of-file blocks plus any zero padding must be contained
- in the last lzip member of the archive. First this last member is
- removed, then the new members are appended, and then a new
- end-of-file member is appended to the archive. Exit with status 0
- without modifying the archive if no FILES have been specified.
- Tarlz can't append files to an uncompressed tar archive.
+ Append files to the end of an archive. The archive must be a
+ regular (seekable) file either compressed or uncompressed.
+ Compressed members can't be appended to an uncompressed archive,
+ nor vice versa. If the archive is compressed, it must be a
+ multimember lzip file with the two end-of-file blocks plus any
+ zero padding contained in the last lzip member of the archive.
+ Appending works as follows: first the end-of-file blocks are
+ removed, then the new members are appended, and finally two new
+ end-of-file blocks are appended to the archive. If the archive is
+ uncompressed, tarlz parses and skips tar headers until it finds
+ the end-of-file blocks. Exit with status 0 without modifying the
+ archive if no FILES have been specified.
'-t'
'--list'
@@ -221,10 +244,10 @@ equivalent to '-1 --solid'
the FILES given. Else extract all the files in the archive.
'-0 .. -9'
- Set the compression level. The default compression level is '-6'.
- Like lzip, tarlz also minimizes the dictionary size of the lzip
- members it creates, reducing the amount of memory required for
- decompression.
+ Set the compression level for '--create' and '--append'. The
+ default compression level is '-6'. Like lzip, tarlz also minimizes
+ the dictionary size of the lzip members it creates, reducing the
+ amount of memory required for decompression.
Level Dictionary size Match length limit
-0 64 KiB 16 bytes
@@ -239,8 +262,10 @@ equivalent to '-1 --solid'
-9 32 MiB 273 bytes
'--uncompressed'
- With '--create', don't compress the created tar archive. Create an
- uncompressed tar archive instead.
+ With '--create', don't compress the tar archive created. Create an
+ uncompressed tar archive instead. With '--append', don't compress
+ the new members appended to the tar archive. Compressed members
+ can't be appended to an uncompressed archive, nor vice versa.
'--asolid'
When creating or appending to a compressed archive, use appendable
@@ -314,6 +339,14 @@ equivalent to '-1 --solid'
the posix pax format; i.e., the lack of a mandatory check sequence
in the extended records. *Note crc32::.
+'--out-slots=N'
+ Number of 1 MiB output packets buffered per worker thread during
+ multi-threaded creation or appending to compressed archives.
+ Increasing the number of packets may increase compression speed if
+ the files being archived are larger than 64 MiB compressed, but
+ requires more memory. Valid values range from 1 to 1024. The
+ default value is 64.
+
Exit status: 0 for a normal exit, 1 for environmental problems (file
not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
@@ -345,7 +378,7 @@ sets). The members simply appear one after another in the file, with no
additional information before, between, or after them.
Each lzip member contains one or more tar members in a simplified
-posix pax interchange format; the only pax typeflag value supported by
+posix pax interchange format. The only pax typeflag value supported by
tarlz (in addition to the typeflag values defined by the ustar format)
is 'x'. The pax format is an extension on top of the ustar format that
removes the size limitations of the ustar format.
@@ -714,7 +747,7 @@ speed by the number of available processors), the uncompressed archive
must be at least as large as the number of worker threads times the
block size (*note --data-size::). Else some processors will not get any
data to compress, and compression will be proportionally slower. The
-maximum speed increase achievable on a given file is limited by the
+maximum speed increase achievable on a given archive is limited by the
ratio (uncompressed_size / data_size). For example, a tarball the size
of gcc or linux will scale up to 10 or 12 processors at level -9.
@@ -835,20 +868,20 @@ Concept index

Tag Table:
Node: Top223
-Node: Introduction1089
-Node: Invoking tarlz3228
-Ref: --data-size5107
-Ref: --bsolid10054
-Node: File format13298
-Ref: key_crc3218118
-Node: Amendments to pax format23535
-Ref: crc3224059
-Ref: flawed-compat25084
-Node: Multi-threaded tar27451
-Node: Minimum archive sizes29990
-Node: Examples32120
-Node: Problems33789
-Node: Concept index34315
+Node: Introduction1086
+Node: Invoking tarlz3280
+Ref: --data-size5339
+Ref: --bsolid11442
+Node: File format15072
+Ref: key_crc3219892
+Node: Amendments to pax format25309
+Ref: crc3225833
+Ref: flawed-compat26858
+Node: Multi-threaded tar29225
+Node: Minimum archive sizes31764
+Node: Examples33897
+Node: Problems35566
+Node: Concept index36092

End Tag Table
diff --git a/doc/tarlz.texi b/doc/tarlz.texi
index 47f01a2..da7abfa 100644
--- a/doc/tarlz.texi
+++ b/doc/tarlz.texi
@@ -6,8 +6,8 @@
@finalout
@c %**end of header
-@set UPDATED 27 February 2019
-@set VERSION 0.13
+@set UPDATED 12 March 2019
+@set VERSION 0.14
@dircategory Data Compression
@direntry
@@ -69,7 +69,8 @@ tar, which treat it like any other tar.lz archive. Tarlz can append files to
the end of such compressed archives.
Tarlz can create tar archives with five levels of compression granularity;
-per file, per block (default), per directory, appendable solid, and solid.
+per file (@samp{--no-solid}), per block (@samp{--bsolid}, default), per
+directory (@samp{--dsolid}), appendable solid (@samp{--asolid}), and solid
+(@samp{--solid}).
@noindent
Of course, compressing each file (or each directory) individually can't
@@ -140,8 +141,7 @@ equivalent to @samp{-1 --solid}
tarlz supports the following options:
@table @code
-@item -h
-@itemx --help
+@item --help
Print an informative help message describing the options and exit.
@item -V
@@ -151,13 +151,15 @@ This version number should be included in all bug reports.
@item -A
@itemx --concatenate
-Append tar.lz archives to the end of a tar.lz archive. All the archives
-involved must be regular (seekable) files compressed as multimember lzip
-files, and the two end-of-file blocks plus any zero padding must be
-contained in the last lzip member of each archive. The intermediate
-end-of-file blocks are removed as each new archive is concatenated. Exit
-with status 0 without modifying the archive if no @var{files} have been
-specified. Tarlz can't concatenate uncompressed tar archives.
+Append one or more archives to the end of an archive. All the archives
+involved must be regular (seekable) files, and must be either all compressed
+or all uncompressed. Compressed and uncompressed archives can't be mixed.
+Compressed archives must be multimember lzip files with the two end-of-file
+blocks plus any zero padding contained in the last lzip member of each
+archive. The intermediate end-of-file blocks are removed as each new archive
+is concatenated. If the archive is uncompressed, tarlz parses and skips tar
+headers until it finds the end-of-file blocks. Exit with status 0 without
+modifying the archive if no @var{files} have been specified.
@anchor{--data-size}
@item -B @var{bytes}
@@ -190,19 +192,34 @@ option appears after a relative filename in the command line.
@itemx --diff
Find differences between archive and file system. For each tar member in the
archive, verify that the corresponding file exists and is of the same type
-(regular file, directory, etc). Report the differences found in type, mode
-(permissions), owner and group IDs, modification time, file size, file
-contents (of regular files), target (of symlinks) and device number (of
-block/character special files).
+(regular file, directory, etc). Report on standard output the differences
+found in type, mode (permissions), owner and group IDs, modification time,
+file size, file contents (of regular files), target (of symlinks) and device
+number (of block/character special files).
+
+As tarlz removes leading slashes from member names, the @samp{-C} option may
+be used in combination with @samp{--diff} when absolute filenames were used
+on archive creation: @w{@samp{tarlz -C / -d}}. Alternatively, tarlz may be
+run from the root directory to perform the comparison.
@item --ignore-ids
Make @samp{--diff} ignore differences in owner and group IDs. This option is
useful when comparing an @samp{--anonymous} archive.
+@item --exclude=@var{pattern}
+Exclude files matching a shell pattern like @samp{*.o}. A file is considered
+to match if any component of the filename matches. For example, @samp{*.o}
+matches @samp{foo.o}, @samp{foo.o/bar} and @samp{foo/bar.o}.
+
@item -f @var{archive}
@itemx --file=@var{archive}
-Use archive file @var{archive}. @samp{-} used as an @var{archive}
-argument reads from standard input or writes to standard output.
+Use archive file @var{archive}. @samp{-} used as an @var{archive} argument
+reads from standard input or writes to standard output.
+
+@item -h
+@itemx --dereference
+Follow symbolic links during archive creation, appending or comparison.
+Archive or compare the files they point to instead of the links themselves.
@item -n @var{n}
@itemx --threads=@var{n}
@@ -226,14 +243,17 @@ Quiet operation. Suppress all messages.
@item -r
@itemx --append
-Append files to the end of a tar.lz archive. The archive must be a
-regular (seekable) file compressed as a multimember lzip file, and the
-two end-of-file blocks plus any zero padding must be contained in the
-last lzip member of the archive. First this last member is removed, then
-the new members are appended, and then a new end-of-file member is
-appended to the archive. Exit with status 0 without modifying the
-archive if no @var{files} have been specified. Tarlz can't append files
-to an uncompressed tar archive.
+Append files to the end of an archive. The archive must be a regular
+(seekable) file either compressed or uncompressed. Compressed members can't
+be appended to an uncompressed archive, nor vice versa. If the archive is
+compressed, it must be a multimember lzip file with the two end-of-file
+blocks plus any zero padding contained in the last lzip member of the
+archive. Appending works as follows: first the end-of-file blocks are
+removed, then the new members are appended, and finally two new end-of-file
+blocks are appended to the archive. If the archive is uncompressed, tarlz
+parses and skips tar headers until it finds the end-of-file blocks. Exit
+with status 0 without modifying the archive if no @var{files} have been
+specified.
@item -t
@itemx --list
@@ -250,9 +270,10 @@ Extract files from an archive. If @var{files} are given, extract only
the @var{files} given. Else extract all the files in the archive.
@item -0 .. -9
-Set the compression level. The default compression level is @samp{-6}.
-Like lzip, tarlz also minimizes the dictionary size of the lzip members
-it creates, reducing the amount of memory required for decompression.
+Set the compression level for @samp{--create} and @samp{--append}. The
+default compression level is @samp{-6}. Like lzip, tarlz also minimizes the
+dictionary size of the lzip members it creates, reducing the amount of
+memory required for decompression.
@multitable {Level} {Dictionary size} {Match length limit}
@item Level @tab Dictionary size @tab Match length limit
@@ -269,8 +290,10 @@ it creates, reducing the amount of memory required for decompression.
@end multitable
@item --uncompressed
-With @samp{--create}, don't compress the created tar archive. Create an
-uncompressed tar archive instead.
+With @samp{--create}, don't compress the tar archive created. Create an
+uncompressed tar archive instead. With @samp{--append}, don't compress the
+new members appended to the tar archive. Compressed members can't be
+appended to an uncompressed archive, nor vice versa.
@item --asolid
When creating or appending to a compressed archive, use appendable solid
@@ -340,6 +363,13 @@ missing CRC instead of as a corrupt record. This misleading
format; i.e., the lack of a mandatory check sequence in the extended
records. @xref{crc32}.
+@item --out-slots=@var{n}
+Number of @w{1 MiB} output packets buffered per worker thread during
+multi-threaded creation or appending to compressed archives. Increasing the
+number of packets may increase compression speed if the files being archived
+are larger than @w{64 MiB} compressed, but requires more memory. Valid
+values range from 1 to 1024. The default value is 64.
+
@ignore
@item --permissive
Allow some violations of the archive format, like consecutive extended
@@ -382,7 +412,7 @@ The members simply appear one after another in the file, with no
additional information before, between, or after them.
Each lzip member contains one or more tar members in a simplified posix
-pax interchange format; the only pax typeflag value supported by tarlz
+pax interchange format. The only pax typeflag value supported by tarlz
(in addition to the typeflag values defined by the ustar format) is
@samp{x}. The pax format is an extension on top of the ustar format that
removes the size limitations of the ustar format.
@@ -766,7 +796,7 @@ the number of available processors), the uncompressed archive must be at
least as large as the number of worker threads times the block size
(@pxref{--data-size}). Else some processors will not get any data to
compress, and compression will be proportionally slower. The maximum speed
-increase achievable on a given file is limited by the ratio
+increase achievable on a given archive is limited by the ratio
@w{(uncompressed_size / data_size)}. For example, a tarball the size of gcc
or linux will scale up to 10 or 12 processors at level -9.
diff --git a/exclude.cc b/exclude.cc
new file mode 100644
index 0000000..fc9d67d
--- /dev/null
+++ b/exclude.cc
@@ -0,0 +1,55 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2019 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <climits>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <fnmatch.h>
+#include <pthread.h>
+#include <stdint.h>
+
+#include "tarlz.h"
+
+
+namespace Exclude {
+
+/* Exclusion patterns, stored in the order in which they were added.
+   Appended to by add_pattern and consulted by excluded. */
+std::vector< std::string > patterns; // list of patterns
+
+} // end namespace Exclude
+
+
+/* Appends a copy of 'arg' to the list of exclusion patterns.
+   The pattern is stored as given; it is not validated here. */
+void Exclude::add_pattern( const std::string & arg )
+ { patterns.push_back( arg ); }
+
+
+/* Returns true if 'filename' matches any of the stored exclusion patterns.
+   Every pattern is tried first against the whole 'filename' and then
+   against each tail of it that begins at the start of a later component
+   (i.e., just after a run of one or more '/').
+   Returns false if no patterns have been added or if 'filename' is empty.
+   NOTE(review): FNM_LEADING_DIR appears to be a GNU/BSD extension rather
+   than a POSIX flag -- confirm it is available on all target platforms. */
+bool Exclude::excluded( const char * const filename )
+ {
+ if( patterns.empty() ) return false; // nothing to exclude
+ const char * p = filename; // start of the tail being tested
+ while( *p )
+ {
+ // try all the patterns against the tail starting at this component
+ for( unsigned i = 0; i < patterns.size(); ++i )
+ if( fnmatch( patterns[i].c_str(), p, FNM_LEADING_DIR ) == 0 ) return true;
+ // advance p to the beginning of the next component, if any
+ while( *p && *p != '/' ) ++p; // skip component
+ while( *p == '/' ) ++p; // skip slashes
+ }
+ return false; // no pattern matched at any component
+ }
diff --git a/extended.cc b/extended.cc
index 5931be2..39b0f5a 100644
--- a/extended.cc
+++ b/extended.cc
@@ -201,7 +201,9 @@ bool Extended::parse( const char * const buf, const unsigned long long edsize,
if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 )
{
if( path_.size() && !permissive ) return false;
- path_.assign( tail + 5, rest - 5 );
+ unsigned long long len = rest - 5;
+ while( len > 1 && tail[5+len-1] == '/' ) --len; // trailing '/'
+ path_.assign( tail + 5, len );
// this also truncates path_ at the first embedded null character
path_.assign( remove_leading_dotslash( path_.c_str() ) );
}
@@ -275,3 +277,19 @@ void Extended::fill_from_ustar( const Tar_header header )
( typeflag == tf_regular || typeflag == tf_hiperf ) )
file_size( parse_octal( header + size_o, size_l ) );
}
+
+
+/* Returns file size from record or from ustar header, and resets file_size_.
+   The value held in file_size_ is saved and file_size( 0 ) is called before
+   anything else, so the reset happens whatever the typeflag is.
+   If the typeflag of 'header' is tf_regular or tf_hiperf, returns the saved
+   value when it is nonzero, else the size field parsed from 'header'.
+   For any other typeflag returns 0.
+   Used for fast parsing of headers in uncompressed archives. */
+unsigned long long Extended::get_file_size_and_reset( const Tar_header header )
+ {
+ const unsigned long long tmp = file_size_; // save value before resetting
+ file_size( 0 );
+ const Typeflag typeflag = (Typeflag)header[typeflag_o];
+ if( typeflag == tf_regular || typeflag == tf_hiperf )
+ {
+ // a saved size of 0 is treated as "not set"; fall back to ustar size field
+ if( tmp == 0 ) return parse_octal( header + size_o, size_l );
+ else return tmp;
+ }
+ return 0; // other member types report no file data here
+ }
diff --git a/extract.cc b/extract.cc
index 04d974a..7d4b3ae 100644
--- a/extract.cc
+++ b/extract.cc
@@ -44,9 +44,9 @@
namespace {
-Resizable_buffer grbuf( initial_line_length );
+Resizable_buffer grbuf;
bool archive_is_uncompressed_seekable = false;
-bool has_lz_ext; // global var for archive_read
+bool archive_has_lz_ext; // local var for archive_read
bool skip_warn( const bool reset = false ) // avoid duplicate warnings
{
@@ -120,7 +120,7 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
if( !islz && !istar && !iseof ) // corrupt or invalid format
{
show_error( "This does not look like a POSIX tar archive." );
- if( has_lz_ext && rd >= min_member_size ) islz = true;
+ if( archive_has_lz_ext && rd >= min_member_size ) islz = true;
if( !islz ) return 1;
}
if( !islz ) // uncompressed
@@ -247,7 +247,7 @@ bool block_is_zero( const uint8_t * const buf, const int size )
}
-void format_member_name( const Extended & extended, const Tar_header header,
+bool format_member_name( const Extended & extended, const Tar_header header,
Resizable_buffer & rbuf, const bool long_format )
{
if( long_format )
@@ -279,27 +279,32 @@ void format_member_name( const Extended & extended, const Tar_header header,
1900 + tm->tm_year, 1 + tm->tm_mon, tm->tm_mday,
tm->tm_hour, tm->tm_min, extended.path().c_str(),
link_string, islink ? extended.linkpath().c_str() : "" );
- if( (int)rbuf.size() > len + offset || !rbuf.resize( len + offset + 1 ) )
- break;
+ if( (int)rbuf.size() > len + offset ) break;
+ if( !rbuf.resize( len + offset + 1 ) ) return false;
}
}
else
{
- if( rbuf.size() < extended.path().size() + 2 )
- rbuf.resize( extended.path().size() + 2 );
+ if( rbuf.size() < extended.path().size() + 2 &&
+ !rbuf.resize( extended.path().size() + 2 ) ) return false;
snprintf( rbuf(), rbuf.size(), "%s\n", extended.path().c_str() );
}
+ return true;
}
namespace {
-void show_member_name( const Extended & extended, const Tar_header header,
+bool show_member_name( const Extended & extended, const Tar_header header,
const int vlevel, Resizable_buffer & rbuf )
{
- if( verbosity < vlevel ) return;
- format_member_name( extended, header, rbuf, verbosity > vlevel );
- std::fputs( rbuf(), stdout );
- std::fflush( stdout );
+ if( verbosity >= vlevel )
+ {
+ if( !format_member_name( extended, header, rbuf, verbosity > vlevel ) )
+ { show_error( mem_msg ); return false; }
+ std::fputs( rbuf(), stdout );
+ std::fflush( stdout );
+ }
+ return true;
}
@@ -326,20 +331,21 @@ int skip_member( const int infd, const Extended & extended )
void show_file_diff( const char * const filename, const char * const msg )
{
- if( verbosity >= 0 ) std::fprintf( stderr, "%s: %s\n", filename, msg );
+ if( verbosity >= 0 )
+ { std::printf( "%s: %s\n", filename, msg ); std::fflush( stdout ); }
}
int compare_member( const int infd1, const Extended & extended,
const Tar_header header, const bool ignore_ids )
{
- show_member_name( extended, header, 1, grbuf );
+ if( !show_member_name( extended, header, 1, grbuf ) ) return 1;
unsigned long long rest = extended.file_size();
const char * const filename = extended.path().c_str();
const Typeflag typeflag = (Typeflag)header[typeflag_o];
bool diff = false, size_differs = false, type_differs = true;
struct stat st;
- if( lstat( filename, &st ) != 0 )
+ if( hstat( filename, &st ) != 0 )
show_file_error( filename, "Warning: Can't stat", errno );
else if( ( typeflag == tf_regular || typeflag == tf_hiperf ) &&
!S_ISREG( st.st_mode ) )
@@ -453,7 +459,7 @@ int compare_member( const int infd1, const Extended & extended,
int list_member( const int infd, const Extended & extended,
const Tar_header header )
{
- show_member_name( extended, header, 0, grbuf );
+ if( !show_member_name( extended, header, 0, grbuf ) ) return 1;
return skip_member( infd, extended );
}
@@ -481,7 +487,7 @@ int extract_member( const int infd, const Extended & extended,
const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
int outfd = -1;
- show_member_name( extended, header, 1, grbuf );
+ if( !show_member_name( extended, header, 1, grbuf ) ) return 1;
std::remove( filename );
make_path( filename );
switch( typeflag )
@@ -615,17 +621,16 @@ bool compare_tslash( const char * const name1, const char * const name2 )
namespace {
bool parse_records( const int infd, Extended & extended,
- const Tar_header header, const bool permissive )
+ const Tar_header header, Resizable_buffer & rbuf,
+ const bool permissive )
{
const unsigned long long edsize = parse_octal( header + size_o, size_l );
const unsigned long long bufsize = round_up( edsize );
- if( bufsize == 0 || edsize == 0 || edsize >= 1ULL << 33 )
+ if( edsize == 0 || edsize >= 1ULL << 33 || bufsize == 0 || bufsize >= INT_MAX )
return false; // overflow or no extended data
- char * const buf = new char[bufsize]; // extended records buffer
- const bool ret = ( archive_read( infd, (uint8_t *)buf, bufsize ) == 0 &&
- extended.parse( buf, edsize, permissive ) );
- delete[] buf;
- return ret;
+ if( !rbuf.resize( bufsize ) ) return false; // extended records buffer
+ return ( archive_read( infd, (uint8_t *)rbuf(), bufsize ) == 0 &&
+ extended.parse( rbuf(), edsize, permissive ) );
}
} // end namespace
@@ -702,7 +707,9 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
{ show_file_error( dir, "Error changing working directory", errno );
return 1; }
}
- if( !code && parser.argument( i ).size() ) name_pending[i] = true;
+ if( !code && parser.argument( i ).size() &&
+ !Exclude::excluded( parser.argument( i ).c_str() ) )
+ name_pending[i] = true;
}
// multi-threaded --list is faster even with 1 thread and 1 file in archive
@@ -722,11 +729,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
archive_is_uncompressed_seekable = true; // unless compressed corrupt
}
- has_lz_ext = // global var for archive_read
- ( archive_name.size() > 3 &&
- archive_name.compare( archive_name.size() - 3, 3, ".lz" ) == 0 ) ||
- ( archive_name.size() > 4 &&
- archive_name.compare( archive_name.size() - 4, 4, ".tlz" ) == 0 );
+ archive_has_lz_ext = has_lz_ext( archive_name ); // var for archive_read
Extended extended; // metadata from extended records
int retval = 0;
bool prev_extended = false; // prev header was extended
@@ -737,35 +740,46 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
if( ret == 2 ) return 2;
if( ret != 0 || !verify_ustar_chksum( header ) )
{
- if( ret == 0 && block_is_zero( header, header_size ) ) break; // EOF
+ if( ret == 0 && block_is_zero( header, header_size ) )
+ {
+ if( !prev_extended ) break; // EOF
+ show_file_error( archive_name.c_str(),
+ "Format violation: extended header followed by EOF blocks." );
+ return 2;
+ }
if( skip_warn() && verbosity >= 2 )
std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( header ) );
set_error_status( 2 ); continue;
}
- skip_warn( true ); // reset warning
+ skip_warn( true ); // reset warning
const Typeflag typeflag = (Typeflag)header[typeflag_o];
if( typeflag == tf_global )
{
if( prev_extended )
- { show_error( "Format violation: global header after extended header." );
+ { show_file_error( archive_name.c_str(),
+ "Format violation: extended header followed by global header." );
return 2; }
Extended dummy; // global headers are parsed and ignored
- if( !parse_records( infd, dummy, header, true ) )
- { show_error( "Error in global extended records. Skipping to next header." );
+ if( !parse_records( infd, dummy, header, grbuf, true ) )
+ { show_file_error( archive_name.c_str(),
+ "Error in global extended records. Skipping to next header." );
set_error_status( 2 ); }
continue;
}
if( typeflag == tf_extended )
{
if( prev_extended && !permissive )
- { show_error( "Format violation: consecutive extended headers found."
+ { show_file_error( archive_name.c_str(),
+ "Format violation: consecutive extended headers found."
/*" Use --permissive.", 0, true*/ ); return 2; }
- if( !parse_records( infd, extended, header, permissive ) )
- { show_error( "Error in extended records. Skipping to next header." );
+ if( !parse_records( infd, extended, header, grbuf, permissive ) )
+ { show_file_error( archive_name.c_str(),
+ "Error in extended records. Skipping to next header." );
extended.reset(); set_error_status( 2 ); }
else if( !extended.crc_present() && missing_crc )
- { show_error( "Missing CRC in extended records.", 0, true ); return 2; }
+ { show_file_error( archive_name.c_str(),
+ "Missing CRC in extended records." ); return 2; }
prev_extended = true;
continue;
}
diff --git a/list_lz.cc b/list_lz.cc
index 6e15db5..6a95bab 100644
--- a/list_lz.cc
+++ b/list_lz.cc
@@ -149,6 +149,7 @@ bool check_skip_filename( const Arg_parser & parser,
std::vector< char > & name_pending,
const char * const filename, const int filenames )
{
+ if( Exclude::excluded( filename ) ) return true; // skip excluded
bool skip = filenames > 0;
if( skip )
for( int i = 0; i < parser.arguments(); ++i )
@@ -377,7 +378,8 @@ int list_member_lz( LZ_Decoder * const decoder, const int infd,
}
if( verbosity < 0 || skip ) rbuf()[0] = 0;
- else format_member_name( extended, header, rbuf, verbosity > 0 );
+ else if( !format_member_name( extended, header, rbuf, verbosity > 0 ) )
+ { *msg = mem_msg; return 1; }
const Packet * const opacket = new Packet( member_id, rbuf(),
data_rest ? Packet::ok : Packet::member_done );
if( !courier.collect_packet( opacket, worker_id ) )
@@ -404,19 +406,19 @@ int parse_records_lz( LZ_Decoder * const decoder, const int infd,
long long & file_pos, const long long member_end,
const long long cdata_size, long long & data_pos,
Extended & extended, const Tar_header header,
- const char ** msg, const bool permissive )
+ Resizable_buffer & rbuf, const char ** msg,
+ const bool permissive )
{
const unsigned long long edsize = parse_octal( header + size_o, size_l );
const unsigned long long bufsize = round_up( edsize );
- if( bufsize == 0 || edsize == 0 || edsize >= 1ULL << 33 )
- return false; // overflow or no extended data
- char * const buf = new char[bufsize]; // extended records buffer
+ if( edsize == 0 || edsize >= 1ULL << 33 || bufsize == 0 || bufsize >= INT_MAX )
+ return 1; // overflow or no extended data
+ if( !rbuf.resize( bufsize ) ) return 1; // extended records buffer
int retval = archive_read_lz( decoder, infd, file_pos, member_end,
- cdata_size, (uint8_t *)buf, bufsize, msg );
+ cdata_size, (uint8_t *)rbuf(), bufsize, msg );
if( retval == 0 )
- { if( extended.parse( buf, edsize, permissive ) ) data_pos += bufsize;
+ { if( extended.parse( rbuf(), edsize, permissive ) ) data_pos += bufsize;
else retval = 1; }
- delete[] buf;
return retval;
}
@@ -452,7 +454,7 @@ extern "C" void * tworker( void * arg )
const int missing_crc = tmp.missing_crc;
const bool permissive = tmp.permissive;
- Resizable_buffer rbuf( initial_line_length );
+ Resizable_buffer rbuf;
LZ_Decoder * const decoder = LZ_decompress_open();
if( !rbuf.size() || !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
{ show_error( mem_msg ); cleanup_and_fail(); }
@@ -518,7 +520,8 @@ extern "C" void * tworker( void * arg )
cleanup_and_fail( 2 ); }
Extended dummy; // global headers are parsed and ignored
const int ret = parse_records_lz( decoder, infd, file_pos, member_end,
- cdata_size, data_pos, dummy, header, &msg, true );
+ cdata_size, data_pos, dummy, header,
+ rbuf, &msg, true );
if( ret != 0 )
{
if( !courier.request_mastership( i, worker_id ) ) goto done;
@@ -542,7 +545,8 @@ extern "C" void * tworker( void * arg )
{ msg = "Format violation: consecutive extended headers found.";
ret = 2; }
else ret = parse_records_lz( decoder, infd, file_pos, member_end,
- cdata_size, data_pos, extended, header, &msg, permissive );
+ cdata_size, data_pos, extended, header,
+ rbuf, &msg, permissive );
if( ret == 0 && !extended.crc_present() && missing_crc )
{ msg = "Missing CRC in extended records."; ret = 2; }
if( ret != 0 )
@@ -588,7 +592,7 @@ done:
if( LZ_decompress_close( decoder ) < 0 )
{
const Packet * const opacket = new Packet( lzip_index.members(),
- "LZ_decompress_close failed.", Packet::error );
+ "LZ_decompress_close failed.", Packet::error );
courier.collect_packet( opacket, worker_id );
}
courier.worker_finished();
diff --git a/main.cc b/main.cc
index fe74f7a..8dcd2b2 100644
--- a/main.cc
+++ b/main.cc
@@ -61,6 +61,7 @@ namespace {
const char * const program_name = "tarlz";
const char * const program_year = "2019";
const char * invocation_name = 0;
+bool dereference = false;
void show_help( const long num_online )
@@ -82,7 +83,7 @@ void show_help( const long num_online )
"can be used to recover some of the damaged members.\n"
"\nUsage: %s [options] [files]\n", invocation_name );
std::printf( "\nOptions:\n"
- " -h, --help display this help and exit\n"
+ " --help display this help and exit\n"
" -V, --version output version information and exit\n"
" -A, --concatenate append tar.lz archives to the end of an archive\n"
" -B, --data-size=<bytes> set target size of input data blocks [2x8=16 MiB]\n"
@@ -90,7 +91,9 @@ void show_help( const long num_online )
" -C, --directory=<dir> change to directory <dir>\n"
" -d, --diff find differences between archive and file system\n"
" --ignore-ids ignore differences in owner and group IDs\n"
+ " --exclude=<pattern> exclude files matching a shell pattern\n"
" -f, --file=<archive> use archive file <archive>\n"
+ " -h, --dereference follow symlinks; archive the files they point to\n"
" -n, --threads=<n> set number of (de)compression threads [%ld]\n"
" -q, --quiet suppress all messages\n"
" -r, --append append files to the end of an archive\n"
@@ -109,6 +112,7 @@ void show_help( const long num_online )
" --group=<group> use <group> name/ID for files added\n"
" --keep-damaged don't delete partially extracted files\n"
" --missing-crc exit with error status if missing extended CRC\n"
+ " --out-slots=<n> number of 1 MiB output packets buffered [64]\n"
/* " --permissive allow repeated extended headers and records\n"*/,
num_online );
if( verbosity >= 1 )
@@ -217,6 +221,10 @@ void set_group( const char * const arg )
} // end namespace
+/* Gets the status of 'filename' into 'st'. Calls stat if the file-scope
+   flag 'dereference' is set (option '-h, --dereference'), else calls lstat.
+   Returns the value returned by the function called. */
+int hstat( const char * const filename, struct stat * const st )
+ { return dereference ? stat( filename, st ) : lstat( filename, st ); }
+
+
int open_instream( const std::string & name )
{
const int infd = open( name.c_str(), O_RDONLY | O_BINARY );
@@ -288,8 +296,9 @@ int main( const int argc, const char * const argv[] )
{
std::string archive_name;
int debug_level = 0;
- int num_workers = -1; // start this many worker threads
int level = 6; // compression level, < 0 means uncompressed
+ int num_workers = -1; // start this many worker threads
+ int out_slots = 64;
Program_mode program_mode = m_none;
bool ignore_ids = false;
bool keep_damaged = false;
@@ -301,8 +310,9 @@ int main( const int argc, const char * const argv[] )
{ show_error( "Bad library version. At least lzlib 1.0 is required." );
return 1; }
- enum { opt_ano = 256, opt_aso, opt_bso, opt_crc, opt_dbg, opt_dso, opt_grp,
- opt_id, opt_kd, opt_nso, opt_own, opt_per, opt_sol, opt_un };
+ enum { opt_ano = 256, opt_aso, opt_bso, opt_crc, opt_dbg, opt_dso, opt_exc,
+ opt_grp, opt_hlp, opt_id, opt_kd, opt_nso, opt_out, opt_own, opt_per,
+ opt_sol, opt_un };
const Arg_parser::Option options[] =
{
{ '0', 0, Arg_parser::no },
@@ -321,7 +331,7 @@ int main( const int argc, const char * const argv[] )
{ 'C', "directory", Arg_parser::yes },
{ 'd', "diff", Arg_parser::no },
{ 'f', "file", Arg_parser::yes },
- { 'h', "help", Arg_parser::no },
+ { 'h', "dereference", Arg_parser::no },
{ 'H', "format", Arg_parser::yes },
{ 'n', "threads", Arg_parser::yes },
{ 'q', "quiet", Arg_parser::no },
@@ -335,11 +345,14 @@ int main( const int argc, const char * const argv[] )
{ opt_bso, "bsolid", Arg_parser::no },
{ opt_dbg, "debug", Arg_parser::yes },
{ opt_dso, "dsolid", Arg_parser::no },
+ { opt_exc, "exclude", Arg_parser::yes },
{ opt_grp, "group", Arg_parser::yes },
+ { opt_hlp, "help", Arg_parser::no },
{ opt_id, "ignore-ids", Arg_parser::no },
{ opt_kd, "keep-damaged", Arg_parser::no },
{ opt_crc, "missing-crc", Arg_parser::no },
{ opt_nso, "no-solid", Arg_parser::no },
+ { opt_out, "out-slots", Arg_parser::yes },
{ opt_own, "owner", Arg_parser::yes },
{ opt_per, "permissive", Arg_parser::no },
{ opt_sol, "solid", Arg_parser::no },
@@ -375,7 +388,7 @@ int main( const int argc, const char * const argv[] )
case 'C': break; // skip chdir
case 'd': set_mode( program_mode, m_diff ); break;
case 'f': if( sarg != "-" ) archive_name = sarg; break;
- case 'h': show_help( num_online ); return 0;
+ case 'h': dereference = true; break;
case 'H': break; // ignore format
case 'n': num_workers = getnum( arg, 0, max_workers ); break;
case 'q': verbosity = -1; break;
@@ -390,10 +403,13 @@ int main( const int argc, const char * const argv[] )
case opt_crc: missing_crc = true; break;
case opt_dbg: debug_level = getnum( arg, 0, 3 ); break;
case opt_dso: solidity = dsolid; break;
+ case opt_exc: Exclude::add_pattern( sarg ); break;
case opt_grp: set_group( arg ); break;
+ case opt_hlp: show_help( num_online ); return 0;
case opt_id: ignore_ids = true; break;
case opt_kd: keep_damaged = true; break;
case opt_nso: solidity = no_solid; break;
+ case opt_out: out_slots = getnum( arg, 1, 1024 ); break;
case opt_own: set_owner( arg ); break;
case opt_per: permissive = true; break;
case opt_sol: solidity = solid; break;
@@ -414,7 +430,8 @@ int main( const int argc, const char * const argv[] )
case m_none: show_error( "Missing operation.", 0, true ); return 2;
case m_append:
case m_create: return encode( archive_name, parser, filenames, level,
- num_workers, debug_level, program_mode == m_append );
+ num_workers, out_slots, debug_level,
+ program_mode == m_append, dereference );
case m_concatenate: return concatenate( archive_name, parser, filenames );
case m_diff:
case m_extract:
diff --git a/tarlz.h b/tarlz.h
index 10a2dd5..196ecb1 100644
--- a/tarlz.h
+++ b/tarlz.h
@@ -70,15 +70,17 @@ inline bool dotdot_at_i( const char * const filename, const int i )
}
-enum { initial_line_length = 1000 }; // must be >= 87 for format_member_name
-
class Resizable_buffer
{
char * p;
unsigned long size_; // size_ < LONG_MAX
public:
- explicit Resizable_buffer( const unsigned long initial_size )
+ // must be >= 87 for format_member_name
+ enum { default_initial_size = 2 * header_size };
+
+ explicit Resizable_buffer( const unsigned long initial_size =
+ default_initial_size )
: p( (char *)std::malloc( initial_size ) ), size_( p ? initial_size : 0 ) {}
~Resizable_buffer() { if( p ) std::free( p ); p = 0; size_ = 0; }
@@ -136,6 +138,7 @@ public:
const std::string & linkpath() const { return linkpath_; }
const std::string & path() const { return path_; }
unsigned long long file_size() const { return file_size_; }
+ unsigned long long get_file_size_and_reset( const Tar_header header );
void linkpath( const char * const lp ) { linkpath_ = lp; full_size_ = -1; }
void path( const char * const p ) { path_ = p; full_size_ = -1; }
@@ -311,6 +314,7 @@ extern int cl_data_size;
extern Solidity solidity;
bool writeblock_wrapper( const int outfd, const uint8_t * const buffer,
const int size );
+bool write_eof_records( const int outfd, const bool compressed );
const char * remove_leading_dotslash( const char * const filename,
const bool dotdot = false );
bool fill_headers( const char * const filename, Extended & extended,
@@ -323,23 +327,32 @@ void set_error_status( const int retval );
int final_exit_status( int retval, const bool show_msg = true );
unsigned ustar_chksum( const uint8_t * const header );
bool verify_ustar_chksum( const uint8_t * const header );
+bool has_lz_ext( const std::string & name );
class Arg_parser;
-int concatenate( const std::string & archive_name, const Arg_parser & parser,
+int concatenate( std::string archive_name, const Arg_parser & parser,
const int filenames );
int encode( const std::string & archive_name, const Arg_parser & parser,
const int filenames, const int level, const int num_workers,
- const int debug_level, const bool append );
+ const int out_slots, const int debug_level, const bool append,
+ const bool dereference );
// defined in create_lz.cc
int encode_lz( const Arg_parser & parser, const int dictionary_size,
const int match_len_limit, const int num_workers,
- const int outfd, const int debug_level );
+ const int outfd, const int out_slots, const int debug_level,
+ const bool dereference );
+
+// defined in exclude.cc
+namespace Exclude {
+void add_pattern( const std::string & arg );
+bool excluded( const char * const filename );
+} // end namespace Exclude
// defined in extract.cc
enum Program_mode { m_none, m_append, m_concatenate, m_create, m_diff,
m_extract, m_list };
bool block_is_zero( const uint8_t * const buf, const int size );
-void format_member_name( const Extended & extended, const Tar_header header,
+bool format_member_name( const Extended & extended, const Tar_header header,
Resizable_buffer & rbuf, const bool long_format );
bool compare_prefix_dir( const char * const dir, const char * const name );
bool compare_tslash( const char * const name1, const char * const name2 );
@@ -377,6 +390,8 @@ int seek_read( const int fd, uint8_t * const buf, const int size,
// defined in main.cc
extern int verbosity;
+struct stat;
+int hstat( const char * const filename, struct stat * const st );
int open_instream( const std::string & name );
int open_outstream( const std::string & name, const bool create = true );
void cleanup_and_fail( const int retval = 1 ); // terminate the program
diff --git a/testsuite/check.sh b/testsuite/check.sh
index aea9fbf..29330cf 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -55,6 +55,7 @@ bad3_lz="${testdir}"/test3_bad3.tar.lz
bad4_lz="${testdir}"/test3_bad4.tar.lz
bad5_lz="${testdir}"/test3_bad5.tar.lz
bad6_lz="${testdir}"/test3_bad6.tar.lz
+eof="${testdir}"/eof.tar
eof_lz="${testdir}"/eof.tar.lz
fail=0
lwarn=0
@@ -112,17 +113,13 @@ printf "testing tarlz-%s..." "$2"
[ ! -e out.tar.lz ] || test_failed $LINENO
"${TARLZ}" -rf out.tar.lz || test_failed $LINENO
[ ! -e out.tar.lz ] || test_failed $LINENO
-"${TARLZ}" -q -rf - "${in}"
-[ $? = 1 ] || test_failed $LINENO
-[ ! -e - ] || test_failed $LINENO
-"${TARLZ}" -q -r "${in}"
-[ $? = 1 ] || test_failed $LINENO
+"${TARLZ}" -r || test_failed $LINENO
"${TARLZ}" --uncompressed -q -rf out.tar "${in}"
[ $? = 1 ] || test_failed $LINENO
[ ! -e out.tar ] || test_failed $LINENO
cat "${test3_lz}" > test.tar.lz || framework_failure
"${TARLZ}" --uncompressed -q -rf test.tar.lz "${in}"
-[ $? = 1 ] || test_failed $LINENO
+[ $? = 2 ] || test_failed $LINENO
cmp "${test3_lz}" test.tar.lz || test_failed $LINENO
rm -f test.tar.lz || framework_failure
cat "${test3}" > test.tar || framework_failure
@@ -175,7 +172,7 @@ rm -f test.txt || framework_failure
cmp "${in}" test.txt || test_failed $LINENO
rm -f test.txt || framework_failure
-# reference files for cmp
+# test3 reference files for cmp
cat "${testdir}"/rfoo > cfoo || framework_failure
cat "${testdir}"/rbar > cbar || framework_failure
cat "${testdir}"/rbaz > cbaz || framework_failure
@@ -223,7 +220,31 @@ cmp cfoo dir/foo || test_failed $LINENO
cmp cbar dir/bar || test_failed $LINENO
cmp cbaz dir/baz || test_failed $LINENO
rm -rf dir || framework_failure
-#
+
+# --exclude
+"${TARLZ}" -xf "${test3}" --exclude='f*o' --exclude=baz || test_failed $LINENO
+[ ! -e foo ] || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+[ ! -e baz ] || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+"${TARLZ}" -xf "${test3_lz}" --exclude=bar || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+[ ! -e bar ] || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+"${TARLZ}" -q -xf "${test3dir_lz}" --exclude='?ar' || test_failed $LINENO
+cmp cfoo dir/foo || test_failed $LINENO
+[ ! -e dir/bar ] || test_failed $LINENO
+cmp cbaz dir/baz || test_failed $LINENO
+rm -rf dir || framework_failure
+"${TARLZ}" -q -xf "${test3dir_lz}" --exclude=dir || test_failed $LINENO
+[ ! -e dir ] || test_failed $LINENO
+rm -rf dir || framework_failure
+"${TARLZ}" -q -xf "${test3dir_lz}" --exclude='*o' dir/foo || test_failed $LINENO
+[ ! -e dir ] || test_failed $LINENO
+rm -rf dir || framework_failure
+
+# eof
"${TARLZ}" -q -tf "${testdir}"/test3_eof1.tar.lz
[ $? = 2 ] || test_failed $LINENO
"${TARLZ}" -q -tf "${testdir}"/test3_eof2.tar.lz || test_failed $LINENO
@@ -314,6 +335,9 @@ for i in 1 2 3 4 5 6 ; do
done
# test --concatenate
+cat "${in}" > out.tar.lz || framework_failure # invalid tar.lz
+"${TARLZ}" -Aqf out.tar.lz "${test3_lz}"
+[ $? = 2 ] || test_failed $LINENO
cat "${in_tar_lz}" > out.tar.lz || framework_failure
"${TARLZ}" -Af out.tar.lz "${test3_lz}" || test_failed $LINENO
"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
@@ -321,14 +345,82 @@ cmp "${in}" test.txt || test_failed $LINENO
cmp cfoo foo || test_failed $LINENO
cmp cbar bar || test_failed $LINENO
cmp cbaz baz || test_failed $LINENO
+rm -f test.txt foo bar baz || framework_failure
touch aout.tar.lz || framework_failure # concatenate to empty file
+"${TARLZ}" -Aqf aout.tar.lz "${in_tar}"
+[ $? = 2 ] || test_failed $LINENO
+"${TARLZ}" -Af aout.tar.lz "${in_tar_lz}" "${test3_lz}" || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+"${TARLZ}" -Af aout.tar.lz || test_failed $LINENO # concatenate nothing
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+"${TARLZ}" -Aqf aout.tar.lz aout.tar.lz || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+"${TARLZ}" -Aq "${in_tar_lz}" "${test3}" > aout.tar.lz # to stdout
+[ $? = 2 ] || test_failed $LINENO
+cmp "${in_tar_lz}" aout.tar.lz || test_failed $LINENO
+"${TARLZ}" -A "${in_tar_lz}" "${test3_lz}" > aout.tar.lz || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+cat "${eof_lz}" > aout.tar.lz || framework_failure # concatenate to empty archive
+"${TARLZ}" -Aqf aout.tar.lz "${in_tar}"
+[ $? = 2 ] || test_failed $LINENO
"${TARLZ}" -Af aout.tar.lz "${in_tar_lz}" "${test3_lz}" || test_failed $LINENO
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
cat "${in_tar_lz}" > aout.tar.lz || framework_failure
"${TARLZ}" -Aqf aout.tar.lz "${test3_lz}" "${test3}"
[ $? = 2 ] || test_failed $LINENO
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
-rm -f test.txt foo bar baz out.tar.lz aout.tar.lz || framework_failure
+rm -f aout.tar.lz || framework_failure
+touch aout.tar.lz || framework_failure # --exclude
+"${TARLZ}" -Af aout.tar.lz "${in_tar_lz}" "${test3_lz}" --exclude 'test3*' ||
+ test_failed $LINENO
+"${TARLZ}" -Af aout.tar.lz "${in_tar_lz}" "${test3_lz}" --exclude '*txt*' ||
+ test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+rm -f out.tar.lz aout.tar.lz || framework_failure
+
+# --uncompressed
+cat "${in}" > out.tar || framework_failure # invalid tar
+"${TARLZ}" -Aqf out.tar "${test3}"
+[ $? = 2 ] || test_failed $LINENO
+cat "${in_tar}" > out.tar || framework_failure
+"${TARLZ}" -Af out.tar "${test3}" || test_failed $LINENO
+"${TARLZ}" -xf out.tar || test_failed $LINENO
+cmp "${in}" test.txt || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f test.txt foo bar baz || framework_failure
+touch aout.tar || framework_failure # concatenate to empty file
+"${TARLZ}" -Aqf aout.tar "${in_tar_lz}"
+[ $? = 2 ] || test_failed $LINENO
+"${TARLZ}" -Af aout.tar "${in_tar}" "${test3}" || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+"${TARLZ}" -Af aout.tar || test_failed $LINENO # concatenate nothing
+cmp out.tar aout.tar || test_failed $LINENO
+"${TARLZ}" -Aqf aout.tar aout.tar || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+"${TARLZ}" -Aq "${in_tar}" "${test3_lz}" > aout.tar # to stdout
+[ $? = 2 ] || test_failed $LINENO
+cmp "${in_tar}" aout.tar || test_failed $LINENO
+"${TARLZ}" -A "${in_tar}" "${test3}" > aout.tar || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+cat "${eof}" > aout.tar || framework_failure # concatenate to empty archive
+"${TARLZ}" -Aqf aout.tar "${in_tar_lz}"
+[ $? = 2 ] || test_failed $LINENO
+"${TARLZ}" -Af aout.tar "${in_tar}" "${test3}" || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+cat "${in_tar}" > aout.tar || framework_failure
+"${TARLZ}" -Aqf aout.tar "${test3}" "${test3_lz}"
+[ $? = 2 ] || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+rm -f aout.tar || framework_failure
+touch aout.tar || framework_failure # --exclude
+"${TARLZ}" -Af aout.tar "${test3}" "${in_tar}" --exclude 'test3*' ||
+ test_failed $LINENO
+"${TARLZ}" -Af aout.tar "${test3}" "${in_tar}" --exclude '*txt*' ||
+ test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+rm -f out.tar aout.tar || framework_failure
# test --create
cat "${in}" > test.txt || framework_failure
@@ -346,7 +438,7 @@ rm -f test.txt out.tar out.tar.lz || framework_failure
cat cfoo > foo || framework_failure
rm -f bar || framework_failure
cat cbaz > baz || framework_failure
-"${TARLZ}" -q -cf out.tar.lz foo bar baz
+"${TARLZ}" -0 -q -cf out.tar.lz foo bar baz
[ $? = 1 ] || test_failed $LINENO
rm -f foo bar baz || framework_failure
"${TARLZ}" -xf out.tar.lz --missing-crc || test_failed $LINENO
@@ -364,13 +456,9 @@ rm -f out.tar.lz || framework_failure
cat cfoo > foo || framework_failure
cat cbar > bar || framework_failure
cat cbaz > baz || framework_failure
-"${TARLZ}" -0 -cf out.tar.lz foo bar baz || test_failed $LINENO
+"${TARLZ}" -0 -cf out.tar.lz foo bar baz --out-slots=1 || test_failed $LINENO
"${TARLZ}" -0 -q -cf aout.tar.lz foo bar aout.tar.lz baz || test_failed $LINENO
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
-"${TARLZ}" -q -Af aout.tar.lz aout.tar.lz || test_failed $LINENO
-cmp out.tar.lz aout.tar.lz || test_failed $LINENO
-"${TARLZ}" -q -rf aout.tar.lz aout.tar.lz || test_failed $LINENO
-cmp out.tar.lz aout.tar.lz || test_failed $LINENO
rm -f aout.tar.lz || framework_failure
#
"${TARLZ}" -0 -cf aout.tar.lz foo bar baz -C / || test_failed $LINENO
@@ -425,16 +513,81 @@ cmp cbaz dir1/baz || test_failed $LINENO
rm -rf dir1 || framework_failure
rm -f out.tar.lz aout.tar.lz || framework_failure
+# --exclude
+cat cfoo > foo || framework_failure
+cat cbar > bar || framework_failure
+cat cbaz > baz || framework_failure
+"${TARLZ}" -0 -cf out.tar.lz foo bar baz --exclude 'ba?' || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+[ ! -e bar ] || test_failed $LINENO
+[ ! -e baz ] || test_failed $LINENO
+rm -f out.tar.lz foo bar baz || framework_failure
+cat cfoo > foo || framework_failure
+cat cbar > bar || framework_failure
+cat cbaz > baz || framework_failure
+"${TARLZ}" --un -cf out.tar foo bar baz --exclude 'ba*' || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+"${TARLZ}" -xf out.tar || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+[ ! -e bar ] || test_failed $LINENO
+[ ! -e baz ] || test_failed $LINENO
+rm -f out.tar foo bar baz || framework_failure
+
+# test --dereference
+touch dummy_file || framework_failure
+if ln dummy_file dummy_link 2> /dev/null &&
+ ln -s dummy_file dummy_slink 2> /dev/null ; then
+ ln_works=yes
+else
+ printf "\nwarning: skipping link test: 'ln' does not work on your system."
+fi
+rm -f dummy_slink dummy_link dummy_file || framework_failure
+
+if [ "${ln_works}" = yes ] ; then
+ mkdir dir || framework_failure
+ cat cfoo > dir/foo || framework_failure
+ cat cbar > dir/bar || framework_failure
+ cat cbaz > dir/baz || framework_failure
+ ln -s dir dir_link || framework_failure
+ "${TARLZ}" -0 -cf out1 dir_link || test_failed $LINENO
+ "${TARLZ}" --un -cf out2 dir_link || test_failed $LINENO
+ "${TARLZ}" -0 -n0 -cf out3 dir_link || test_failed $LINENO
+ "${TARLZ}" -0 -h -cf hout1 dir_link || test_failed $LINENO
+ "${TARLZ}" --un -h -cf hout2 dir_link || test_failed $LINENO
+ "${TARLZ}" -0 -n0 -h -cf hout3 dir_link || test_failed $LINENO
+ rm -rf dir dir_link || framework_failure
+ for i in 1 2 3 ; do
+ "${TARLZ}" -xf out$i || test_failed $LINENO $i
+ [ -h dir_link ] || test_failed $LINENO $i
+ [ ! -e dir_link/foo ] || test_failed $LINENO $i
+ [ ! -e dir_link/bar ] || test_failed $LINENO $i
+ [ ! -e dir_link/baz ] || test_failed $LINENO $i
+ rm -rf dir_link out$i || framework_failure
+ "${TARLZ}" -xf hout$i || test_failed $LINENO $i
+ [ -d dir_link ] || test_failed $LINENO $i
+ cmp cfoo dir_link/foo || test_failed $LINENO $i
+ cmp cbar dir_link/bar || test_failed $LINENO $i
+ cmp cbaz dir_link/baz || test_failed $LINENO $i
+ rm -rf dir_link hout$i || framework_failure
+ done
+fi
+
# test --append
cat cfoo > foo || framework_failure
cat cbar > bar || framework_failure
cat cbaz > baz || framework_failure
-"${TARLZ}" -0 -cf out.tar.lz foo bar baz || test_failed $LINENO
+"${TARLZ}" -0 -cf out.tar.lz foo bar baz --out-slots=1024 || test_failed $LINENO
"${TARLZ}" -0 -cf nout.tar.lz foo bar baz --no-solid || test_failed $LINENO
"${TARLZ}" -0 -cf aout.tar.lz foo || test_failed $LINENO
"${TARLZ}" -0 -rf aout.tar.lz bar baz --no-solid || test_failed $LINENO
cmp nout.tar.lz aout.tar.lz || test_failed $LINENO
rm -f nout.tar.lz aout.tar.lz || framework_failure
+touch aout.tar || framework_failure # wrong extension empty file
+"${TARLZ}" -0 -rf aout.tar foo bar baz || test_failed $LINENO
+cmp out.tar.lz aout.tar || test_failed $LINENO
+rm -f aout.tar || framework_failure
touch aout.tar.lz || framework_failure # append to empty file
"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
@@ -445,10 +598,55 @@ cmp out.tar.lz aout.tar.lz || test_failed $LINENO
"${TARLZ}" -0 -q -rf aout.tar.lz nx_file
[ $? = 1 ] || test_failed $LINENO
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
-cat "${eof_lz}" > aout.tar.lz || framework_failure # append to empty archive
+"${TARLZ}" -0 -q -rf aout.tar.lz aout.tar.lz || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+"${TARLZ}" -0 -r foo bar baz > aout.tar.lz || test_failed $LINENO # to stdout
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+"${TARLZ}" --un -q -rf aout.tar.lz foo bar baz # wrong extension archive
+[ $? = 2 ] || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+cat "${eof_lz}" > aout.tar.lz || framework_failure # append to empty archive
"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO
cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+"${TARLZ}" --un -q -rf aout.tar.lz foo bar baz # wrong extension empty archive
+[ $? = 2 ] || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
rm -f out.tar.lz aout.tar.lz || framework_failure
+#
+"${TARLZ}" --un -cf out.tar foo bar baz || test_failed $LINENO
+"${TARLZ}" --un -cf aout.tar foo || test_failed $LINENO
+"${TARLZ}" --un -rf aout.tar foo bar baz --exclude foo || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+rm -f aout.tar || framework_failure
+touch aout.tar.lz empty || framework_failure # wrong extension empty file
+"${TARLZ}" --un -q -rf aout.tar.lz foo bar baz
+[ $? = 2 ] || test_failed $LINENO
+cmp aout.tar.lz empty || test_failed $LINENO
+rm -f aout.tar.lz empty || framework_failure
+touch aout.tar || framework_failure # append to empty file
+"${TARLZ}" --un -rf aout.tar foo bar baz || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+"${TARLZ}" --un -rf aout.tar || test_failed $LINENO # append nothing
+cmp out.tar aout.tar || test_failed $LINENO
+"${TARLZ}" --un -rf aout.tar -C nx_dir || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+"${TARLZ}" --un -q -rf aout.tar nx_file
+[ $? = 1 ] || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+"${TARLZ}" --un -q -rf aout.tar aout.tar || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+"${TARLZ}" --un -r foo bar baz > aout.tar || test_failed $LINENO # to stdout
+cmp out.tar aout.tar || test_failed $LINENO
+"${TARLZ}" -0 -q -rf aout.tar foo bar baz # wrong extension archive
+[ $? = 2 ] || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+cat "${eof}" > aout.tar || framework_failure # append to empty archive
+"${TARLZ}" --un -rf aout.tar foo bar baz || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+"${TARLZ}" -0 -q -rf aout.tar foo bar baz # wrong extension empty archive
+[ $? = 2 ] || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+rm -f out.tar aout.tar || framework_failure
# append to solid archive
"${TARLZ}" --solid -q -0 -cf out.tar.lz "${in}" foo bar || test_failed $LINENO
@@ -483,9 +681,11 @@ rm -f foo bar baz || framework_failure
if cmp out.tar aout.tar > /dev/null ; then
printf "\nwarning: --diff test can't be run as root."
else
- "${TARLZ}" -q -df "${test3_lz}"
+ "${TARLZ}" -df "${test3_lz}" > /dev/null
[ $? = 1 ] || test_failed $LINENO
"${TARLZ}" -df "${test3_lz}" --ignore-ids || test_failed $LINENO
+ "${TARLZ}" -df "${test3_lz}" --exclude '*' || test_failed $LINENO
+ "${TARLZ}" -df "${in_tar_lz}" --exclude '*' || test_failed $LINENO
fi
rm -f out.tar aout.tar foo bar baz || framework_failure
@@ -505,10 +705,7 @@ rmdir dir1 || framework_failure
rmdir dir1
rm -f out.tar || framework_failure
-touch dummy_file || framework_failure
-if ln dummy_file dummy_link 2> /dev/null &&
- ln -s dummy_file dummy_slink 2> /dev/null ; then
- ln_works=yes
+if [ "${ln_works}" = yes ] ; then
name_100=name_100_bytes_long_nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
path_100=dir1/dir2/dir3/path_100_bytes_long_nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
path_106=dir1/dir2/dir3/path_longer_than_100_bytes_nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
@@ -543,15 +740,12 @@ if ln dummy_file dummy_link 2> /dev/null &&
cmp "${in}" dir1/dir2/dir3/link || test_failed $LINENO
"${TARLZ}" -0 -q -c ../tmp/dir1 | "${TARLZ}" -x || test_failed $LINENO
diff -ru tmp/dir1 dir1 || test_failed $LINENO
- rm -rf tmp/dir1 dir1 || framework_failure
+ rm -rf tmp dir1 || framework_failure
"${TARLZ}" -xf "${testdir}"/ts_in_link.tar.lz || test_failed $LINENO
"${TARLZ}" -df "${testdir}"/ts_in_link.tar.lz --ignore-ids ||
test_failed $LINENO
rm -f link1 link2 link3 link4 || framework_failure
-else
- printf "\nwarning: skipping link test: 'ln' does not work on your system."
fi
-rm -f dummy_slink dummy_link dummy_file || framework_failure
printf "\ntesting long names..."
diff --git a/testsuite/eof.tar b/testsuite/eof.tar
new file mode 100644
index 0000000..06d7405
--- /dev/null
+++ b/testsuite/eof.tar
Binary files differ
diff --git a/testsuite/test.txt.tar.lz b/testsuite/test.txt.tar.lz
index 15c0131..306eeeb 100644
--- a/testsuite/test.txt.tar.lz
+++ b/testsuite/test.txt.tar.lz
Binary files differ
diff --git a/testsuite/test_bad1.txt.tar.lz b/testsuite/test_bad1.txt.tar.lz
index f14173d..afb1e85 100644
--- a/testsuite/test_bad1.txt.tar.lz
+++ b/testsuite/test_bad1.txt.tar.lz
Binary files differ
diff --git a/testsuite/test_bad2.txt.tar.lz b/testsuite/test_bad2.txt.tar.lz
index 5d30ebf..598e121 100644
--- a/testsuite/test_bad2.txt.tar.lz
+++ b/testsuite/test_bad2.txt.tar.lz
Binary files differ