summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2019-01-23 17:42:00 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2019-01-23 17:42:00 +0000
commit aa4d2adf37f7449dd1a99df517de0a9ee97867bd (patch)
tree 64105f8a638430ae309b44e2a745ff3c027b2eb1
parent Adding upstream version 0.8. (diff)
download tarlz-aa4d2adf37f7449dd1a99df517de0a9ee97867bd.tar.xz
tarlz-aa4d2adf37f7449dd1a99df517de0a9ee97867bd.zip
Adding upstream version 0.9. upstream/0.9
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
-rw-r--r-- ChangeLog 11
-rw-r--r-- INSTALL 11
-rw-r--r-- Makefile.in 10
-rw-r--r-- NEWS 26
-rw-r--r-- README 39
-rw-r--r-- arg_parser.cc 2
-rw-r--r-- arg_parser.h 2
-rwxr-xr-x configure 6
-rw-r--r-- create.cc 55
-rw-r--r-- doc/tarlz.1 25
-rw-r--r-- doc/tarlz.info 153
-rw-r--r-- doc/tarlz.texi 136
-rw-r--r-- extract.cc 335
-rw-r--r-- list_lz.cc 699
-rw-r--r-- lzip.h 42
-rw-r--r-- lzip_index.cc 204
-rw-r--r-- lzip_index.h 87
-rw-r--r-- main.cc 113
-rw-r--r-- tarlz.h 68
-rwxr-xr-x testsuite/check.sh 92
-rw-r--r-- testsuite/tar_in_tlz1.tar.lz bin 0 -> 7680 bytes
-rw-r--r-- testsuite/tar_in_tlz2.tar.lz bin 0 -> 7807 bytes
-rw-r--r-- testsuite/test3_eof1.tar.lz bin 0 -> 312 bytes
-rw-r--r-- testsuite/test3_eof2.tar.lz bin 0 -> 352 bytes
-rw-r--r-- testsuite/test3_eof3.tar.lz bin 0 -> 396 bytes
25 files changed, 1762 insertions, 354 deletions
diff --git a/ChangeLog b/ChangeLog
index 3e13c24..133419a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2019-01-22 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.9 released.
+ * Implemented multi-threaded '-t, --list'.
+ * Added new option '-n, --threads'.
+ * Recognize global pax headers. Ignore them for now.
+ * strtoul has been replaced with length-safe parsers.
+ * tarlz.texi: Added new chapter 'Limitations of parallel tar decoding'.
+
2018-12-16 Antonio Diaz Diaz <antonio@gnu.org>
* Version 0.8 released.
@@ -66,7 +75,7 @@
* Version 0.1 released.
-Copyright (C) 2013-2018 Antonio Diaz Diaz.
+Copyright (C) 2013-2019 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute and
diff --git a/INSTALL b/INSTALL
index 680cacf..fd50363 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,11 +1,10 @@
Requirements
------------
You will need a C++ compiler and the lzlib compression library installed.
-I use gcc 5.3.0 and 4.1.2, but the code should compile with any
-standards compliant compiler.
-Lzlib must be version 1.0 or newer, but --keep-damaged requires lzlib
-1.11-rc2 or newer to recover as much data as possible from each damaged
-member.
+I use gcc 5.3.0 and 4.1.2, but the code should compile with any standards
+compliant compiler.
+Lzlib must be version 1.0 or newer, but --keep-damaged requires lzlib 1.11
+or newer to recover as much data as possible from each damaged member.
Gcc is available at http://gcc.gnu.org.
Lzlib is available at http://www.nongnu.org/lzip/lzlib.html.
@@ -66,7 +65,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above.
-Copyright (C) 2013-2018 Antonio Diaz Diaz.
+Copyright (C) 2013-2019 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.
diff --git a/Makefile.in b/Makefile.in
index fdcae2d..8e41edb 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -4,11 +4,11 @@ INSTALL = install
INSTALL_PROGRAM = $(INSTALL) -m 755
INSTALL_DATA = $(INSTALL) -m 644
INSTALL_DIR = $(INSTALL) -d -m 755
-LIBS = -llz
+LIBS = -llz -lpthread
SHELL = /bin/sh
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
-objs = arg_parser.o create.o extract.o main.o
+objs = arg_parser.o lzip_index.o create.o extract.o list_lz.o main.o
.PHONY : all install install-bin install-info install-man \
@@ -31,7 +31,9 @@ main.o : main.cc
$(objs) : Makefile
arg_parser.o : arg_parser.h
create.o : arg_parser.h lzip.h tarlz.h
-extract.o : arg_parser.h lzip.h tarlz.h
+extract.o : arg_parser.h lzip.h lzip_index.h tarlz.h
+list_lz.o : arg_parser.h lzip.h lzip_index.h tarlz.h
+lzip_index.o : lzip.h lzip_index.h
main.o : arg_parser.h tarlz.h
@@ -127,7 +129,9 @@ dist : doc
$(DISTNAME)/testsuite/test.txt.tar.lz \
$(DISTNAME)/testsuite/test_bad[12].txt.tar.lz \
$(DISTNAME)/testsuite/test3.tar.lz \
+ $(DISTNAME)/testsuite/test3_eof[123].tar.lz \
$(DISTNAME)/testsuite/tlz_in_tar[12].tar \
+ $(DISTNAME)/testsuite/tar_in_tlz[12].tar.lz \
$(DISTNAME)/testsuite/test3_dir.tar.lz \
$(DISTNAME)/testsuite/test3_dot.tar.lz \
$(DISTNAME)/testsuite/t155.tar.lz \
diff --git a/NEWS b/NEWS
index 2dcbb9b..cc2b1db 100644
--- a/NEWS
+++ b/NEWS
@@ -1,18 +1,16 @@
-Changes in version 0.8:
+Changes in version 0.9:
-The new option '--anonymous', equivalent to '--owner=root --group=root', has
-been added.
+Multi-threaded '-t, --list' has been implemented. See chapter 'Limitations
+of parallel tar decoding' in the manual for details.
-On extraction and listing, tarlz now removes leading './' strings also from
-member names given in the command line. 'tarlz -xf foo ./bar' now extracts
-member 'bar' from archive 'foo'. (Reported by Viktor Sergiienko in the
-bug-tar mailing list).
+The new option '-n, --threads', which sets the number of decompression
+threads, has been added.
-Tarlz now writes extended headers with all fields zeroed except size,
-chksum, typeflag, magic and version. This prevents old tar programs from
-extracting the extended records as a file in the wrong place (with a
-truncated filename). Tarlz now also sets to zero those fields of the ustar
-header overridden by extended records.
+Tarlz now recognizes global pax headers, but for now ignores them.
-The chapter 'Amendments to pax format', explaining the reasons for the
-differences with the pax format, has been added.
+Tarlz now decodes numerical fields in headers using length-safe parsers
+instead of strtoul to prevent the parser from exceeding the end of the field
+if it does not contain a terminating character.
+
+The new chapter 'Limitations of parallel tar decoding' has been added to the
+manual.
diff --git a/README b/README
index 44edeea..5a9a673 100644
--- a/README
+++ b/README
@@ -1,22 +1,24 @@
Description
-Tarlz is a small and simple implementation of the tar archiver. By default
-tarlz creates, lists and extracts archives in a simplified posix pax format
-compressed with lzip on a per file basis. Each tar member is compressed in
-its own lzip member, as well as the end-of-file blocks. This method is fully
-backward compatible with standard tar tools like GNU tar, which treat the
-resulting multimember tar.lz archive like any other tar.lz archive. Tarlz
-can append files to the end of such compressed archives.
-
-Tarlz can create tar archives with four levels of compression
-granularity; per file, per directory, appendable solid, and solid.
+Tarlz is a combined implementation of the tar archiver and the lzip
+compressor. By default tarlz creates, lists and extracts archives in a
+simplified posix pax format compressed with lzip on a per file basis. Each
+tar member is compressed in its own lzip member, as well as the end-of-file
+blocks. This method adds an indexed lzip layer on top of the tar archive,
+making it possible to decode the archive safely in parallel. The resulting
+multimember tar.lz archive is fully backward compatible with standard tar
+tools like GNU tar, which treat it like any other tar.lz archive. Tarlz can
+append files to the end of such compressed archives.
+
+Tarlz can create tar archives with four levels of compression granularity;
+per file, per directory, appendable solid, and solid.
Of course, compressing each file (or each directory) individually is
less efficient than compressing the whole tar archive, but it has the
following advantages:
* The resulting multimember tar.lz archive can be decompressed in
- parallel with plzip, multiplying the decompression speed.
+ parallel, multiplying the decompression speed.
* New members can be appended to the archive (by removing the EOF
member) just like to an uncompressed tar archive.
@@ -32,13 +34,13 @@ following advantages:
corresponding solidly compressed tar.gz archive, except when
individually compressing files smaller than about 32 KiB.
-Note that the posix pax format has a serious flaw. The metadata stored
-in pax extended records are not protected by any kind of check sequence.
+Note that the posix pax format has a serious flaw. The metadata stored in
+pax extended records are not protected by any kind of check sequence.
Corruption in a long filename may cause the extraction of the file in the
-wrong place without warning. Corruption in a long file size may cause the
+wrong place without warning. Corruption in a large file size may cause the
truncation of the file or the appending of garbage to the file, both
-followed by a spurious warning about a corrupt header far from the place
-of the undetected corruption.
+followed by a spurious warning about a corrupt header far from the place of
+the undetected corruption.
Metadata like filename and file size must be always protected in an archive
format because of the adverse effects of undetected corruption in them,
@@ -51,9 +53,6 @@ a way compatible with standard tar tools.
Tarlz does not understand other tar formats like gnu, oldgnu, star or v7.
-Tarlz is intended as a showcase project for the maintainers of real tar
-programs to evaluate the format and perhaps implement it in their tools.
-
The diagram below shows the correspondence between each tar member
(formed by one or two headers plus optional data) in the tar archive and
each lzip member in the resulting multimember tar.lz archive:
@@ -69,7 +68,7 @@ tar.lz
+===============+=================================================+========+
-Copyright (C) 2013-2018 Antonio Diaz Diaz.
+Copyright (C) 2013-2019 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.
diff --git a/arg_parser.cc b/arg_parser.cc
index 008ebc8..ea32fde 100644
--- a/arg_parser.cc
+++ b/arg_parser.cc
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
- Copyright (C) 2006-2018 Antonio Diaz Diaz.
+ Copyright (C) 2006-2019 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
diff --git a/arg_parser.h b/arg_parser.h
index f015881..ceb9933 100644
--- a/arg_parser.h
+++ b/arg_parser.h
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
- Copyright (C) 2006-2018 Antonio Diaz Diaz.
+ Copyright (C) 2006-2019 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
diff --git a/configure b/configure
index 239d732..a0a9493 100755
--- a/configure
+++ b/configure
@@ -1,12 +1,12 @@
#! /bin/sh
# configure script for Tarlz - Archiver with multimember lzip compression
-# Copyright (C) 2013-2018 Antonio Diaz Diaz.
+# Copyright (C) 2013-2019 Antonio Diaz Diaz.
#
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
pkgname=tarlz
-pkgversion=0.8
+pkgversion=0.9
progname=tarlz
srctrigger=doc/${pkgname}.texi
@@ -170,7 +170,7 @@ echo "LDFLAGS = ${LDFLAGS}"
rm -f Makefile
cat > Makefile << EOF
# Makefile for Tarlz - Archiver with multimember lzip compression
-# Copyright (C) 2013-2018 Antonio Diaz Diaz.
+# Copyright (C) 2013-2019 Antonio Diaz Diaz.
# This file was generated automatically by configure. Don't edit.
#
# This Makefile is free software: you have unlimited permission
diff --git a/create.cc b/create.cc
index ba7d10a..7310aee 100644
--- a/create.cc
+++ b/create.cc
@@ -1,5 +1,5 @@
/* Tarlz - Archiver with multimember lzip compression
- Copyright (C) 2013-2018 Antonio Diaz Diaz.
+ Copyright (C) 2013-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -46,7 +46,7 @@ const CRC32C crc32c;
int cl_owner = -1; // global vars needed by add_member
int cl_group = -1;
-int cl_solid = 0; // 1 = dsolid, 2 = asolid, 3 = solid
+Solidity solidity = no_solid;
namespace {
@@ -110,7 +110,7 @@ bool check_appendable( const int fd, const bool remove_eof )
if( rd == 0 && errno == 0 ) return true; // append to empty archive
if( rd < min_member_size || ( rd != bufsize && errno ) ) return false;
const Lzip_header * const p = (const Lzip_header *)buf; // shut up gcc
- if( !p->verify_magic() ) return false;
+ if( !p->verify_magic() || !p->verify_version() ) return false;
LZ_Decoder * decoder = LZ_decompress_open(); // decompress first header
if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ||
LZ_decompress_write( decoder, buf, rd ) != rd ||
@@ -133,8 +133,8 @@ bool check_appendable( const int fd, const bool remove_eof )
Lzip_header header;
if( seek_read( fd, header.data, Lzip_header::size,
end - member_size ) != Lzip_header::size ) return false;
- if( !header.verify_magic() || !isvalid_ds( header.dictionary_size() ) )
- return false;
+ if( !header.verify_magic() || !header.verify_version() ||
+ !isvalid_ds( header.dictionary_size() ) ) return false;
const unsigned long long data_size = trailer.data_size();
if( data_size < header_size || data_size > 32256 ) return false;
@@ -218,7 +218,7 @@ void print_hex( char * const buf, int size, unsigned long long num )
while( --size >= 0 ) { buf[size] = xdigit( num & 0x0F ); num >>= 4; }
}
-void print_octal( char * const buf, int size, unsigned long long num )
+void print_octal( uint8_t * const buf, int size, unsigned long long num )
{
while( --size >= 0 ) { buf[size] = '0' + ( num % 8 ); num /= 8; }
}
@@ -230,13 +230,14 @@ unsigned decimal_digits( unsigned long long value )
return digits;
}
-unsigned long long record_size( const unsigned keyword_size,
- const unsigned long long value_size )
+int record_size( const unsigned keyword_size, const unsigned long value_size )
{
// size = ' ' + keyword + '=' + value + '\n'
- const unsigned long long size = 1 + keyword_size + 1 + value_size + 1;
+ unsigned long long size = 1 + keyword_size + 1 + value_size + 1;
const unsigned d1 = decimal_digits( size );
- return decimal_digits( d1 + size ) + size;
+ size += decimal_digits( d1 + size );
+ if( size >= INT_MAX ) size = 0; // overflows snprintf size
+ return size;
}
bool write_extended( const Extended & extended )
@@ -274,9 +275,8 @@ bool write_extended( const Extended & extended )
init_tar_header( header );
header[typeflag_o] = tf_extended; // fill only required fields
print_octal( header + size_o, size_l - 1, edsize );
- print_octal( header + chksum_o, chksum_l - 1,
- ustar_chksum( (const uint8_t *)header ) );
- if( !archive_write( (const uint8_t *)header, header_size ) ) goto error;
+ print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) );
+ if( !archive_write( header, header_size ) ) goto error;
for( pos = 0; pos < bufsize; ) // write extended records to archive
{
int size = std::min( bufsize - pos, 1ULL << 20 );
@@ -387,7 +387,7 @@ int add_member( const char * const filename, const struct stat *,
typeflag = tf_symlink;
long len;
if( st.st_size <= linkname_l )
- len = readlink( filename, header + linkname_o, linkname_l );
+ len = readlink( filename, (char *)header + linkname_o, linkname_l );
else
{
char * const buf = new char[st.st_size+1];
@@ -414,20 +414,19 @@ int add_member( const char * const filename, const struct stat *,
header[typeflag_o] = typeflag;
const struct passwd * const pw = getpwuid( uid );
if( pw && pw->pw_name )
- std::strncpy( header + uname_o, pw->pw_name, uname_l - 1 );
+ std::strncpy( (char *)header + uname_o, pw->pw_name, uname_l - 1 );
const struct group * const gr = getgrgid( gid );
if( gr && gr->gr_name )
- std::strncpy( header + gname_o, gr->gr_name, gname_l - 1 );
+ std::strncpy( (char *)header + gname_o, gr->gr_name, gname_l - 1 );
if( file_size >= 1ULL << 33 ) extended.size = file_size;
else print_octal( header + size_o, size_l - 1, file_size );
- print_octal( header + chksum_o, chksum_l - 1,
- ustar_chksum( (const uint8_t *)header ) );
+ print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) );
const int infd = file_size ? open_instream( filename ) : -1;
if( file_size && infd < 0 ) { gretval = 1; return 0; }
if( !extended.empty() && !write_extended( extended ) )
{ show_error( "Error writing extended header", errno ); return 1; }
- if( !archive_write( (const uint8_t *)header, header_size ) )
+ if( !archive_write( header, header_size ) )
{ show_error( "Error writing ustar header", errno ); return 1; }
if( file_size )
{
@@ -460,7 +459,7 @@ int add_member( const char * const filename, const struct stat *,
if( close( infd ) != 0 )
{ show_file_error( filename, "Error closing file", errno ); return 1; }
}
- if( encoder && cl_solid == 0 && !archive_write( 0, 0 ) ) // flush encoder
+ if( encoder && solidity == no_solid && !archive_write( 0, 0 ) )
{ show_error( "Error flushing encoder", errno ); return 1; }
if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
return 0;
@@ -469,18 +468,18 @@ int add_member( const char * const filename, const struct stat *,
} // end namespace
-unsigned ustar_chksum( const uint8_t * const buf )
+unsigned ustar_chksum( const uint8_t * const header )
{
unsigned chksum = chksum_l * 0x20; // treat chksum field as spaces
- for( int i = 0; i < chksum_o; ++i ) chksum += buf[i];
- for( int i = chksum_o + chksum_l; i < header_size; ++i ) chksum += buf[i];
+ for( int i = 0; i < chksum_o; ++i ) chksum += header[i];
+ for( int i = chksum_o + chksum_l; i < header_size; ++i ) chksum += header[i];
return chksum;
}
-bool verify_ustar_chksum( const uint8_t * const buf )
- { return ( verify_ustar_magic( buf ) &&
- ustar_chksum( buf ) == strtoul( (const char *)buf + chksum_o, 0, 8 ) ); }
+bool verify_ustar_chksum( const uint8_t * const header )
+ { return ( verify_ustar_magic( header ) &&
+ ustar_chksum( header ) == parse_octal( header + chksum_o, chksum_l ) ); }
int concatenate( const std::string & archive_name, const Arg_parser & parser,
@@ -611,7 +610,7 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
if( gretval < 1 ) gretval = 1; }
else if( ( retval = nftw( filename, add_member, 16, FTW_PHYS ) ) != 0 )
break; // write error
- else if( encoder && cl_solid == 1 && !archive_write( 0, 0 ) ) // flush encoder
+ else if( encoder && solidity == dsolid && !archive_write( 0, 0 ) )
{ show_error( "Error flushing encoder", errno ); retval = 1; }
}
@@ -620,7 +619,7 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
enum { bufsize = 2 * header_size };
uint8_t buf[bufsize];
std::memset( buf, 0, bufsize );
- if( encoder && cl_solid == 2 && !archive_write( 0, 0 ) ) // flush encoder
+ if( encoder && solidity == asolid && !archive_write( 0, 0 ) )
{ show_error( "Error flushing encoder", errno ); retval = 1; }
else if( !archive_write( buf, bufsize ) ||
( encoder && !archive_write( 0, 0 ) ) ) // flush encoder
diff --git a/doc/tarlz.1 b/doc/tarlz.1
index 906fee0..b83a7e6 100644
--- a/doc/tarlz.1
+++ b/doc/tarlz.1
@@ -1,18 +1,20 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
-.TH TARLZ "1" "December 2018" "tarlz 0.8" "User Commands"
+.TH TARLZ "1" "January 2019" "tarlz 0.9" "User Commands"
.SH NAME
tarlz \- creates tar archives with multimember lzip compression
.SH SYNOPSIS
.B tarlz
[\fI\,options\/\fR] [\fI\,files\/\fR]
.SH DESCRIPTION
-Tarlz is a small and simple implementation of the tar archiver. By default
-tarlz creates, lists and extracts archives in a simplified posix pax format
-compressed with lzip on a per file basis. Each tar member is compressed in
-its own lzip member, as well as the end\-of\-file blocks. This method is fully
-backward compatible with standard tar tools like GNU tar, which treat the
-resulting multimember tar.lz archive like any other tar.lz archive. Tarlz
-can append files to the end of such compressed archives.
+Tarlz is a combined implementation of the tar archiver and the lzip
+compressor. By default tarlz creates, lists and extracts archives in a
+simplified posix pax format compressed with lzip on a per file basis. Each
+tar member is compressed in its own lzip member, as well as the end\-of\-file
+blocks. This method adds an indexed lzip layer on top of the tar archive,
+making it possible to decode the archive safely in parallel. The resulting
+multimember tar.lz archive is fully backward compatible with standard tar
+tools like GNU tar, which treat it like any other tar.lz archive. Tarlz can
+append files to the end of such compressed archives.
.PP
The tarlz file format is a safe posix\-style backup format. In case of
corruption, tarlz can extract all the undamaged members from the tar.lz
@@ -40,6 +42,9 @@ change to directory <dir>
\fB\-f\fR, \fB\-\-file=\fR<archive>
use archive file <archive>
.TP
+\fB\-n\fR, \fB\-\-threads=\fR<n>
+set number of decompression threads [2]
+.TP
\fB\-q\fR, \fB\-\-quiet\fR
suppress all messages
.TP
@@ -97,8 +102,8 @@ Report bugs to lzip\-bug@nongnu.org
.br
Tarlz home page: http://www.nongnu.org/lzip/tarlz.html
.SH COPYRIGHT
-Copyright \(co 2018 Antonio Diaz Diaz.
-Using lzlib 1.11\-rc2
+Copyright \(co 2019 Antonio Diaz Diaz.
+Using lzlib 1.11
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
.br
This is free software: you are free to change and redistribute it.
diff --git a/doc/tarlz.info b/doc/tarlz.info
index d6d17d0..7f90766 100644
--- a/doc/tarlz.info
+++ b/doc/tarlz.info
@@ -11,7 +11,7 @@ File: tarlz.info, Node: Top, Next: Introduction, Up: (dir)
Tarlz Manual
************
-This manual is for Tarlz (version 0.8, 16 December 2018).
+This manual is for Tarlz (version 0.9, 22 January 2019).
* Menu:
@@ -19,12 +19,13 @@ This manual is for Tarlz (version 0.8, 16 December 2018).
* Invoking tarlz:: Command line interface
* File format:: Detailed format of the compressed archive
* Amendments to pax format:: The reasons for the differences with pax
+* Multi-threaded tar:: Limitations of parallel tar decoding
* Examples:: A small tutorial with examples
* Problems:: Reporting bugs
* Concept index:: Index of concepts
- Copyright (C) 2013-2018 Antonio Diaz Diaz.
+ Copyright (C) 2013-2019 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to
copy, distribute and modify it.
@@ -35,12 +36,14 @@ File: tarlz.info, Node: Introduction, Next: Invoking tarlz, Prev: Top, Up: T
1 Introduction
**************
-Tarlz is a small and simple implementation of the tar archiver. By
-default tarlz creates, lists and extracts archives in a simplified
-posix pax format compressed with lzip on a per file basis. Each tar
-member is compressed in its own lzip member, as well as the end-of-file
-blocks. This method is fully backward compatible with standard tar tools
-like GNU tar, which treat the resulting multimember tar.lz archive like
+Tarlz is a combined implementation of the tar archiver and the lzip
+compressor. By default tarlz creates, lists and extracts archives in a
+simplified posix pax format compressed with lzip on a per file basis.
+Each tar member is compressed in its own lzip member, as well as the
+end-of-file blocks. This method adds an indexed lzip layer on top of
+the tar archive, making it possible to decode the archive safely in
+parallel. The resulting multimember tar.lz archive is fully backward
+compatible with standard tar tools like GNU tar, which treat it like
any other tar.lz archive. Tarlz can append files to the end of such
compressed archives.
@@ -52,7 +55,7 @@ less efficient than compressing the whole tar archive, but it has the
following advantages:
* The resulting multimember tar.lz archive can be decompressed in
- parallel with plzip, multiplying the decompression speed.
+ parallel, multiplying the decompression speed.
* New members can be appended to the archive (by removing the EOF
member) just like to an uncompressed tar archive.
@@ -74,10 +77,6 @@ with standard tar tools. *Note crc32::.
Tarlz does not understand other tar formats like 'gnu', 'oldgnu',
'star' or 'v7'.
- Tarlz is intended as a showcase project for the maintainers of real
-tar programs to evaluate the format and perhaps implement it in their
-tools.
-

File: tarlz.info, Node: Invoking tarlz, Next: File format, Prev: Introduction, Up: Top
@@ -141,6 +140,21 @@ archive 'foo'.
Use archive file ARCHIVE. '-' used as an ARCHIVE argument reads
from standard input or writes to standard output.
+'-n N'
+'--threads=N'
+ Set the number of decompression threads, overriding the system's
+ default. Valid values range from 0 to "as many as your system can
+ support". A value of 0 disables threads entirely. If this option
+ is not used, tarlz tries to detect the number of processors in the
+ system and use it as default value. 'tarlz --help' shows the
+ system's default value. This option currently only has effect when
+ listing the contents of a multimember compressed archive. *Note
+ Multi-threaded tar::.
+
+ Note that the number of usable threads is limited during
+ decompression to the number of lzip members in the tar.lz archive,
+ which you can find by running 'lzip -lv archive.tar.lz'.
+
'-q'
'--quiet'
Quiet operation. Suppress all messages.
@@ -288,6 +302,11 @@ following sequence:
* Zero or more blocks that contain the contents of the file.
+ Each tar member must be contiguously stored in a lzip member for the
+parallel decoding operations like '--list' to work. If any tar member
+is split over two or more lzip members, the archive must be decoded
+sequentially. *Note Multi-threaded tar::.
+
At the end of the archive file there are two 512-byte blocks filled
with binary zeros, interpreted as an end-of-archive indicator. These EOF
blocks are either compressed in a separate lzip member or compressed
@@ -417,19 +436,12 @@ record is used to store the linkname.
The mode field provides 12 access permission bits. The following
table shows the symbolic name of each bit and its octal value:
-Bit Name Bit value
-S_ISUID 04000
-S_ISGID 02000
-S_ISVTX 01000
-S_IRUSR 00400
-S_IWUSR 00200
-S_IXUSR 00100
-S_IRGRP 00040
-S_IWGRP 00020
-S_IXGRP 00010
-S_IROTH 00004
-S_IWOTH 00002
-S_IXOTH 00001
+Bit Name Value Bit Name Value Bit Name Value
+---------------------------------------------------
+S_ISUID 04000 S_ISGID 02000 S_ISVTX 01000
+S_IRUSR 00400 S_IWUSR 00200 S_IXUSR 00100
+S_IRGRP 00040 S_IWGRP 00020 S_IXGRP 00010
+S_IROTH 00004 S_IWOTH 00002 S_IXOTH 00001
The uid and gid fields are the user and group ID of the owner and
group of the file, respectively.
@@ -485,12 +497,16 @@ file archived:
The magic field contains the ASCII null-terminated string "ustar".
The version field contains the characters "00" (0x30,0x30). The fields
-uname, and gname are null-terminated character strings. Each numeric
-field contains a leading zero-filled, null-terminated octal number using
-digits from the ISO/IEC 646:1991 (ASCII) standard.
+uname and gname are null-terminated character strings, except when every
+character in the array, including the last one, is non-null.
+Each numeric field contains a leading space- or zero-filled,
+optionally null-terminated octal number using digits from the ISO/IEC
+646:1991 (ASCII) standard. Tarlz is able to decode numeric fields 1
+byte larger than standard ustar by not requiring a terminating null
+character.

-File: tarlz.info, Node: Amendments to pax format, Next: Examples, Prev: File format, Up: Top
+File: tarlz.info, Node: Amendments to pax format, Next: Multi-threaded tar, Prev: File format, Up: Top
4 The reasons for the differences with pax
******************************************
@@ -508,7 +524,7 @@ and the concrete reasons to implement them.
The posix pax format has a serious flaw. The metadata stored in pax
extended records are not protected by any kind of check sequence.
Corruption in a long filename may cause the extraction of the file in
-the wrong place without warning. Corruption in a long file size may
+the wrong place without warning. Corruption in a large file size may
cause the truncation of the file or the appending of garbage to the
file, both followed by a spurious warning about a corrupt header far
from the place of the undetected corruption.
@@ -573,9 +589,57 @@ prevents accidental double UTF-8 conversions. If the need arises this
behavior will be adjusted with a command line option in the future.

-File: tarlz.info, Node: Examples, Next: Problems, Prev: Amendments to pax format, Up: Top
+File: tarlz.info, Node: Multi-threaded tar, Next: Examples, Prev: Amendments to pax format, Up: Top
+
+5 Limitations of parallel tar decoding
+**************************************
+
+Safely decoding an arbitrary tar archive in parallel is impossible. For
+example, if a tar archive containing another tar archive is decoded
+starting from some position other than the beginning, there is no way
+to know if the first header found there belongs to the outer tar
+archive or to the inner tar archive. Tar is an inherently serial format;
+it was designed for tapes.
+
+ In the case of compressed tar archives, the start of each compressed
+block determines one point through which the tar archive can be decoded
+in parallel. Therefore, in tar.lz archives the decoding operations
+can't be parallelized if the tar members are not aligned with the lzip
+members. Tar archives compressed with plzip can't be decoded in
+parallel because tar and plzip do not have a way to align both sets of
+members. Certainly one can decompress one such archive with a
+multi-threaded tool like plzip, but the increase in speed is not as
+large as it could be because plzip must serialize the decompressed data
+and pass them to tar, which decodes them sequentially, one tar member
+at a time.
+
+ On the other hand, if the tar.lz archive is created with a tool like
+tarlz, which can guarantee the alignment between tar members and lzip
+members because it controls both archiving and compression, then the
+lzip format becomes an indexed layer on top of the tar archive, which
+makes it possible to decode it safely in parallel.
+
+ Tarlz is able to automatically decode aligned and unaligned
+multimember tar.lz archives, keeping backwards compatibility. If tarlz
+finds a member misalignment during multi-threaded decoding, it switches
+to single-threaded mode and continues decoding the archive. Currently
+only the '--list' option is able to do multi-threaded decoding.
+
+ If the files in the archive are large, multi-threaded '--list' on a
+regular tar.lz archive can be hundreds of times faster than sequential
+'--list' because, in addition to using several processors, it only
+needs to decompress part of each lzip member. See the following example
+listing the Silesia corpus on a dual core machine:
+
+ tarlz -9 -cf silesia.tar.lz silesia
+ time lzip -cd silesia.tar.lz | tar -tf - (5.032s)
+ time plzip -cd silesia.tar.lz | tar -tf - (3.256s)
+ time tarlz -tf silesia.tar.lz (0.020s)
+
+
+File: tarlz.info, Node: Examples, Next: Problems, Prev: Multi-threaded tar, Up: Top
-5 A small tutorial with examples
+6 A small tutorial with examples
********************************
Example 1: Create a multimember compressed archive 'archive.tar.lz'
@@ -633,7 +697,7 @@ Example 8: Copy the contents of directory 'sourcedir' to the directory

File: tarlz.info, Node: Problems, Next: Concept index, Prev: Examples, Up: Top
-6 Reporting bugs
+7 Reporting bugs
****************
There are probably bugs in tarlz. There are certainly errors and
@@ -670,16 +734,17 @@ Concept index

Tag Table:
Node: Top223
-Node: Introduction946
-Node: Invoking tarlz3084
-Node: File format9606
-Ref: key_crc3214138
-Node: Amendments to pax format19215
-Ref: crc3219729
-Ref: flawed-compat20753
-Node: Examples23126
-Node: Problems24802
-Node: Concept index25328
+Node: Introduction1012
+Node: Invoking tarlz3124
+Node: File format10384
+Ref: key_crc3215169
+Node: Amendments to pax format20586
+Ref: crc3221110
+Ref: flawed-compat22135
+Node: Multi-threaded tar24508
+Node: Examples27012
+Node: Problems28682
+Node: Concept index29208

End Tag Table
diff --git a/doc/tarlz.texi b/doc/tarlz.texi
index 4c6d16a..d9bdc14 100644
--- a/doc/tarlz.texi
+++ b/doc/tarlz.texi
@@ -6,8 +6,8 @@
@finalout
@c %**end of header
-@set UPDATED 16 December 2018
-@set VERSION 0.8
+@set UPDATED 22 January 2019
+@set VERSION 0.9
@dircategory Data Compression
@direntry
@@ -39,13 +39,14 @@ This manual is for Tarlz (version @value{VERSION}, @value{UPDATED}).
* Invoking tarlz:: Command line interface
* File format:: Detailed format of the compressed archive
* Amendments to pax format:: The reasons for the differences with pax
+* Multi-threaded tar:: Limitations of parallel tar decoding
* Examples:: A small tutorial with examples
* Problems:: Reporting bugs
* Concept index:: Index of concepts
@end menu
@sp 1
-Copyright @copyright{} 2013-2018 Antonio Diaz Diaz.
+Copyright @copyright{} 2013-2019 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission
to copy, distribute and modify it.
@@ -55,18 +56,20 @@ to copy, distribute and modify it.
@chapter Introduction
@cindex introduction
-@uref{http://www.nongnu.org/lzip/tarlz.html,,Tarlz} is a small and simple
-implementation of the tar archiver. By default tarlz creates, lists and
-extracts archives in a simplified posix pax format compressed with
-@uref{http://www.nongnu.org/lzip/lzip.html,,lzip} on a per file basis. Each
-tar member is compressed in its own lzip member, as well as the end-of-file
-blocks. This method is fully backward compatible with standard tar tools
-like GNU tar, which treat the resulting multimember tar.lz archive like any
-other tar.lz archive. Tarlz can append files to the end of such compressed
-archives.
-
-Tarlz can create tar archives with four levels of compression
-granularity; per file, per directory, appendable solid, and solid.
+@uref{http://www.nongnu.org/lzip/tarlz.html,,Tarlz} is a combined
+implementation of the tar archiver and the
+@uref{http://www.nongnu.org/lzip/lzip.html,,lzip} compressor. By default
+tarlz creates, lists and extracts archives in a simplified posix pax format
+compressed with lzip on a per file basis. Each tar member is compressed in
+its own lzip member, as well as the end-of-file blocks. This method adds an
+indexed lzip layer on top of the tar archive, making it possible to decode
+the archive safely in parallel. The resulting multimember tar.lz archive is
+fully backward compatible with standard tar tools like GNU tar, which treat
+it like any other tar.lz archive. Tarlz can append files to the end of such
+compressed archives.
+
+Tarlz can create tar archives with four levels of compression granularity;
+per file, per directory, appendable solid, and solid.
@noindent
Of course, compressing each file (or each directory) individually is
@@ -76,7 +79,7 @@ following advantages:
@itemize @bullet
@item
The resulting multimember tar.lz archive can be decompressed in
-parallel with plzip, multiplying the decompression speed.
+parallel, multiplying the decompression speed.
@item
New members can be appended to the archive (by removing the EOF
@@ -102,9 +105,6 @@ standard tar tools. @xref{crc32}.
Tarlz does not understand other tar formats like @samp{gnu}, @samp{oldgnu},
@samp{star} or @samp{v7}.
-Tarlz is intended as a showcase project for the maintainers of real tar
-programs to evaluate the format and perhaps implement it in their tools.
-
@node Invoking tarlz
@chapter Invoking tarlz
@@ -174,6 +174,20 @@ previous @code{-C} option.
Use archive file @var{archive}. @samp{-} used as an @var{archive}
argument reads from standard input or writes to standard output.
+@item -n @var{n}
+@itemx --threads=@var{n}
+Set the number of decompression threads, overriding the system's default.
+Valid values range from 0 to "as many as your system can support". A value
+of 0 disables threads entirely. If this option is not used, tarlz tries to
+detect the number of processors in the system and use it as default value.
+@w{@samp{tarlz --help}} shows the system's default value. This option
+currently only has effect when listing the contents of a multimember
+compressed archive. @xref{Multi-threaded tar}.
+
+Note that the number of usable threads is limited during decompression to
+the number of lzip members in the tar.lz archive, which you can find by
+running @w{@code{lzip -lv archive.tar.lz}}.
+
@item -q
@itemx --quiet
Quiet operation. Suppress all messages.
@@ -335,6 +349,11 @@ associated fields in this header block for this file.
Zero or more blocks that contain the contents of the file.
@end itemize
+Each tar member must be contiguously stored in a lzip member for the
+parallel decoding operations like @code{--list} to work. If any tar member
+is split over two or more lzip members, the archive must be decoded
+sequentially. @xref{Multi-threaded tar}.
+
At the end of the archive file there are two 512-byte blocks filled with
binary zeros, interpreted as an end-of-archive indicator. These EOF
blocks are either compressed in a separate lzip member or compressed
@@ -481,20 +500,12 @@ is used to store the linkname.
The mode field provides 12 access permission bits. The following table
shows the symbolic name of each bit and its octal value:
-@multitable {Bit Name} {Bit value}
-@item Bit Name @tab Bit value
-@item S_ISUID @tab 04000
-@item S_ISGID @tab 02000
-@item S_ISVTX @tab 01000
-@item S_IRUSR @tab 00400
-@item S_IWUSR @tab 00200
-@item S_IXUSR @tab 00100
-@item S_IRGRP @tab 00040
-@item S_IWGRP @tab 00020
-@item S_IXGRP @tab 00010
-@item S_IROTH @tab 00004
-@item S_IWOTH @tab 00002
-@item S_IXOTH @tab 00001
+@multitable {Bit Name} {Value} {Bit Name} {Value} {Bit Name} {Value}
+@headitem Bit Name @tab Value @tab Bit Name @tab Value @tab Bit Name @tab Value
+@item S_ISUID @tab 04000 @tab S_ISGID @tab 02000 @tab S_ISVTX @tab 01000
+@item S_IRUSR @tab 00400 @tab S_IWUSR @tab 00200 @tab S_IXUSR @tab 00100
+@item S_IRGRP @tab 00040 @tab S_IWGRP @tab 00020 @tab S_IXGRP @tab 00010
+@item S_IROTH @tab 00004 @tab S_IWOTH @tab 00002 @tab S_IXOTH @tab 00001
@end multitable
The uid and gid fields are the user and group ID of the owner and group
@@ -551,10 +562,13 @@ regular file (type 0).
@end table
The magic field contains the ASCII null-terminated string "ustar". The
-version field contains the characters "00" (0x30,0x30). The fields
-uname, and gname are null-terminated character strings. Each numeric
-field contains a leading zero-filled, null-terminated octal number using
-digits from the ISO/IEC 646:1991 (ASCII) standard.
+version field contains the characters "00" (0x30,0x30). The fields uname
+and gname are null-terminated character strings, except when every character
+in the array, including the last one, is non-null. Each
+numeric field contains a leading space- or zero-filled, optionally
+null-terminated octal number using digits from the ISO/IEC 646:1991 (ASCII)
+standard. Tarlz is able to decode numeric fields 1 byte larger than standard
+ustar by not requiring a terminating null character.
@node Amendments to pax format
@@ -574,7 +588,7 @@ concrete reasons to implement them.
The posix pax format has a serious flaw. The metadata stored in pax extended
records are not protected by any kind of check sequence. Corruption in a
long filename may cause the extraction of the file in the wrong place
-without warning. Corruption in a long file size may cause the truncation of
+without warning. Corruption in a large file size may cause the truncation of
the file or the appending of garbage to the file, both followed by a
spurious warning about a corrupt header far from the place of the undetected
corruption.
@@ -636,6 +650,52 @@ double UTF-8 conversions. If the need arises this behavior will be adjusted
with a command line option in the future.
+@node Multi-threaded tar
+@chapter Limitations of parallel tar decoding
+
+Safely decoding an arbitrary tar archive in parallel is impossible. For
+example, if a tar archive containing another tar archive is decoded starting
+from some position other than the beginning, there is no way to know if the
+first header found there belongs to the outer tar archive or to the inner
+tar archive. Tar is an inherently serial format; it was designed for tapes.
+
+In the case of compressed tar archives, the start of each compressed block
+determines one point through which the tar archive can be decoded in
+parallel. Therefore, in tar.lz archives the decoding operations can't be
+parallelized if the tar members are not aligned with the lzip members. Tar
+archives compressed with plzip can't be decoded in parallel because tar and
+plzip do not have a way to align both sets of members. Certainly one can
+decompress one such archive with a multi-threaded tool like plzip, but the
+increase in speed is not as large as it could be because plzip must
+serialize the decompressed data and pass them to tar, which decodes them
+sequentially, one tar member at a time.
+
+On the other hand, if the tar.lz archive is created with a tool like tarlz,
+which can guarantee the alignment between tar members and lzip members
+because it controls both archiving and compression, then the lzip format
+becomes an indexed layer on top of the tar archive, which makes it possible
+to decode it safely in parallel.
+
+Tarlz is able to automatically decode aligned and unaligned multimember
+tar.lz archives, keeping backwards compatibility. If tarlz finds a member
+misalignment during multi-threaded decoding, it switches to single-threaded
+mode and continues decoding the archive. Currently only the @code{--list}
+option is able to do multi-threaded decoding.
+
+If the files in the archive are large, multi-threaded @code{--list} on a
+regular tar.lz archive can be hundreds of times faster than sequential
+@code{--list} because, in addition to using several processors, it only
+needs to decompress part of each lzip member. See the following example
+listing the Silesia corpus on a dual core machine:
+
+@example
+tarlz -9 -cf silesia.tar.lz silesia
+time lzip -cd silesia.tar.lz | tar -tf - (5.032s)
+time plzip -cd silesia.tar.lz | tar -tf - (3.256s)
+time tarlz -tf silesia.tar.lz (0.020s)
+@end example
+
+
@node Examples
@chapter A small tutorial with examples
@cindex examples
diff --git a/extract.cc b/extract.cc
index 58cda61..e25f5b6 100644
--- a/extract.cc
+++ b/extract.cc
@@ -1,5 +1,5 @@
/* Tarlz - Archiver with multimember lzip compression
- Copyright (C) 2013-2018 Antonio Diaz Diaz.
+ Copyright (C) 2013-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,9 @@
#define _FILE_OFFSET_BITS 64
#include <algorithm>
+#include <cctype>
#include <cerrno>
+#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
@@ -36,13 +38,15 @@
#include "arg_parser.h"
#include "lzip.h"
+#include "lzip_index.h"
#include "tarlz.h"
namespace {
+Resizable_buffer grbuf( initial_line_length );
int gretval = 0;
-bool has_lz_ext; // global var for archive_read
+bool has_lz_ext; // global var for archive_read
void skip_warn( const bool reset = false ) // avoid duplicate warnings
{
@@ -83,13 +87,6 @@ bool make_path( const std::string & name )
}
-inline bool block_is_zero( const uint8_t * const buf, const int size )
- {
- for( int i = 0; i < size; ++i ) if( buf[i] != 0 ) return false;
- return true;
- }
-
-
// Return value: 0 = OK, 1 = damaged member, 2 = fatal error.
// If sizep and error, return in *sizep the number of bytes read.
// The first 6 bytes of the archive must be intact for islz to be meaningful.
@@ -114,6 +111,7 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
{ show_error( "Error reading archive", errno ); fatal = true; return 2; }
const Lzip_header & header = (*(const Lzip_header *)buf);
bool islz = ( rd >= min_member_size && header.verify_magic() &&
+ header.verify_version() &&
isvalid_ds( header.dictionary_size() ) );
const bool istar = ( rd == size && verify_ustar_chksum( buf ) );
const bool iseof =
@@ -160,8 +158,8 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
skip_warn(); gretval = 2; return 1;
}
if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
- { LZ_decompress_close( decoder );
- show_error( "Archive ends unexpectedly." ); fatal = true; return 2; }
+ { LZ_decompress_close( decoder );
+ show_error( "Archive ends unexpectedly." ); fatal = true; return 2; }
sz += rd; if( sizep ) *sizep = sz;
if( sz == size && LZ_decompress_finished( decoder ) == 1 &&
LZ_decompress_close( decoder ) < 0 )
@@ -185,12 +183,14 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
}
-const char * mode_string( const Tar_header header )
+enum { mode_string_size = 10,
+ group_string_size = 1 + uname_l + 1 + gname_l + 1 }; // 67
+
+void format_mode_string( const Tar_header header, char buf[mode_string_size] )
{
- static char buf[11];
const Typeflag typeflag = (Typeflag)header[typeflag_o];
- std::memcpy( buf, "----------", sizeof buf - 1 );
+ std::memcpy( buf, "----------", mode_string_size );
switch( typeflag )
{
case tf_regular: break;
@@ -203,7 +203,7 @@ const char * mode_string( const Tar_header header )
case tf_hiperf: buf[0] = 'C'; break;
default: buf[0] = '?';
}
- const mode_t mode = strtoul( header + mode_o, 0, 8 ); // 12 bits
+ const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits
const bool setuid = mode & S_ISUID;
const bool setgid = mode & S_ISGID;
const bool sticky = mode & S_ISVTX;
@@ -219,46 +219,79 @@ const char * mode_string( const Tar_header header )
if( mode & S_IWOTH ) buf[8] = 'w';
if( mode & S_IXOTH ) buf[9] = sticky ? 't' : 'x';
else if( sticky ) buf[9] = 'T';
- return buf;
}
-const char * user_group_string( const Tar_header header )
+int format_user_group_string( const Tar_header header,
+ char buf[group_string_size] )
{
- enum { bufsize = uname_l + 1 + gname_l + 1 };
- static char buf[bufsize];
-
+ int len;
if( header[uname_o] && header[gname_o] )
- snprintf( buf, bufsize, "%.32s/%.32s", header + uname_o, header + gname_o );
+ len = snprintf( buf, group_string_size,
+ " %.32s/%.32s", header + uname_o, header + gname_o );
else
{
- const int uid = strtoul( header + uid_o, 0, 8 );
- const int gid = strtoul( header + gid_o, 0, 8 );
- snprintf( buf, bufsize, "%u/%u", uid, gid );
+ const unsigned uid = parse_octal( header + uid_o, uid_l );
+ const unsigned gid = parse_octal( header + gid_o, gid_l );
+ len = snprintf( buf, group_string_size, " %u/%u", uid, gid );
}
- return buf;
+ return len;
}
+} // end namespace
-void show_member_name( const Extended & extended, const Tar_header header,
- const int vlevel )
+bool block_is_zero( const uint8_t * const buf, const int size )
{
- if( verbosity < vlevel ) return;
- if( verbosity > vlevel )
+ for( int i = 0; i < size; ++i ) if( buf[i] != 0 ) return false;
+ return true;
+ }
+
+
+void format_member_name( const Extended & extended, const Tar_header header,
+ Resizable_buffer & rbuf, const bool long_format )
+ {
+ if( long_format )
{
- const time_t mtime = strtoull( header + mtime_o, 0, 8 ); // 33 bits
- const struct tm * const tm = localtime( &mtime );
+ format_mode_string( header, rbuf() );
+ const int group_string_len =
+ format_user_group_string( header, rbuf() + mode_string_size );
+ const int offset = mode_string_size + group_string_len;
+ const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
+ struct tm tms;
+ const struct tm * tm = localtime_r( &mtime, &tms );
+ if( !tm )
+ { time_t z = 0; tm = localtime_r( &z, &tms ); if( !tm ) tm = &tms; }
const Typeflag typeflag = (Typeflag)header[typeflag_o];
const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
const char * const link_string = !islink ? "" :
( ( typeflag == tf_link ) ? " link to " : " -> " );
- std::printf( "%s %s %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n",
- mode_string( header ), user_group_string( header ),
- extended.size, 1900 + tm->tm_year, 1 + tm->tm_mon,
- tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path.c_str(),
- link_string, !islink ? "" : extended.linkpath.c_str() );
+ for( int i = 0; i < 2; ++i )
+ {
+ const int len = snprintf( rbuf() + offset, rbuf.size() - offset,
+ " %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n",
+ extended.size, 1900 + tm->tm_year, 1 + tm->tm_mon,
+ tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path.c_str(),
+ link_string, !islink ? "" : extended.linkpath.c_str() );
+ if( (int)rbuf.size() > len + offset ) break;
+ else rbuf.resize( len + offset + 1 );
+ }
+ }
+ else
+ {
+ if( rbuf.size() < extended.path.size() + 2 )
+ rbuf.resize( extended.path.size() + 2 );
+ snprintf( rbuf(), rbuf.size(), "%s\n", extended.path.c_str() );
}
- else std::printf( "%s\n", extended.path.c_str() );
+ }
+
+namespace {
+
+void show_member_name( const Extended & extended, const Tar_header header,
+ const int vlevel, Resizable_buffer & rbuf )
+ {
+ if( verbosity < vlevel ) return;
+ format_member_name( extended, header, rbuf, verbosity > vlevel );
+ std::fputs( rbuf(), stdout );
std::fflush( stdout );
}
@@ -266,7 +299,7 @@ void show_member_name( const Extended & extended, const Tar_header header,
int list_member( const int infd, const Extended & extended,
const Tar_header header, const bool skip )
{
- if( !skip ) show_member_name( extended, header, 0 );
+ if( !skip ) show_member_name( extended, header, 0, grbuf );
const unsigned bufsize = 32 * header_size;
uint8_t buf[bufsize];
@@ -304,13 +337,13 @@ int extract_member( const int infd, const Extended & extended,
show_file_error( filename, "Contains a '..' component, skipping." );
return list_member( infd, extended, header, true );
}
- const mode_t mode = strtoul( header + mode_o, 0, 8 ); // 12 bits
- const time_t mtime = strtoull( header + mtime_o, 0, 8 ); // 33 bits
+ const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits
+ const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
const Typeflag typeflag = (Typeflag)header[typeflag_o];
const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
int outfd = -1;
- show_member_name( extended, header, 1 );
+ show_member_name( extended, header, 1, grbuf );
std::remove( filename );
make_path( filename );
switch( typeflag )
@@ -352,8 +385,9 @@ int extract_member( const int infd, const Extended & extended,
case tf_chardev:
case tf_blockdev:
{
- const unsigned dev = makedev( strtoul( header + devmajor_o, 0, 8 ),
- strtoul( header + devminor_o, 0, 8 ) );
+ const unsigned dev =
+ makedev( parse_octal( header + devmajor_o, devmajor_l ),
+ parse_octal( header + devminor_o, devminor_l ) );
const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode;
if( mknod( filename, dmode, dev ) != 0 )
{
@@ -376,8 +410,8 @@ int extract_member( const int infd, const Extended & extended,
return 2;
}
- const uid_t uid = (uid_t)strtoul( header + uid_o, 0, 8 );
- const gid_t gid = (gid_t)strtoul( header + gid_o, 0, 8 );
+ const uid_t uid = (uid_t)parse_octal( header + uid_o, uid_l );
+ const gid_t gid = (gid_t)parse_octal( header + gid_o, gid_l );
if( !islink && chown( filename, uid, gid ) != 0 &&
errno != EPERM && errno != EINVAL )
{
@@ -423,6 +457,7 @@ int extract_member( const int infd, const Extended & extended,
return 0;
}
+} // end namespace
// Removes any amount of leading "./" and '/' strings.
const char * remove_leading_slash( const char * const filename )
@@ -464,78 +499,163 @@ bool compare_tslash( const char * const name1, const char * const name2 )
return ( !*p && !*q );
}
-} // end namespace
+namespace {
+/* Length-safe parser for an unsigned decimal number.
+   Skips leading whitespace, then reads decimal digits from 'ptr' without
+   ever looking at or beyond ptr[size].
+   On success returns the parsed value and, if 'tailp' is not null, sets
+   *tailp to the first character after the last digit read.
+   If no digit is found within 'size' bytes, or if the value exceeds
+   LLONG_MAX, returns 0 and sets *tailp to 'ptr'.
+*/
+unsigned long long parse_decimal( const char * const ptr,
+                                  const char ** const tailp,
+                                  const unsigned long long size )
+  {
+  const unsigned long long limit = LLONG_MAX;
+  unsigned long long result = 0;
+  unsigned long long i = 0;
+  // the <cctype> functions require a value representable as unsigned char
+  while( i < size && std::isspace( (unsigned char)ptr[i] ) ) ++i;
+  // test the bound first; ptr[size] is outside of the field
+  if( i >= size || !std::isdigit( (unsigned char)ptr[i] ) )
+    { if( tailp ) *tailp = ptr; return 0; }
+  for( ; i < size && std::isdigit( (unsigned char)ptr[i] ); ++i )
+    {
+    const unsigned digit = ptr[i] - '0';
+    // check before multiplying; a wrapped product may be larger than the
+    // previous value and go undetected by an after-the-fact comparison
+    if( result > ( limit - digit ) / 10 )               // overflow
+      { if( tailp ) *tailp = ptr; return 0; }
+    result = result * 10 + digit;
+    }
+  if( tailp ) *tailp = ptr + i;
+  return result;
+  }
-bool Extended::parse( const int infd, const Tar_header header,
- const bool permissive )
+
+/* Decodes the 8 hexadecimal digits (upper or lower case) starting at 'ptr',
+   most significant digit first, and returns the resulting 32-bit value.
+   Returns 0 if any of the 8 characters is not a hexadecimal digit.
+*/
+uint32_t parse_record_crc( const char * const ptr )
+  {
+  uint32_t value = 0;
+  for( const char * p = ptr; p < ptr + 8; ++p )
+    {
+    const char ch = *p;
+    int nibble;
+    if( ch >= '0' && ch <= '9' ) nibble = ch - '0';
+    else if( ch >= 'A' && ch <= 'F' ) nibble = ch - 'A' + 10;
+    else if( ch >= 'a' && ch <= 'f' ) nibble = ch - 'a' + 10;
+    else return 0;                      // invalid digit in crc string
+    value = ( value << 4 ) + nibble;
+    }
+  return value;
+  }
+
+
+bool parse_records( const int infd, Extended & extended,
+ const Tar_header header, const bool permissive )
+ {
+ const unsigned long long edsize = parse_octal( header + size_o, size_l );
const unsigned long long bufsize = round_up( edsize );
if( bufsize == 0 || edsize == 0 || edsize >= 1ULL << 33 )
return false; // overflow or no extended data
char * const buf = new char[bufsize]; // extended records buffer
- if( archive_read( infd, (uint8_t *)buf, bufsize ) != 0 ) goto error;
+ const bool ret = ( archive_read( infd, (uint8_t *)buf, bufsize ) == 0 &&
+ extended.parse( buf, edsize, permissive ) );
+ delete[] buf;
+ return ret;
+ }
+
+} // end namespace
+
+
+/* Reads up to 'size' bytes from 'fd' into 'buf', retrying after EINTR.
+   Returns the number of bytes really read.
+   If (returned value < size) and (errno == 0), means EOF was reached.
+*/
+int readblock( const int fd, uint8_t * const buf, const int size )
+  {
+  int done = 0;
+  errno = 0;
+  while( done < size )
+    {
+    const int n = read( fd, buf + done, size - done );
+    if( n == 0 ) break;                         // EOF
+    if( n > 0 ) done += n;
+    else if( errno != EINTR ) break;            // read error; errno is kept
+    errno = 0;
+    }
+  return done;
+  }
+
+
+/* Writes 'size' bytes from 'buf' to 'fd', retrying after EINTR and after
+   partial writes.
+   Returns the number of bytes really written.
+   If (returned value < size), it is always an error.
+*/
+int writeblock( const int fd, const uint8_t * const buf, const int size )
+  {
+  int done = 0;
+  errno = 0;
+  while( done < size )
+    {
+    const int n = write( fd, buf + done, size - done );
+    if( n < 0 && errno != EINTR ) break;        // write error; errno is kept
+    if( n > 0 ) done += n;
+    errno = 0;
+    }
+  return done;
+  }
+
+
+/* Length-safe parser for an octal field of at most 'size' bytes.
+   Skips leading whitespace, then accumulates octal digits until the first
+   character that is not in '0'..'7' or until 'size' bytes have been read.
+   Returns 0 if no octal digit is found.
+*/
+unsigned long long parse_octal( const uint8_t * const ptr, const int size )
+  {
+  int pos = 0;
+  for( ; pos < size; ++pos ) if( !std::isspace( ptr[pos] ) ) break;
+  unsigned long long value = 0;
+  for( ; pos < size; ++pos )
+    {
+    const uint8_t ch = ptr[pos];
+    if( ch < '0' || ch > '7' ) break;           // end of the octal number
+    value = ( value << 3 ) + ( ch - '0' );
+    }
+  return value;
+  }
+
+
+bool Extended::parse( const char * const buf, const unsigned long long edsize,
+ const bool permissive )
+ {
for( unsigned long long pos = 0; pos < edsize; ) // parse records
{
- char * tail;
- const unsigned long long rsize = strtoull( buf + pos, &tail, 10 );
+ const char * tail;
+ const unsigned long long rsize =
+ parse_decimal( buf + pos, &tail, edsize - pos );
if( rsize == 0 || rsize > edsize - pos || tail[0] != ' ' ||
- buf[pos+rsize-1] != '\n' ) goto error;
+ buf[pos+rsize-1] != '\n' ) return false;
++tail; // point to keyword
- // length of (keyword + '=' + value) without the final newline
- const unsigned long long rest = ( buf + pos + rsize - 1 ) - tail;
+ // rest = length of (keyword + '=' + value) without the final newline
+ const unsigned long long rest = ( buf + ( pos + rsize - 1 ) ) - tail;
if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 )
- { if( path.size() && !permissive ) goto error;
+ { if( path.size() && !permissive ) return false;
path.assign( tail + 5, rest - 5 ); }
else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 )
- { if( linkpath.size() && !permissive ) goto error;
+ { if( linkpath.size() && !permissive ) return false;
linkpath.assign( tail + 9, rest - 9 ); }
else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 )
{
- if( size != 0 && !permissive ) goto error;
- size = 0;
- for( unsigned long long i = 5; i < rest; ++i )
- {
- if( tail[i] < '0' || tail[i] > '9' ) goto error;
- const unsigned long long prev = size;
- size = size * 10 + ( tail[i] - '0' );
- if( size < prev ) goto error; // overflow
- }
- if( size < 1ULL << 33 ) goto error; // size fits in ustar header
+ if( size != 0 && !permissive ) return false;
+ size = parse_decimal( tail + 5, &tail, rest - 5 );
+ // parse error or size fits in ustar header
+ if( size < 1ULL << 33 || tail != buf + ( pos + rsize - 1 ) ) return false;
}
else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 )
{
- if( crc_present && !permissive ) goto error;
- if( rsize != 22 ) goto error;
- char * t;
- const uint32_t stored_crc = strtoul( tail + 10, &t, 16 );
- if( t - tail - 10 != 8 || t[0] != '\n' ) goto error;
+ if( crc_present && !permissive ) return false;
+ if( rsize != 22 ) return false;
+ const uint32_t stored_crc = parse_record_crc( tail + 10 );
const uint32_t computed_crc =
crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize );
crc_present = true;
- if( stored_crc != computed_crc ) goto error;
+ if( stored_crc != computed_crc ) return false;
}
pos += rsize;
}
- delete[] buf;
return true;
-error:
- delete[] buf;
- return false;
}
int decode( const std::string & archive_name, const Arg_parser & parser,
- const int filenames, const bool keep_damaged, const bool listing,
- const bool missing_crc, const bool permissive )
+ const int filenames, const int num_workers, const int debug_level,
+ const bool keep_damaged, const bool listing, const bool missing_crc,
+ const bool permissive )
{
const int infd = archive_name.size() ?
open_instream( archive_name ) : STDIN_FILENO;
if( infd < 0 ) return 1;
- // execute -C options and mark filenames to be extracted or listed
- std::vector< bool > name_pending( parser.arguments(), false );
+ // Execute -C options and mark filenames to be extracted or listed.
+ // name_pending is of type char instead of bool to allow concurrent update.
+ std::vector< char > name_pending( parser.arguments(), false );
for( int i = 0; i < parser.arguments(); ++i )
{
const int code = parser.code( i );
@@ -549,34 +669,57 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
if( !code ) name_pending[i] = true;
}
- has_lz_ext =
+ if( listing && num_workers > 0 ) // multi-threaded --list
+ {
+ const Lzip_index lzip_index( infd, true, false );
+ const long members = lzip_index.members();
+ if( lzip_index.retval() == 0 && ( members >= 3 ||
+ ( members >= 2 && lzip_index.dblock( members - 1 ).size() > 1024 ) ) )
+ { //show_file_error( archive_name.c_str(), "Is compressed seekable" );
+ return list_lz( parser, name_pending, lzip_index, filenames,
+ debug_level, infd, std::min( (long)num_workers, members ),
+ missing_crc, permissive ); }
+ lseek( infd, 0, SEEK_SET );
+ }
+
+ has_lz_ext = // global var for archive_read
( archive_name.size() > 3 &&
archive_name.compare( archive_name.size() - 3, 3, ".lz" ) == 0 ) ||
( archive_name.size() > 4 &&
archive_name.compare( archive_name.size() - 4, 4, ".tlz" ) == 0 );
- Extended extended; // metadata from extended records
+ Extended extended; // metadata from extended records
int retval = 0;
- bool prev_extended = false; // prev header was extended
- while( true ) // process one member per iteration
+ bool prev_extended = false; // prev header was extended
+ while( true ) // process one tar member per iteration
{
- uint8_t buf[header_size];
- const int ret = archive_read( infd, buf, header_size );
+ Tar_header header;
+ const int ret = archive_read( infd, header, header_size );
if( ret == 2 ) return 2;
- if( ret != 0 || !verify_ustar_chksum( buf ) )
+ if( ret != 0 || !verify_ustar_chksum( header ) )
{
- if( ret == 0 && block_is_zero( buf, header_size ) ) break; // EOF
+ if( ret == 0 && block_is_zero( header, header_size ) ) break; // EOF
skip_warn(); gretval = 2; continue;
}
- skip_warn( true ); // reset warning
+ skip_warn( true ); // reset warning
- const char * const header = (const char *)buf;
const Typeflag typeflag = (Typeflag)header[typeflag_o];
+ if( typeflag == tf_global )
+ {
+ if( prev_extended )
+ { show_error( "Format violation: global header after extended header." );
+ return 2; }
+ Extended dummy; // global headers are parsed and ignored
+ if( !parse_records( infd, dummy, header, true ) )
+ { show_error( "Error in global extended records. Skipping to next header." );
+ gretval = 2; }
+ continue;
+ }
if( typeflag == tf_extended )
{
if( prev_extended && !permissive )
{ show_error( "Format violation: consecutive extended headers found."
- /*" Use --permissive."*/, 0, true ); return 2; }
- if( !extended.parse( infd, header, permissive ) )
+ /*" Use --permissive.", 0, true*/ ); return 2; }
+ if( !parse_records( infd, extended, header, permissive ) )
{ show_error( "Error in extended records. Skipping to next header." );
extended.reset(); gretval = 2; }
else if( !extended.crc_present && missing_crc )
@@ -586,7 +729,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
}
prev_extended = false;
- if( extended.linkpath.empty() )
+ if( extended.linkpath.empty() ) // copy linkpath from ustar header
{
for( int i = 0; i < linkname_l && header[linkname_o+i]; ++i )
extended.linkpath += header[linkname_o+i];
@@ -595,7 +738,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
extended.linkpath.resize( extended.linkpath.size() - 1 );
}
- if( extended.path.empty() )
+ if( extended.path.empty() ) // copy path from ustar header
{
char stored_name[prefix_l+1+name_l+1];
int len = 0;
@@ -624,7 +767,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
if( extended.size == 0 &&
( typeflag == tf_regular || typeflag == tf_hiperf ) )
- extended.size = strtoull( header + size_o, 0, 8 );
+ extended.size = parse_octal( header + size_o, size_l );
if( listing || skip )
retval = list_member( infd, extended, header, skip );
diff --git a/list_lz.cc b/list_lz.cc
new file mode 100644
index 0000000..d41d2b7
--- /dev/null
+++ b/list_lz.cc
@@ -0,0 +1,699 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2019 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <queue>
+#include <string>
+#include <vector>
+#include <pthread.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <lzlib.h>
+
+#include "arg_parser.h"
+#include "lzip.h"
+#include "lzip_index.h"
+#include "tarlz.h"
+
+
+// Reads up to 'size' bytes from 'fd' at offset 'pos', retrying on EINTR.
+// Returns the byte count actually read. A short count with (errno == 0)
+// means EOF was reached; a short count with errno set means a read error.
+int preadblock( const int fd, uint8_t * const buf, const int size,
+ const long long pos )
+ {
+ int done = 0;
+ errno = 0;
+ while( done < size )
+ {
+ const int got = pread( fd, buf + done, size - done, pos + done );
+ if( got == 0 ) break; // EOF
+ if( got < 0 && errno != EINTR ) break; // real error, errno is kept
+ if( got > 0 ) done += got;
+ errno = 0; // forget EINTR before retrying
+ }
+ return done;
+ }
+
+
+// Writes 'size' bytes from 'buf' to 'fd' at offset 'pos', retrying on EINTR.
+// Returns the byte count actually written; a short count is always an error.
+//
+int pwriteblock( const int fd, const uint8_t * const buf, const int size,
+ const long long pos )
+ {
+ int done = 0;
+ errno = 0;
+ while( done < size )
+ {
+ const int put = pwrite( fd, buf + done, size - done, pos + done );
+ if( put < 0 && errno != EINTR ) break; // real error, errno is kept
+ if( put > 0 ) done += put;
+ errno = 0; // forget EINTR before retrying
+ }
+ return done;
+ }
+
+
+namespace {
+
+// This can be called from any thread, main thread or sub-threads alike,
+// since they all call common helper functions that call cleanup_and_fail()
+// in case of an error. Terminates the process with exit status 'retval'.
+//
+void cleanup_and_fail( const int retval = 2 )
+ {
+ // only one thread can delete and exit
+ static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+
+ pthread_mutex_lock( &mutex ); // ignore errors to avoid loop
+ std::exit( retval ); // mutex is never unlocked; later callers wait in the lock above
+ }
+
+
+// Initializes '*mutex' with default attributes; any failure is fatal.
+void xinit_mutex( pthread_mutex_t * const mutex )
+ {
+ const int rc = pthread_mutex_init( mutex, 0 );
+ if( rc == 0 ) return;
+ show_error( "pthread_mutex_init", rc ); cleanup_and_fail();
+ }
+
+// Initializes '*cond' with default attributes; any failure is fatal.
+void xinit_cond( pthread_cond_t * const cond )
+ {
+ const int rc = pthread_cond_init( cond, 0 );
+ if( rc == 0 ) return;
+ show_error( "pthread_cond_init", rc ); cleanup_and_fail();
+ }
+
+
+// Destroys '*mutex'; any failure is fatal.
+void xdestroy_mutex( pthread_mutex_t * const mutex )
+ {
+ const int rc = pthread_mutex_destroy( mutex );
+ if( rc == 0 ) return;
+ show_error( "pthread_mutex_destroy", rc ); cleanup_and_fail();
+ }
+
+// Destroys '*cond'; any failure is fatal.
+void xdestroy_cond( pthread_cond_t * const cond )
+ {
+ const int rc = pthread_cond_destroy( cond );
+ if( rc == 0 ) return;
+ show_error( "pthread_cond_destroy", rc ); cleanup_and_fail();
+ }
+
+
+// Locks '*mutex'; any failure is fatal.
+void xlock( pthread_mutex_t * const mutex )
+ {
+ const int rc = pthread_mutex_lock( mutex );
+ if( rc == 0 ) return;
+ show_error( "pthread_mutex_lock", rc ); cleanup_and_fail();
+ }
+
+
+// Unlocks '*mutex'; any failure is fatal.
+void xunlock( pthread_mutex_t * const mutex )
+ {
+ const int rc = pthread_mutex_unlock( mutex );
+ if( rc == 0 ) return;
+ show_error( "pthread_mutex_unlock", rc ); cleanup_and_fail();
+ }
+
+
+// Waits on '*cond' with '*mutex' held by the caller; any failure is fatal.
+void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex )
+ {
+ const int rc = pthread_cond_wait( cond, mutex );
+ if( rc == 0 ) return;
+ show_error( "pthread_cond_wait", rc ); cleanup_and_fail();
+ }
+
+
+// Signals '*cond'; any failure is fatal.
+void xsignal( pthread_cond_t * const cond )
+ {
+ const int rc = pthread_cond_signal( cond );
+ if( rc == 0 ) return;
+ show_error( "pthread_cond_signal", rc ); cleanup_and_fail();
+ }
+
+
+// Broadcasts on '*cond'; any failure is fatal.
+void xbroadcast( pthread_cond_t * const cond )
+ {
+ const int rc = pthread_cond_broadcast( cond );
+ if( rc == 0 ) return;
+ show_error( "pthread_cond_broadcast", rc ); cleanup_and_fail();
+ }
+
+
+struct Packet // member name and metadata or error message
+ {
+ // ok: more packets follow for this lzip member
+ // member_done: last packet of the lzip member; muxer moves to next queue
+ // error: 'line' is a diagnostic; muxer shows it and terminates
+ enum Status { ok, member_done, error };
+ long member_id; // lzip member containing the header of this tar member
+ std::string line; // member name and metadata ready to print
+ Status status;
+ Packet( const long i, const char * const msg, const Status s = ok ) // msg is copied into line
+ : member_id( i ), line( msg ), status( s ) {}
+ };
+
+
+// Moves packets from the workers to the muxer. Each worker has its own
+// output queue; the muxer drains the queue of 'deliver_worker_id' and moves
+// to the next queue when it delivers a packet marked Packet::member_done.
+// A worker that finds an error, a misaligned tar member, or EOF requests
+// "mastership"; once granted, packets from all other workers are rejected.
+// The courier owns every packet handed to collect_packet until the packet
+// is returned by deliver_packet.
+class Packet_courier // moves packets around
+ {
+public:
+ unsigned ocheck_counter; // times deliver_packet was called
+ unsigned owait_counter; // times deliver_packet had to wait
+private:
+ long error_member_id; // first lzip member with error/misalign/eof
+ int deliver_worker_id; // worker queue currently delivering packets
+ int master_worker_id; // worker in charge if error/misalignment/eof
+ std::vector< std::queue< Packet * > > opacket_queues;
+ int num_working; // number of workers still running
+ const int num_workers; // number of workers
+ const unsigned out_slots; // max output packets per queue
+ pthread_mutex_t omutex;
+ pthread_cond_t oav_or_exit; // output packet available or all workers exited
+ std::vector< pthread_cond_t > slot_av; // output slot available
+ pthread_cond_t check_master; // wakes workers waiting in request_mastership
+
+ Packet_courier( const Packet_courier & ); // declared as private
+ void operator=( const Packet_courier & ); // declared as private
+
+public:
+ Packet_courier( const int workers, const int slots )
+ : ocheck_counter( 0 ), owait_counter( 0 ),
+ error_member_id( -1 ), deliver_worker_id( 0 ), master_worker_id( -1 ),
+ opacket_queues( workers ), num_working( workers ),
+ num_workers( workers ), out_slots( slots ), slot_av( workers )
+ {
+ xinit_mutex( &omutex ); xinit_cond( &oav_or_exit );
+ for( unsigned i = 0; i < slot_av.size(); ++i ) xinit_cond( &slot_av[i] );
+ xinit_cond( &check_master );
+ }
+
+ ~Packet_courier()
+ {
+ xdestroy_cond( &check_master );
+ for( unsigned i = 0; i < slot_av.size(); ++i ) xdestroy_cond( &slot_av[i] );
+ xdestroy_cond( &oav_or_exit ); xdestroy_mutex( &omutex );
+ }
+
+ bool mastership_granted() const { return master_worker_id >= 0; }
+
+ // Returns true if 'worker_id' is, or becomes, the master worker.
+ // Blocks until mastership has been granted to some worker or until
+ // 'worker_id' is the delivering worker and its queue has been drained.
+ // When mastership is granted, all queued packets are discarded.
+ bool request_mastership( const long member_id, const int worker_id )
+ {
+ xlock( &omutex );
+ if( mastership_granted() ) // already granted
+ { xunlock( &omutex ); return ( master_worker_id == worker_id ); }
+ if( error_member_id < 0 || error_member_id > member_id )
+ error_member_id = member_id; // keep the lowest requesting member
+ while( !mastership_granted() && ( worker_id != deliver_worker_id ||
+ !opacket_queues[deliver_worker_id].empty() ) )
+ xwait( &check_master, &omutex );
+ if( !mastership_granted() && worker_id == deliver_worker_id &&
+ opacket_queues[deliver_worker_id].empty() )
+ {
+ master_worker_id = worker_id; // grant mastership
+ for( int i = 0; i < num_workers; ++i ) // delete all packets
+ while( !opacket_queues[i].empty() )
+ { delete opacket_queues[i].front(); // free it, pop() alone leaks
+ opacket_queues[i].pop(); }
+ xbroadcast( &check_master );
+ xunlock( &omutex );
+ return true;
+ }
+ xunlock( &omutex );
+ return false; // mastership granted to another worker
+ }
+
+ void worker_finished()
+ {
+ // notify muxer when last worker exits
+ xlock( &omutex );
+ if( --num_working == 0 ) xsignal( &oav_or_exit );
+ xunlock( &omutex );
+ }
+
+ // collect a packet from a worker
+ // Takes ownership of 'opacket'. Returns false if the packet is rejected
+ // (another worker is master, or the packet belongs to a member after the
+ // first one with error); a rejected packet is deleted here because the
+ // callers do not keep the pointer.
+ bool collect_packet( Packet * const opacket, const int worker_id )
+ {
+ xlock( &omutex );
+ if( ( mastership_granted() && master_worker_id != worker_id ) ||
+ ( error_member_id >= 0 && error_member_id < opacket->member_id ) )
+ { xunlock( &omutex ); delete opacket; return false; } // reject packet
+ // NOTE(review): a worker waiting here is not woken when
+ // request_mastership empties the queues - confirm this cannot stall.
+ while( opacket_queues[worker_id].size() >= out_slots )
+ xwait( &slot_av[worker_id], &omutex );
+ opacket_queues[worker_id].push( opacket );
+ if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit );
+ xunlock( &omutex );
+ return true;
+ }
+
+ // deliver a packet to muxer
+ // if packet.status == Packet::member_done, move to next queue
+ // Returns 0 when the current queue is empty and all workers have exited.
+ // The caller owns the returned packet.
+ Packet * deliver_packet()
+ {
+ Packet * opacket = 0;
+ xlock( &omutex );
+ ++ocheck_counter;
+ while( opacket_queues[deliver_worker_id].empty() && num_working > 0 )
+ {
+ ++owait_counter;
+ if( !mastership_granted() && error_member_id >= 0 )
+ xbroadcast( &check_master ); // mastership requested not yet granted
+ xwait( &oav_or_exit, &omutex );
+ }
+ if( !opacket_queues[deliver_worker_id].empty() )
+ {
+ opacket = opacket_queues[deliver_worker_id].front();
+ opacket_queues[deliver_worker_id].pop();
+ if( opacket_queues[deliver_worker_id].size() + 1 == out_slots )
+ xsignal( &slot_av[deliver_worker_id] ); // queue was full, a slot is free now
+ if( opacket->status == Packet::member_done && !mastership_granted() )
+ { if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0; }
+ }
+ xunlock( &omutex );
+ return opacket;
+ }
+
+ bool finished() // all packets delivered to muxer
+ {
+ if( num_working != 0 ) return false;
+ for( int i = 0; i < num_workers; ++i )
+ if( !opacket_queues[i].empty() ) return false;
+ return true;
+ }
+ };
+
+
+/* Reads 'size' decompressed bytes into 'buf', feeding the decoder with
+ compressed data read from 'infd' at 'file_pos' as needed. 'file_pos' is
+ advanced by the compressed bytes consumed from the file.
+ Return value: -1 = member_end exceeded, 0 = OK,
+ 1 = damaged member, 2 = fatal error.
+ On return values 1 and 2, '*msg' points to a description of the error.
+ If sizep and error, return in *sizep the number of bytes read. */
+int archive_read_lz( LZ_Decoder * const decoder, const int infd,
+ long long & file_pos, const long long member_end,
+ const long long cdata_size, uint8_t * const buf,
+ const int size,
+ const char ** msg, int * const sizep = 0 )
+ {
+ int sz = 0; // decompressed bytes stored in buf so far
+
+ if( sizep ) *sizep = 0;
+ while( sz < size )
+ {
+ const int rd = LZ_decompress_read( decoder, buf + sz, size - sz );
+ if( rd < 0 )
+ { *msg = LZ_strerror( LZ_decompress_errno( decoder ) ); return 1; }
+ if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
+ { *msg = "Archive ends unexpectedly."; return 2; }
+ sz += rd; if( sizep ) *sizep = sz;
+ if( sz < size && LZ_decompress_write_size( decoder ) > 0 ) // decoder accepts more input
+ {
+ const long long ibuf_size = 16384; // try 65536
+ uint8_t ibuf[ibuf_size];
+ const long long rest = ( file_pos < member_end ) ? // up to member_end, else up to cdata_size
+ member_end - file_pos : cdata_size - file_pos;
+ const int rsize = std::min( LZ_decompress_write_size( decoder ),
+ (int)std::min( ibuf_size, rest ) );
+ if( rsize <= 0 ) LZ_decompress_finish( decoder ); // no more input to give
+ else
+ {
+ const int rd = preadblock( infd, ibuf, rsize, file_pos );
+ if( LZ_decompress_write( decoder, ibuf, rd ) != rd )
+ internal_error( "library error (LZ_decompress_write)." );
+ file_pos += rd;
+ if( rd < rsize ) // short read: EOF or read error
+ {
+ LZ_decompress_finish( decoder );
+ if( errno ) { *msg = "Error reading archive"; return 2; }
+ }
+ }
+ }
+ }
+ return ( file_pos > member_end ) ? -1 : 0;
+ }
+
+
+// Queues the listing line of one tar member and skips over its data.
+// If the tar member extends past 'mdata_end', mastership is requested first.
+// Return value: -1 = this worker is master and must process the rest of
+// the file, 0 = OK, 2 = mastership denied; a positive value returned by
+// archive_read_lz is passed through with '*msg' set.
+int list_member_lz( LZ_Decoder * const decoder, const int infd,
+ long long & file_pos, const long long member_end,
+ const long long cdata_size, long long & data_pos,
+ const long long mdata_end, Packet_courier & courier,
+ const Extended & extended, const Tar_header header,
+ Resizable_buffer & rbuf, const long member_id,
+ const int worker_id, const char ** msg, const bool skip )
+ {
+ unsigned long long rest = extended.size; // data bytes still to be read
+ const int rem = extended.size % header_size;
+ const int padding = rem ? header_size - rem : 0; // rounds the data up to a multiple of header_size
+ const long long data_rest = mdata_end - ( data_pos + rest + padding ); // bytes left before mdata_end after this tar member
+ bool master = false;
+
+ if( data_rest < 0 ) // tar member exceeds lzip member end
+ {
+ if( courier.request_mastership( member_id, worker_id ) ) master = true;
+ else return 2;
+ }
+
+ if( verbosity < 0 || skip ) rbuf()[0] = 0; // empty line, nothing is printed
+ else format_member_name( extended, header, rbuf, verbosity > 0 );
+ Packet * const opacket = new Packet( member_id, rbuf(),
+ data_rest ? Packet::ok : Packet::member_done );
+ courier.collect_packet( opacket, worker_id );
+ if( !data_rest ) { data_pos = mdata_end; return 0; } // last tar member: its data is not read
+
+ const unsigned bufsize = 32 * header_size;
+ uint8_t buf[bufsize];
+ while( rest > 0 )
+ {
+ const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; // final chunk includes the padding
+ const int ret = archive_read_lz( decoder, infd, file_pos, member_end,
+ cdata_size, buf, rsize, msg );
+ if( ret > 0 ) return ret;
+ data_pos += rsize;
+ if( rest < bufsize ) break;
+ rest -= rsize;
+ }
+ return ( master ? -1 : 0 );
+ }
+
+
+// Reads the extended records announced by 'header' and parses them into
+// 'extended'. On success 'data_pos' is advanced by the rounded-up size of
+// the records.
+// Return value: 0 = OK, 1 = missing, oversized or invalid records;
+// any other nonzero value comes from archive_read_lz (-1 = member_end
+// exceeded, 2 = fatal error).
+int parse_records_lz( LZ_Decoder * const decoder, const int infd,
+ long long & file_pos, const long long member_end,
+ const long long cdata_size, long long & data_pos,
+ Extended & extended, const Tar_header header,
+ const char ** msg, const bool permissive )
+ {
+ const unsigned long long edsize = parse_octal( header + size_o, size_l );
+ const unsigned long long bufsize = round_up( edsize );
+ // bufsize is passed below as an int, so it must also fit in an int
+ if( bufsize == 0 || edsize == 0 || edsize >= 1ULL << 33 ||
+ bufsize >= INT_MAX )
+ return 1; // overflow or no extended data (0 would mean OK)
+ char * const buf = new char[bufsize]; // extended records buffer
+ int retval = archive_read_lz( decoder, infd, file_pos, member_end,
+ cdata_size, (uint8_t *)buf, bufsize, msg );
+ if( retval == 0 )
+ { if( extended.parse( buf, edsize, permissive ) ) data_pos += bufsize;
+ else retval = 1; }
+ delete[] buf;
+ return retval;
+ }
+
+
+struct Worker_arg // arguments passed to each dworker_l thread
+ {
+ const Lzip_index * lzip_index;
+ Packet_courier * courier;
+ const Arg_parser * parser;
+ std::vector< char > * name_pending; // cleared by a worker when a name matches
+ int worker_id; // index of first lzip member processed
+ int num_workers; // stride between members processed
+ int infd;
+ int filenames; // if > 0, members not matching a name are skipped
+ bool missing_crc;
+ bool permissive;
+ };
+
+
+ // read lzip members from archive, list their tar members, and
+ // give the produced packets to courier.
+ // Worker 'worker_id' processes lzip members worker_id,
+ // worker_id + num_workers, ... until it runs out of members, becomes
+ // master, or is denied mastership. The decoder is closed on every
+ // exit path that returns from this function.
+extern "C" void * dworker_l( void * arg )
+ {
+ const Worker_arg & tmp = *(const Worker_arg *)arg;
+ const Lzip_index & lzip_index = *tmp.lzip_index;
+ Packet_courier & courier = *tmp.courier;
+ const Arg_parser & parser = *tmp.parser;
+ std::vector< char > & name_pending = *tmp.name_pending;
+ const int worker_id = tmp.worker_id;
+ const int num_workers = tmp.num_workers;
+ const int infd = tmp.infd;
+ const int filenames = tmp.filenames;
+ const bool missing_crc = tmp.missing_crc;
+ const bool permissive = tmp.permissive;
+
+ LZ_Decoder * const decoder = LZ_decompress_open();
+ if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
+ { show_error( "Not enough memory." ); cleanup_and_fail(); }
+
+ const long long cdata_size = lzip_index.cdata_size();
+ Resizable_buffer rbuf( initial_line_length );
+ bool master = false;
+ for( long i = worker_id; !master && i < lzip_index.members(); i += num_workers )
+ {
+ long long data_pos = lzip_index.dblock( i ).pos();
+ const long long mdata_end = lzip_index.dblock( i ).end();
+ long long data_end = mdata_end; // extended to udata_size if master
+ long long file_pos = lzip_index.mblock( i ).pos();
+ long long member_end = lzip_index.mblock( i ).end(); // extended to cdata_size if master
+
+ Extended extended; // metadata from extended records
+ int retval = 0;
+ bool prev_extended = false; // prev header was extended
+ LZ_decompress_reset( decoder ); // prepare for new member
+ while( true ) // process one tar member per iteration
+ {
+ if( data_pos >= data_end ) break;
+ Tar_header header;
+ const char * msg = 0;
+ const int ret = archive_read_lz( decoder, infd, file_pos, member_end,
+ cdata_size, header, header_size, &msg );
+ if( ret != 0 )
+ {
+ if( !courier.request_mastership( i, worker_id ) ) goto done;
+ master = true;
+ if( ret > 0 )
+ {
+ Packet * const opacket = new Packet( i, msg, Packet::error );
+ courier.collect_packet( opacket, worker_id );
+ goto done;
+ }
+ // member_end exceeded, process rest of file
+ else { data_end = lzip_index.udata_size(); member_end = cdata_size; }
+ }
+ data_pos += header_size;
+ if( !verify_ustar_chksum( header ) )
+ {
+ if( !courier.request_mastership( i, worker_id ) ) goto done;
+ master = true;
+ if( block_is_zero( header, header_size ) ) break; // EOF
+ Packet * const opacket = new Packet( i,
+ ( data_pos > header_size ) ? "Corrupt or invalid header." :
+ "This does not look like a POSIX tar.lz archive.", Packet::error );
+ courier.collect_packet( opacket, worker_id );
+ goto done;
+ }
+
+ const Typeflag typeflag = (Typeflag)header[typeflag_o];
+ if( typeflag == tf_global )
+ {
+ if( prev_extended )
+ { show_error( "Format violation: global header after extended header." );
+ cleanup_and_fail(); }
+ Extended dummy; // global headers are parsed and ignored
+ const int ret = parse_records_lz( decoder, infd, file_pos, member_end,
+ cdata_size, data_pos, dummy, header, &msg, true );
+ if( ret != 0 )
+ {
+ if( !courier.request_mastership( i, worker_id ) ) goto done;
+ master = true;
+ if( ret > 0 )
+ {
+ if( !msg ) msg = "Error in global extended records.";
+ Packet * const opacket = new Packet( i, msg, Packet::error );
+ courier.collect_packet( opacket, worker_id );
+ if( ret == 2 ) goto done;
+ }
+ // member_end exceeded, process rest of file
+ else { data_end = lzip_index.udata_size(); member_end = cdata_size; }
+ }
+ continue;
+ }
+ if( typeflag == tf_extended )
+ {
+ int ret = 0;
+ if( prev_extended && !permissive )
+ { msg = "Format violation: consecutive extended headers found.";
+ ret = 2; }
+ else ret = parse_records_lz( decoder, infd, file_pos, member_end,
+ cdata_size, data_pos, extended, header, &msg, permissive );
+ if( ret == 0 && !extended.crc_present && missing_crc )
+ { msg = "Missing CRC in extended records."; ret = 2; }
+ if( ret != 0 )
+ {
+ if( !courier.request_mastership( i, worker_id ) ) goto done;
+ master = true;
+ if( ret > 0 )
+ {
+ if( !msg ) msg = "Error in extended records.";
+ Packet * const opacket = new Packet( i, msg, Packet::error );
+ courier.collect_packet( opacket, worker_id );
+ extended.reset();
+ if( ret == 2 ) goto done;
+ }
+ // member_end exceeded, process rest of file
+ else { data_end = lzip_index.udata_size(); member_end = cdata_size; }
+ }
+ prev_extended = true;
+ continue;
+ }
+ prev_extended = false;
+
+ if( extended.linkpath.empty() ) // copy linkpath from ustar header
+ {
+ for( int i = 0; i < linkname_l && header[linkname_o+i]; ++i )
+ extended.linkpath += header[linkname_o+i];
+ while( extended.linkpath.size() > 1 && // trailing '/'
+ extended.linkpath[extended.linkpath.size()-1] == '/' )
+ extended.linkpath.resize( extended.linkpath.size() - 1 );
+ }
+
+ if( extended.path.empty() ) // copy path from ustar header
+ {
+ char stored_name[prefix_l+1+name_l+1];
+ int len = 0;
+ while( len < prefix_l && header[prefix_o+len] )
+ { stored_name[len] = header[prefix_o+len]; ++len; }
+ if( len && header[name_o] ) stored_name[len++] = '/';
+ for( int i = 0; i < name_l && header[name_o+i]; ++i )
+ { stored_name[len] = header[name_o+i]; ++len; }
+ while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/'
+ stored_name[len] = 0;
+ extended.path = remove_leading_slash( stored_name );
+ }
+ const char * const filename = extended.path.c_str();
+
+ bool skip = filenames > 0; // skip unless a name given in the command line matches
+ if( skip )
+ for( int i = 0; i < parser.arguments(); ++i )
+ if( parser.code( i ) == 0 )
+ {
+ const char * const name =
+ remove_leading_slash( parser.argument( i ).c_str() );
+ if( compare_prefix_dir( name, filename ) ||
+ compare_tslash( name, filename ) )
+ { skip = false; name_pending[i] = false; break; }
+ }
+
+ if( extended.size == 0 &&
+ ( typeflag == tf_regular || typeflag == tf_hiperf ) )
+ extended.size = parse_octal( header + size_o, size_l );
+
+ retval = list_member_lz( decoder, infd, file_pos, member_end,
+ cdata_size, data_pos, mdata_end, courier,
+ extended, header, rbuf, i, worker_id, &msg, skip );
+ extended.reset();
+ if( retval < 0 ) // member_end exceeded, process rest of file
+ { master = true;
+ data_end = lzip_index.udata_size(); member_end = cdata_size; }
+ else if( retval > 0 )
+ { show_error( msg );
+ show_error( "Error is not recoverable: exiting now." );
+ cleanup_and_fail(); }
+ }
+ }
+done: // all 'goto done' paths land here so that the decoder is always closed
+ if( LZ_decompress_close( decoder ) < 0 )
+ {
+ Packet * const opacket = new Packet( lzip_index.members(),
+ "LZ_decompress_close failed.", Packet::error );
+ courier.collect_packet( opacket, worker_id );
+ }
+ courier.worker_finished();
+ return 0;
+ }
+
+
+ // Drains the courier: every packet is taken in delivery order, its line
+ // (if any) is written to stdout, and an error packet is reported through
+ // show_error before terminating. Fails if no worker became master.
+void muxer( Packet_courier & courier )
+ {
+ for( Packet * pkt = courier.deliver_packet(); pkt != 0;
+ pkt = courier.deliver_packet() ) // 0 = queue is empty, all workers exited
+ {
+ const char * const text = pkt->line.c_str();
+ if( pkt->status == Packet::error )
+ { show_error( text ); cleanup_and_fail(); }
+ if( !pkt->line.empty() )
+ { std::fputs( text, stdout ); std::fflush( stdout ); }
+ delete pkt;
+ }
+ if( courier.mastership_granted() ) return;
+ // no worker found EOF blocks
+ show_error( "Archive ends unexpectedly." ); cleanup_and_fail();
+ }
+
+} // end namespace
+
+
+ // init the courier, then start the workers and call the muxer.
+ // Returns 1 if any name given in 'parser' is still pending in
+ // 'name_pending' after listing, else 0. Errors found while listing
+ // terminate the process through cleanup_and_fail.
+int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
+ const Lzip_index & lzip_index, const int filenames,
+ const int debug_level, const int infd, const int num_workers,
+ const bool missing_crc, const bool permissive )
+ {
+ const int out_slots = 100; // max packets queued per worker
+ Packet_courier courier( num_workers, out_slots );
+
+ Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
+ pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
+ if( !worker_args || !worker_threads )
+ { show_error( "Not enough memory." ); cleanup_and_fail(); }
+ for( int i = 0; i < num_workers; ++i )
+ {
+ worker_args[i].lzip_index = &lzip_index;
+ worker_args[i].courier = &courier;
+ worker_args[i].parser = &parser;
+ worker_args[i].name_pending = &name_pending;
+ worker_args[i].worker_id = i;
+ worker_args[i].num_workers = num_workers;
+ worker_args[i].infd = infd;
+ worker_args[i].filenames = filenames;
+ worker_args[i].missing_crc = missing_crc;
+ worker_args[i].permissive = permissive;
+ const int errcode =
+ pthread_create( &worker_threads[i], 0, dworker_l, &worker_args[i] );
+ if( errcode )
+ { show_error( "Can't create worker threads", errcode ); cleanup_and_fail(); }
+ }
+
+ muxer( courier ); // returns when all workers exited and the queue is empty
+
+ for( int i = num_workers - 1; i >= 0; --i )
+ {
+ const int errcode = pthread_join( worker_threads[i], 0 );
+ if( errcode )
+ { show_error( "Can't join worker threads", errcode ); cleanup_and_fail(); }
+ }
+ delete[] worker_threads;
+ delete[] worker_args;
+
+ int retval = 0;
+ for( int i = 0; i < parser.arguments(); ++i )
+ if( parser.code( i ) == 0 && name_pending[i] ) // name never matched by any worker
+ {
+ show_file_error( parser.argument( i ).c_str(), "Not found in archive." );
+ retval = 1;
+ }
+
+ if( debug_level & 1 ) // print muxer statistics
+ std::fprintf( stderr,
+ "muxer tried to consume from workers %8u times\n"
+ "muxer had to wait %8u times\n",
+ courier.ocheck_counter,
+ courier.owait_counter );
+
+ if( !courier.finished() ) internal_error( "courier not finished." );
+ return retval;
+ }
diff --git a/lzip.h b/lzip.h
index 03ce788..d88e9c7 100644
--- a/lzip.h
+++ b/lzip.h
@@ -1,5 +1,5 @@
/* Tarlz - Archiver with multimember lzip compression
- Copyright (C) 2013-2018 Antonio Diaz Diaz.
+ Copyright (C) 2013-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -53,7 +53,7 @@ inline bool isvalid_ds( const unsigned dictionary_size )
dictionary_size <= max_dictionary_size ); }
-const uint8_t lzip_magic[5] = { 0x4C, 0x5A, 0x49, 0x50, 1 }; // "LZIP\1"
+const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP"
struct Lzip_header
{
@@ -63,14 +63,24 @@ struct Lzip_header
enum { size = 6 };
bool verify_magic() const
- { return ( std::memcmp( data, lzip_magic, 5 ) == 0 ); }
+ { return ( std::memcmp( data, lzip_magic, 4 ) == 0 ); }
bool verify_prefix( const int sz ) const // detect (truncated) header
{
- for( int i = 0; i < sz && i < 5; ++i )
+ for( int i = 0; i < sz && i < 4; ++i )
if( data[i] != lzip_magic[i] ) return false;
return ( sz > 0 );
}
+ bool verify_corrupt() const // detect corrupt header
+ {
+ int matches = 0; // bytes of data equal to the magic bytes
+ for( int i = 0; i < 4; ++i )
+ if( data[i] == lzip_magic[i] ) ++matches;
+ return ( matches > 1 && matches < 4 ); // true if 2 or 3 of the 4 bytes match
+ }
+
+ uint8_t version() const { return data[4]; } // byte following the 4 magic bytes
+ bool verify_version() const { return ( data[4] == 1 ); } // only version 1 is accepted
unsigned dictionary_size() const
{
@@ -109,4 +119,28 @@ struct Lzip_trailer
for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; }
return tmp;
}
+
+ bool verify_consistency() const // check internal consistency
+ {
+ const unsigned crc = data_crc();
+ const unsigned long long dsize = data_size();
+ if( ( crc == 0 ) != ( dsize == 0 ) ) return false; // crc and size must be both zero or both nonzero
+ const unsigned long long msize = member_size();
+ if( msize < min_member_size ) return false;
+ const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size; // upper bound accepted for msize
+ if( mlimit > dsize && msize > mlimit ) return false; // limit is ignored if it is not larger than dsize
+ const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1; // upper bound accepted for dsize
+ if( dlimit > msize && dsize > dlimit ) return false; // limit is ignored if it is not larger than msize
+ return true;
+ }
};
+
+
+const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
+const char * const bad_dict_msg = "Invalid dictionary size in member header.";
+const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
+const char * const trailing_msg = "Trailing data not allowed.";
+
+// defined in extract.cc
+int readblock( const int fd, uint8_t * const buf, const int size );
+int writeblock( const int fd, const uint8_t * const buf, const int size );
diff --git a/lzip_index.cc b/lzip_index.cc
new file mode 100644
index 0000000..cb4e9b1
--- /dev/null
+++ b/lzip_index.cc
@@ -0,0 +1,204 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2019 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdio>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+
+#include "lzip.h"
+#include "lzip_index.h"
+
+
+namespace {
+
+// Positions 'fd' at 'pos' and reads 'size' bytes into 'buf' with readblock.
+// Returns the value returned by readblock, or 0 if the seek fails.
+int seek_read( const int fd, uint8_t * const buf, const int size,
+ const long long pos )
+ {
+ const bool positioned = ( lseek( fd, pos, SEEK_SET ) == pos );
+ return positioned ? readblock( fd, buf, size ) : 0;
+ }
+
+// Formats the "unsupported version" message for 'version'.
+// The returned pointer refers to a static buffer reused by every call.
+const char * bad_version( const unsigned version )
+ {
+ static char message[80];
+ const char * const format = "Version %u member format not supported.";
+ snprintf( message, sizeof message, format, version );
+ return message;
+ }
+
+} // end namespace
+
+
+// Records 'msg' followed by the description of the current errno,
+// and sets the return value to 1.
+void Lzip_index::set_errno_error( const char * const msg )
+ {
+ retval_ = 1;
+ error_.assign( msg );
+ error_.append( std::strerror( errno ) );
+ }
+
+// Records 'msg' followed by 'num' in decimal, and sets the return value to 2.
+void Lzip_index::set_num_error( const char * const msg, unsigned long long num )
+ {
+ char text[80];
+ retval_ = 2;
+ snprintf( text, sizeof text, "%s%llu", msg, num );
+ error_.assign( text );
+ }
+
+
+// If successful, push last member and set pos to member header.
+// Searches backwards from 'pos', one block at a time, for a consistent
+// trailer whose member_size leads to a valid member header. On failure
+// returns false with error_ and retval_ set.
+bool Lzip_index::skip_trailing_data( const int fd, long long & pos,
+ const bool ignore_trailing, const bool loose_trailing )
+ {
+ enum { block_size = 16384,
+ buffer_size = block_size + Lzip_trailer::size - 1 + Lzip_header::size };
+ uint8_t buffer[buffer_size];
+ if( pos < min_member_size ) return false;
+ int bsize = pos % block_size; // total bytes in buffer
+ if( bsize <= buffer_size - block_size ) bsize += block_size;
+ int search_size = bsize; // bytes to search for trailer
+ int rd_size = bsize; // bytes to read from file
+ unsigned long long ipos = pos - rd_size; // aligned to block_size
+
+ while( true )
+ {
+ if( seek_read( fd, buffer, rd_size, ipos ) != rd_size )
+ { set_errno_error( "Error seeking member trailer: " ); return false; }
+ const uint8_t max_msb = ( ipos + search_size ) >> 56;
+ for( int i = search_size; i >= Lzip_trailer::size; --i ) // i = candidate end of trailer in buffer
+ if( buffer[i-1] <= max_msb ) // most significant byte of member_size
+ {
+ const Lzip_trailer & trailer =
+ *(const Lzip_trailer *)( buffer + i - Lzip_trailer::size );
+ const unsigned long long member_size = trailer.member_size();
+ if( member_size == 0 ) // skip trailing zeros
+ { while( i > Lzip_trailer::size && buffer[i-9] == 0 ) --i; continue; }
+ if( member_size > ipos + i || !trailer.verify_consistency() )
+ continue;
+ Lzip_header header;
+ if( seek_read( fd, header.data, Lzip_header::size,
+ ipos + i - member_size ) != Lzip_header::size )
+ { set_errno_error( "Error reading member header: " ); return false; }
+ const unsigned dictionary_size = header.dictionary_size();
+ if( !header.verify_magic() || !header.verify_version() ||
+ !isvalid_ds( dictionary_size ) ) continue;
+ if( (*(const Lzip_header *)( buffer + i )).verify_prefix( bsize - i ) ) // data after the trailer starts like a header
+ { error_ = "Last member in input file is truncated or corrupt.";
+ retval_ = 2; return false; }
+ if( !loose_trailing && bsize - i >= Lzip_header::size &&
+ (*(const Lzip_header *)( buffer + i )).verify_corrupt() )
+ { error_ = corrupt_mm_msg; retval_ = 2; return false; }
+ if( !ignore_trailing )
+ { error_ = trailing_msg; retval_ = 2; return false; }
+ pos = ipos + i - member_size;
+ member_vector.push_back( Member( 0, trailer.data_size(), pos,
+ member_size, dictionary_size ) );
+ return true;
+ }
+ if( ipos <= 0 ) // start of file reached without finding a member
+ { set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size );
+ return false; }
+ bsize = buffer_size;
+ search_size = bsize - Lzip_header::size;
+ rd_size = block_size;
+ ipos -= rd_size;
+ std::memcpy( buffer + rd_size, buffer, buffer_size - rd_size ); // keep the head of the old block after the block read next
+ }
+ }
+
+
+// Builds the index of members of the file open on 'infd' by following the
+// member trailers backwards from the end of the file. On failure the
+// member vector is left empty and error_ and retval_ describe the problem.
+Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
+ const bool loose_trailing )
+ : insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 )
+ {
+ if( insize < 0 )
+ { set_errno_error( "Input file is not seekable: " ); return; }
+ if( insize < min_member_size )
+ { error_ = "Input file is too short."; retval_ = 2; return; }
+ if( insize > INT64_MAX )
+ { error_ = "Input file is too long (2^63 bytes or more).";
+ retval_ = 2; return; }
+
+ Lzip_header header;
+ if( seek_read( infd, header.data, Lzip_header::size, 0 ) != Lzip_header::size ) // header of first member
+ { set_errno_error( "Error reading member header: " ); return; }
+ if( !header.verify_magic() )
+ { error_ = bad_magic_msg; retval_ = 2; return; }
+ if( !header.verify_version() )
+ { error_ = bad_version( header.version() ); retval_ = 2; return; }
+ if( !isvalid_ds( header.dictionary_size() ) )
+ { error_ = bad_dict_msg; retval_ = 2; return; }
+
+ long long pos = insize; // always points to a header or to EOF
+ while( pos >= min_member_size )
+ {
+ Lzip_trailer trailer;
+ if( seek_read( infd, trailer.data, Lzip_trailer::size,
+ pos - Lzip_trailer::size ) != Lzip_trailer::size )
+ { set_errno_error( "Error reading member trailer: " ); break; }
+ const unsigned long long member_size = trailer.member_size();
+ if( member_size > (unsigned long long)pos || !trailer.verify_consistency() )
+ {
+ if( member_vector.empty() ) // trailing data is only looked for after the last member
+ { if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) )
+ continue; else return; }
+ set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size );
+ break;
+ }
+ if( seek_read( infd, header.data, Lzip_header::size,
+ pos - member_size ) != Lzip_header::size )
+ { set_errno_error( "Error reading member header: " ); break; }
+ const unsigned dictionary_size = header.dictionary_size();
+ if( !header.verify_magic() || !header.verify_version() ||
+ !isvalid_ds( dictionary_size ) )
+ {
+ if( member_vector.empty() )
+ { if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) )
+ continue; else return; }
+ set_num_error( "Bad header at pos ", pos - member_size );
+ break;
+ }
+ pos -= member_size;
+ member_vector.push_back( Member( 0, trailer.data_size(), pos,
+ member_size, dictionary_size ) );
+ }
+ if( pos != 0 || member_vector.empty() ) // the members found do not cover the file from its start
+ {
+ member_vector.clear();
+ if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; }
+ return;
+ }
+ std::reverse( member_vector.begin(), member_vector.end() ); // members were pushed last to first
+ for( unsigned long i = 0; ; ++i ) // set the data position of each member to the end of the previous one
+ {
+ const long long end = member_vector[i].dblock.end();
+ if( end < 0 || end > INT64_MAX )
+ {
+ member_vector.clear();
+ error_ = "Data in input file is too long (2^63 bytes or more).";
+ retval_ = 2; return;
+ }
+ if( i + 1 >= member_vector.size() ) break;
+ member_vector[i+1].dblock.pos( end );
+ }
+ }
diff --git a/lzip_index.h b/lzip_index.h
new file mode 100644
index 0000000..9ff6ee9
--- /dev/null
+++ b/lzip_index.h
@@ -0,0 +1,87 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2019 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef INT64_MAX
+#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL
+#endif
+
+
+class Block // a (position, size) pair describing a contiguous range of bytes
+ {
+ long long pos_, size_; // pos + size <= INT64_MAX (invariant is not enforced by this class)
+
+public:
+ Block( const long long p, const long long s ) : pos_( p ), size_( s ) {}
+
+ long long pos() const { return pos_; } // start of the range
+ long long size() const { return size_; } // length of the range
+ long long end() const { return pos_ + size_; } // position just past the range
+
+ void pos( const long long p ) { pos_ = p; } // setters store the value unchecked
+ void size( const long long s ) { size_ = s; }
+ };
+
+
+class Lzip_index // table of the lzip members found in an input file
+ {
+ struct Member // one entry of the index
+ {
+ Block dblock, mblock; // data block, member block
+ unsigned dictionary_size; // value taken from the member header
+
+ Member( const long long dp, const long long ds, // dp, ds: data block pos and size
+ const long long mp, const long long ms, const unsigned dict_size ) // mp, ms: member block pos and size
+ : dblock( dp, ds ), mblock( mp, ms ), dictionary_size( dict_size ) {}
+ };
+
+ std::vector< Member > member_vector; // members in file order; emptied when indexing fails
+ std::string error_; // text of the recorded error, see error()
+ const long long insize; // input size; file_size() reports 0 if it is negative
+ int retval_; // 0 while no error is recorded; the constructor sets 2 when the index can't be created
+
+ void set_errno_error( const char * const msg );
+ void set_num_error( const char * const msg, unsigned long long num );
+ bool skip_trailing_data( const int fd, long long & pos,
+ const bool ignore_trailing, const bool loose_trailing );
+
+public:
+ Lzip_index( const int infd, const bool ignore_trailing, // builds the index from infd; check retval() afterwards
+ const bool loose_trailing );
+
+ long members() const { return member_vector.size(); } // number of indexed members
+ const std::string & error() const { return error_; }
+ int retval() const { return retval_; }
+
+ long long udata_size() const // end of the last data block, 0 if there are no members
+ { if( member_vector.empty() ) return 0;
+ return member_vector.back().dblock.end(); }
+
+ long long cdata_size() const // end of the last member block, 0 if there are no members
+ { if( member_vector.empty() ) return 0;
+ return member_vector.back().mblock.end(); }
+
+ // total size including trailing data (if any)
+ long long file_size() const
+ { if( insize >= 0 ) return insize; else return 0; }
+
+ const Block & dblock( const long i ) const // i is not range-checked (also in the accessors below)
+ { return member_vector[i].dblock; }
+ const Block & mblock( const long i ) const
+ { return member_vector[i].mblock; }
+ unsigned dictionary_size( const long i ) const
+ { return member_vector[i].dictionary_size; }
+ };
diff --git a/main.cc b/main.cc
index 69c46b4..86734c1 100644
--- a/main.cc
+++ b/main.cc
@@ -1,5 +1,5 @@
/* Tarlz - Archiver with multimember lzip compression
- Copyright (C) 2013-2018 Antonio Diaz Diaz.
+ Copyright (C) 2013-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -32,15 +32,16 @@
#include <string>
#include <vector>
#include <fcntl.h>
+#include <pthread.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/stat.h>
#include <grp.h>
#include <pwd.h>
+#include <lzlib.h>
#if defined(__OS2__)
#include <io.h>
#endif
-#include <lzlib.h>
#include "arg_parser.h"
#include "tarlz.h"
@@ -58,21 +59,23 @@ int verbosity = 0;
namespace {
const char * const program_name = "tarlz";
-const char * const program_year = "2018";
+const char * const program_year = "2019";
const char * invocation_name = 0;
enum Mode { m_none, m_append, m_concatenate, m_create, m_extract, m_list };
-void show_help()
+void show_help( const long num_online )
{
- std::printf( "Tarlz is a small and simple implementation of the tar archiver. By default\n"
- "tarlz creates, lists and extracts archives in a simplified posix pax format\n"
- "compressed with lzip on a per file basis. Each tar member is compressed in\n"
- "its own lzip member, as well as the end-of-file blocks. This method is fully\n"
- "backward compatible with standard tar tools like GNU tar, which treat the\n"
- "resulting multimember tar.lz archive like any other tar.lz archive. Tarlz\n"
- "can append files to the end of such compressed archives.\n"
+ std::printf( "Tarlz is a combined implementation of the tar archiver and the lzip\n"
+ "compressor. By default tarlz creates, lists and extracts archives in a\n"
+ "simplified posix pax format compressed with lzip on a per file basis. Each\n"
+ "tar member is compressed in its own lzip member, as well as the end-of-file\n"
+ "blocks. This method adds an indexed lzip layer on top of the tar archive,\n"
+ "making it possible to decode the archive safely in parallel. The resulting\n"
+ "multimember tar.lz archive is fully backward compatible with standard tar\n"
+ "tools like GNU tar, which treat it like any other tar.lz archive. Tarlz can\n"
+ "append files to the end of such compressed archives.\n"
"\nThe tarlz file format is a safe posix-style backup format. In case of\n"
"corruption, tarlz can extract all the undamaged members from the tar.lz\n"
"archive, skipping over the damaged members, just like the standard\n"
@@ -87,6 +90,7 @@ void show_help()
" -c, --create create a new archive\n"
" -C, --directory=<dir> change to directory <dir>\n"
" -f, --file=<archive> use archive file <archive>\n"
+ " -n, --threads=<n> set number of decompression threads [%ld]\n"
" -q, --quiet suppress all messages\n"
" -r, --append append files to the end of an archive\n"
" -t, --list list the contents of an archive\n"
@@ -103,8 +107,13 @@ void show_help()
" --keep-damaged don't delete partially extracted files\n"
" --missing-crc exit with error status if missing extended CRC\n"
// " --permissive allow repeated extended headers and records\n"
- " --uncompressed don't compress the archive created\n"
- "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
+ " --uncompressed don't compress the archive created\n",
+ num_online );
+ if( verbosity >= 1 )
+ {
+ std::printf( " --debug=<level> (0-1) print debug statistics to stderr\n" );
+ }
+ std::printf( "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
"invalid input file, 3 for an internal consistency error (eg, bug) which\n"
"caused tarlz to panic.\n"
@@ -189,7 +198,8 @@ void set_owner( const char * const arg )
{
const struct passwd * const pw = getpwnam( arg );
if( pw ) cl_owner = pw->pw_uid;
- else if( std::isdigit( arg[0] ) ) cl_owner = getnum( arg, 0, INT_MAX );
+ else if( std::isdigit( (unsigned char)arg[0] ) )
+ cl_owner = getnum( arg, 0, INT_MAX );
else { show_file_error( arg, "Invalid owner" ); std::exit( 1 ); }
}
@@ -197,7 +207,8 @@ void set_group( const char * const arg )
{
const struct group * const gr = getgrnam( arg );
if( gr ) cl_group = gr->gr_gid;
- else if( std::isdigit( arg[0] ) ) cl_group = getnum( arg, 0, INT_MAX );
+ else if( std::isdigit( (unsigned char)arg[0] ) )
+ cl_group = getnum( arg, 0, INT_MAX );
else { show_file_error( arg, "Invalid group" ); std::exit( 1 ); }
}
@@ -226,43 +237,6 @@ int open_outstream( const std::string & name, const bool create )
}
-/* Returns the number of bytes really read.
- If (returned value < size) and (errno == 0), means EOF was reached.
-*/
-int readblock( const int fd, uint8_t * const buf, const int size )
- {
- int sz = 0;
- errno = 0;
- while( sz < size )
- {
- const int n = read( fd, buf + sz, size - sz );
- if( n > 0 ) sz += n;
- else if( n == 0 ) break; // EOF
- else if( errno != EINTR ) break;
- errno = 0;
- }
- return sz;
- }
-
-
-/* Returns the number of bytes really written.
- If (returned value < size), it is always an error.
-*/
-int writeblock( const int fd, const uint8_t * const buf, const int size )
- {
- int sz = 0;
- errno = 0;
- while( sz < size )
- {
- const int n = write( fd, buf + sz, size - sz );
- if( n > 0 ) sz += n;
- else if( n < 0 && errno != EINTR ) break;
- errno = 0;
- }
- return sz;
- }
-
-
void show_error( const char * const msg, const int errcode, const bool help )
{
if( verbosity < 0 ) return;
@@ -297,8 +271,10 @@ void internal_error( const char * const msg )
int main( const int argc, const char * const argv[] )
{
std::string archive_name;
+ int debug_level = 0;
+ int num_workers = -1; // start this many worker threads
+ int level = 6; // compression level, < 0 means uncompressed
Mode program_mode = m_none;
- int level = 6; // compression level, < 0 = uncompressed
bool keep_damaged = false;
bool missing_crc = false;
bool permissive = false;
@@ -308,8 +284,8 @@ int main( const int argc, const char * const argv[] )
{ show_error( "Bad library version. At least lzlib 1.0 is required." );
return 1; }
- enum { opt_ano = 256, opt_aso, opt_crc, opt_dso, opt_grp, opt_kd, opt_nso,
- opt_own, opt_per, opt_sol, opt_un };
+ enum { opt_ano = 256, opt_aso, opt_crc, opt_dbg, opt_dso, opt_grp, opt_kd,
+ opt_nso, opt_own, opt_per, opt_sol, opt_un };
const Arg_parser::Option options[] =
{
{ '0', 0, Arg_parser::no },
@@ -328,6 +304,7 @@ int main( const int argc, const char * const argv[] )
{ 'f', "file", Arg_parser::yes },
{ 'h', "help", Arg_parser::no },
{ 'H', "format", Arg_parser::yes },
+ { 'n', "threads", Arg_parser::yes },
{ 'q', "quiet", Arg_parser::no },
{ 'r', "append", Arg_parser::no },
{ 't', "list", Arg_parser::no },
@@ -336,6 +313,7 @@ int main( const int argc, const char * const argv[] )
{ 'x', "extract", Arg_parser::no },
{ opt_ano, "anonymous", Arg_parser::no },
{ opt_aso, "asolid", Arg_parser::no },
+ { opt_dbg, "debug", Arg_parser::yes },
{ opt_dso, "dsolid", Arg_parser::no },
{ opt_grp, "group", Arg_parser::yes },
{ opt_kd, "keep-damaged", Arg_parser::no },
@@ -351,6 +329,11 @@ int main( const int argc, const char * const argv[] )
if( parser.error().size() ) // bad option
{ show_error( parser.error().c_str(), 0, true ); return 1; }
+ const long num_online = std::max( 1L, sysconf( _SC_NPROCESSORS_ONLN ) );
+ long max_workers = sysconf( _SC_THREAD_THREADS_MAX );
+ if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) )
+ max_workers = INT_MAX / sizeof (pthread_t);
+
int filenames = 0;
for( int argind = 0; argind < parser.arguments(); ++argind )
{
@@ -367,8 +350,9 @@ int main( const int argc, const char * const argv[] )
case 'c': set_mode( program_mode, m_create ); break;
case 'C': break; // skip chdir
case 'f': if( sarg != "-" ) archive_name = sarg; break;
- case 'h': show_help(); return 0;
+ case 'h': show_help( num_online ); return 0;
case 'H': break; // ignore format
+ case 'n': num_workers = getnum( arg, 0, max_workers ); break;
case 'q': verbosity = -1; break;
case 'r': set_mode( program_mode, m_append ); break;
case 't': set_mode( program_mode, m_list ); break;
@@ -376,15 +360,16 @@ int main( const int argc, const char * const argv[] )
case 'V': show_version(); return 0;
case 'x': set_mode( program_mode, m_extract ); break;
case opt_ano: set_owner( "root" ); set_group( "root" ); break;
- case opt_aso: cl_solid = 2; break;
+ case opt_aso: solidity = asolid; break;
case opt_crc: missing_crc = true; break;
- case opt_dso: cl_solid = 1; break;
+ case opt_dbg: debug_level = getnum( arg, 0, 3 ); break;
+ case opt_dso: solidity = dsolid; break;
case opt_grp: set_group( arg ); break;
case opt_kd: keep_damaged = true; break;
- case opt_nso: cl_solid = 0; break;
+ case opt_nso: solidity = no_solid; break;
case opt_own: set_owner( arg ); break;
case opt_per: permissive = true; break;
- case opt_sol: cl_solid = 3; break;
+ case opt_sol: solidity = solid; break;
case opt_un: level = -1; break;
default : internal_error( "uncaught option" );
}
@@ -395,6 +380,8 @@ int main( const int argc, const char * const argv[] )
setmode( STDOUT_FILENO, O_BINARY );
#endif
+ if( num_workers < 0 ) num_workers = std::min( num_online, max_workers );
+
switch( program_mode )
{
case m_none: show_error( "Missing operation.", 0, true ); return 2;
@@ -403,8 +390,8 @@ int main( const int argc, const char * const argv[] )
program_mode == m_append );
case m_concatenate: return concatenate( archive_name, parser, filenames );
case m_extract:
- case m_list: return decode( archive_name, parser, filenames,
- keep_damaged, program_mode == m_list,
- missing_crc, permissive );
+ case m_list: return decode( archive_name, parser, filenames, num_workers,
+ debug_level, keep_damaged, program_mode == m_list,
+ missing_crc, permissive );
}
}
diff --git a/tarlz.h b/tarlz.h
index bc84c53..09baaf2 100644
--- a/tarlz.h
+++ b/tarlz.h
@@ -1,5 +1,5 @@
/* Tarlz - Archiver with multimember lzip compression
- Copyright (C) 2013-2018 Antonio Diaz Diaz.
+ Copyright (C) 2013-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -16,7 +16,7 @@
*/
enum { header_size = 512 };
-typedef char Tar_header[header_size];
+typedef uint8_t Tar_header[header_size];
enum Offsets {
name_o = 0, mode_o = 100, uid_o = 108, gid_o = 116, size_o = 124,
@@ -33,13 +33,13 @@ enum Lengths {
enum Typeflag {
tf_regular = '0', tf_link = '1', tf_symlink = '2', tf_chardev = '3',
tf_blockdev = '4', tf_directory = '5', tf_fifo = '6', tf_hiperf = '7',
- tf_extended = 'x' };
+ tf_global = 'g', tf_extended = 'x' };
const uint8_t ustar_magic[magic_l] =
{ 0x75, 0x73, 0x74, 0x61, 0x72, 0 }; // "ustar\0"
-inline bool verify_ustar_magic( const uint8_t * const buf )
- { return std::memcmp( buf + magic_o, ustar_magic, magic_l ) == 0; }
+inline bool verify_ustar_magic( const uint8_t * const header )
+ { return std::memcmp( header + magic_o, ustar_magic, magic_l ) == 0; }
class CRC32C // Uses CRC32-C (Castagnoli) polynomial.
@@ -101,15 +101,44 @@ struct Extended // stores metadata from/for extended records
void reset()
{ linkpath.clear(); path.clear(); size = 0; crc_present = false; }
bool empty() { return linkpath.empty() && path.empty() && size == 0; }
- bool parse( const int infd, const Tar_header header, const bool permissive );
+ bool parse( const char * const buf, const unsigned long long edsize,
+ const bool permissive );
+ };
+
+
+enum { initial_line_length = 1000 }; // must be >= 77
+
+class Resizable_buffer // malloc-backed char buffer that can grow but never shrinks
+ {
+ char * p; // owned storage; NOTE(review): copying is not disabled, a copy would free p twice
+ unsigned size_; // current capacity; 0 if the initial malloc failed
+
+public:
+ explicit Resizable_buffer( const unsigned initial_size ) // on malloc failure p is 0 and size() is 0
+ : p( (char *)std::malloc( initial_size ) ), size_( p ? initial_size : 0 ) {}
+ ~Resizable_buffer() { if( p ) std::free( p ); p = 0; size_ = 0; }
+
+ bool resize( const unsigned new_size ) // returns false only if realloc fails
+ {
+ if( size_ < new_size ) // a request not larger than the current size is a no-op
+ {
+ char * const tmp = (char *)std::realloc( p, new_size );
+ if( !tmp ) return false; // p and size_ are left untouched on failure
+ p = tmp; size_ = new_size;
+ }
+ return true;
+ }
+ char * operator()() const { return p; } // current storage; the pointer may change after resize()
+ unsigned size() const { return size_; }
+ };
// defined in create.cc
+enum Solidity { no_solid, dsolid, asolid, solid };
extern int cl_owner;
extern int cl_group;
-extern int cl_solid;
-unsigned ustar_chksum( const uint8_t * const buf );
-bool verify_ustar_chksum( const uint8_t * const buf );
+extern Solidity solidity;
+unsigned ustar_chksum( const uint8_t * const header );
+bool verify_ustar_chksum( const uint8_t * const header );
class Arg_parser;
int concatenate( const std::string & archive_name, const Arg_parser & parser,
const int filenames );
@@ -117,16 +146,29 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
const int filenames, const int level, const bool append );
// defined in extract.cc
+bool block_is_zero( const uint8_t * const buf, const int size );
+void format_member_name( const Extended & extended, const Tar_header header,
+ Resizable_buffer & rbuf, const bool long_format );
+const char * remove_leading_slash( const char * const filename );
+bool compare_prefix_dir( const char * const dir, const char * const name );
+bool compare_tslash( const char * const name1, const char * const name2 );
+unsigned long long parse_octal( const uint8_t * const ptr, const int size );
int decode( const std::string & archive_name, const Arg_parser & parser,
- const int filenames, const bool keep_damaged, const bool listing,
- const bool missing_crc, const bool permissive );
+ const int filenames, const int num_workers, const int debug_level,
+ const bool keep_damaged, const bool listing, const bool missing_crc,
+ const bool permissive );
+
+// defined in list_lz.cc
+class Lzip_index;
+int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
+ const Lzip_index & lzip_index, const int filenames,
+ const int debug_level, const int infd, const int num_workers,
+ const bool missing_crc, const bool permissive );
// defined in main.cc
extern int verbosity;
int open_instream( const std::string & name );
int open_outstream( const std::string & name, const bool create = true );
-int readblock( const int fd, uint8_t * const buf, const int size );
-int writeblock( const int fd, const uint8_t * const buf, const int size );
void show_error( const char * const msg, const int errcode = 0,
const bool help = false );
void show_file_error( const char * const filename, const char * const msg,
diff --git a/testsuite/check.sh b/testsuite/check.sh
index 6bdf2d7..f6f989f 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -1,6 +1,6 @@
#! /bin/sh
# check script for Tarlz - Archiver with multimember lzip compression
-# Copyright (C) 2013-2018 Antonio Diaz Diaz.
+# Copyright (C) 2013-2019 Antonio Diaz Diaz.
#
# This script is free software: you have unlimited permission
# to copy, distribute and modify it.
@@ -38,6 +38,8 @@ test3="${testdir}"/test3.tar
test3_lz="${testdir}"/test3.tar.lz
test3dir_lz="${testdir}"/test3_dir.tar.lz
test3dot_lz="${testdir}"/test3_dot.tar.lz
+tarint1_lz="${testdir}"/tar_in_tlz1.tar.lz
+tarint2_lz="${testdir}"/tar_in_tlz2.tar.lz
t155="${testdir}"/t155.tar
t155_lz="${testdir}"/t155.tar.lz
tlzit1="${testdir}"/tlz_in_tar1.tar
@@ -58,11 +60,14 @@ fail=0
lwarn=0
test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; }
lzlib_1_11() { [ ${lwarn} = 0 ] &&
- printf "\nwarning: testing --keep-damaged requires lzlib-1.11-rc2 or newer\n$1"
+ printf "\nwarning: testing --keep-damaged requires lzlib-1.11 or newer\n$1"
lwarn=1 ; }
# Description of test files for tarlz:
-# t155.tar[.lz] directory + file + link + eof, all with 155 char names
+# test.txt.tar.lz: 1 member (test.txt).
+# t155.tar[.lz]: directory + file + link + eof, all with 155 char names
+# tar_in_tlz1.tar.lz 2 members (test.txt.tar test3.tar) 3 lzip members
+# tar_in_tlz2.tar.lz 2 members (test.txt.tar test3.tar) 5 lzip members
# test_bad1.tar.lz: truncated at offset 6000 (of 7495)
# test_bad2.tar.lz: byte at offset 6000 changed from 0x56 to 0x46
# test3.tar: 3 members (foo bar baz) + 2 zeroed 512-byte blocks
@@ -80,6 +85,9 @@ lzlib_1_11() { [ ${lwarn} = 0 ] &&
# test3_bad4.tar.lz: combined damage of test3_bad2.tar.lz and test3_bad3.tar.lz
# test3_bad5.tar.lz: [71-134] --> zeroed (first trailer + seconf header)
# test3_bad6.tar.lz: 510 zeros prepended to test3.tar.lz (header in two blocks)
+# test3_eof1.tar.lz: test3.tar.lz without eof blocks
+# test3_eof2.tar.lz: test3.tar.lz with only one eof block
+# test3_eof3.tar.lz: test3.tar.lz with one zeroed block between foo and bar
# tlz_in_tar1.tar: 1 member (test3.tar.lz) first magic damaged
# tlz_in_tar2.tar: 2 members (foo test3.tar.lz) first magic damaged
# ug32chars.tar.lz: 1 member (foo) with 32-character owner and group names
@@ -155,7 +163,7 @@ rm -f test.txt || framework_failure
"${TARLZ}" -xf "${in_tar}" --missing-crc || test_failed $LINENO
cmp "${in}" test.txt || test_failed $LINENO
rm -f test.txt || framework_failure
-
+#
printf "foo\n" > cfoo || framework_failure
printf "bar\n" > cbar || framework_failure
printf "baz\n" > cbaz || framework_failure
@@ -165,6 +173,7 @@ cmp cfoo foo || test_failed $LINENO
cmp cbar bar || test_failed $LINENO
cmp cbaz baz || test_failed $LINENO
rm -f foo bar baz || framework_failure
+"${TARLZ}" -q -tf "${test3_lz}" ./foo ./bar ./baz || test_failed $LINENO
"${TARLZ}" -q -xf "${test3_lz}" ./foo ./bar ./baz || test_failed $LINENO
cmp cfoo foo || test_failed $LINENO
cmp cbar bar || test_failed $LINENO
@@ -180,6 +189,7 @@ cmp cfoo foo || test_failed $LINENO
cmp cbar bar || test_failed $LINENO
cmp cbaz baz || test_failed $LINENO
rm -f foo bar baz || framework_failure
+"${TARLZ}" -q -tf "${test3dot_lz}" foo bar baz || test_failed $LINENO
"${TARLZ}" -q -xf "${test3dot_lz}" foo bar baz || test_failed $LINENO
cmp cfoo foo || test_failed $LINENO
cmp cbar bar || test_failed $LINENO
@@ -190,11 +200,75 @@ cmp cfoo dir/foo || test_failed $LINENO
cmp cbar dir/bar || test_failed $LINENO
cmp cbaz dir/baz || test_failed $LINENO
rm -rf dir || framework_failure
+"${TARLZ}" -q -tf "${test3dir_lz}" dir/foo dir/bar dir/baz || test_failed $LINENO
"${TARLZ}" -q -xf "${test3dir_lz}" dir/foo dir/bar dir/baz || test_failed $LINENO
cmp cfoo dir/foo || test_failed $LINENO
cmp cbar dir/bar || test_failed $LINENO
cmp cbaz dir/baz || test_failed $LINENO
rm -rf dir || framework_failure
+#
+"${TARLZ}" -q -tf "${testdir}"/test3_eof1.tar.lz # archive without eof blocks
+[ $? = 2 ] || test_failed $LINENO # listing it must exit with status 2
+"${TARLZ}" -q -tf "${testdir}"/test3_eof2.tar.lz || test_failed $LINENO # only one eof block: must succeed
+"${TARLZ}" -q -tf "${testdir}"/test3_eof3.tar.lz || test_failed $LINENO # zeroed block between foo and bar: must succeed
+"${TARLZ}" -q -n0 -tf "${testdir}"/test3_eof1.tar.lz # same three listings with --threads=0
+[ $? = 2 ] || test_failed $LINENO
+"${TARLZ}" -q -n0 -tf "${testdir}"/test3_eof2.tar.lz || test_failed $LINENO
+"${TARLZ}" -q -n0 -tf "${testdir}"/test3_eof3.tar.lz || test_failed $LINENO
+#
+"${TARLZ}" -q -xf "${testdir}"/test3_eof1.tar.lz # extraction without eof blocks
+[ $? = 2 ] || test_failed $LINENO # status 2, yet the three files must have been extracted
+cmp cfoo foo || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+"${TARLZ}" -xf "${testdir}"/test3_eof2.tar.lz || test_failed $LINENO # one eof block: normal extraction
+cmp cfoo foo || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+"${TARLZ}" -xf "${testdir}"/test3_eof3.tar.lz || test_failed $LINENO # zeroed block after foo
+cmp cfoo foo || test_failed $LINENO
+[ ! -e bar ] || test_failed $LINENO # only foo may exist afterwards
+[ ! -e baz ] || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+#
+"${TARLZ}" -q -n0 -xf "${testdir}"/test3_eof1.tar.lz # repeat the extraction checks with --threads=0
+[ $? = 2 ] || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+"${TARLZ}" -n0 -xf "${testdir}"/test3_eof2.tar.lz || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+"${TARLZ}" -n0 -xf "${testdir}"/test3_eof3.tar.lz || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+[ ! -e bar ] || test_failed $LINENO
+[ ! -e baz ] || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+#
+for i in "${tarint1_lz}" "${tarint2_lz}" ; do # archives whose members are tar files
+ for j in 0 2 6 ; do # list (plain and verbose) with 0, 2 and 6 threads
+ "${TARLZ}" -tf "$i" --threads=$j > out$j ||
+ test_failed $LINENO "$i $j"
+ "${TARLZ}" -tvf "$i" --threads=$j > outv$j ||
+ test_failed $LINENO "$i $j"
+ done
+ cmp out0 out2 || test_failed $LINENO # listings must not depend on the thread count
+ cmp out0 out6 || test_failed $LINENO
+ cmp out2 out6 || test_failed $LINENO
+ cmp outv0 outv2 || test_failed $LINENO # same three pairwise checks for the verbose listings
+ cmp outv0 outv6 || test_failed $LINENO
+ cmp outv2 outv6 || test_failed $LINENO
+ rm -f out0 out2 out6 outv0 outv2 outv6 || framework_failure
+ "${TARLZ}" -xf "$i" || test_failed $LINENO # extract, then compare both members with the reference tar files
+ cmp "${in_tar}" test.txt.tar || test_failed $LINENO
+ cmp "${test3}" test3.tar || test_failed $LINENO
+ rm -f test.txt.tar test3.tar || framework_failure
+done
# test --concatenate
cat "${in_tar_lz}" > out.tar.lz || framework_failure
@@ -464,13 +538,13 @@ rm -f truncated.tar || framework_failure
rm -f test.txt || framework_failure
for i in "${inbad1}" "${inbad2}" ; do
"${TARLZ}" -q -xf "${i}.tar.lz"
- [ $? = 2 ] || test_failed $LINENO "${i}"
- [ ! -e test.txt ] || test_failed $LINENO "${i}"
+ [ $? = 2 ] || test_failed $LINENO "$i"
+ [ ! -e test.txt ] || test_failed $LINENO "$i"
rm -f test.txt || framework_failure
"${TARLZ}" -q -xf "${i}.tar.lz" --keep-damaged
- [ $? = 2 ] || test_failed $LINENO "${i}"
- [ -e test.txt ] || test_failed $LINENO "${i}"
- cmp "${i}" test.txt 2> /dev/null || lzlib_1_11 "$LINENO ${i}"
+ [ $? = 2 ] || test_failed $LINENO "$i"
+ [ -e test.txt ] || test_failed $LINENO "$i"
+ cmp "$i" test.txt 2> /dev/null || lzlib_1_11 "$LINENO $i"
rm -f test.txt || framework_failure
done
#
diff --git a/testsuite/tar_in_tlz1.tar.lz b/testsuite/tar_in_tlz1.tar.lz
new file mode 100644
index 0000000..bf04f25
--- /dev/null
+++ b/testsuite/tar_in_tlz1.tar.lz
Binary files differ
diff --git a/testsuite/tar_in_tlz2.tar.lz b/testsuite/tar_in_tlz2.tar.lz
new file mode 100644
index 0000000..de8453b
--- /dev/null
+++ b/testsuite/tar_in_tlz2.tar.lz
Binary files differ
diff --git a/testsuite/test3_eof1.tar.lz b/testsuite/test3_eof1.tar.lz
new file mode 100644
index 0000000..0eb86e4
--- /dev/null
+++ b/testsuite/test3_eof1.tar.lz
Binary files differ
diff --git a/testsuite/test3_eof2.tar.lz b/testsuite/test3_eof2.tar.lz
new file mode 100644
index 0000000..1f47953
--- /dev/null
+++ b/testsuite/test3_eof2.tar.lz
Binary files differ
diff --git a/testsuite/test3_eof3.tar.lz b/testsuite/test3_eof3.tar.lz
new file mode 100644
index 0000000..20ba9f8
--- /dev/null
+++ b/testsuite/test3_eof3.tar.lz
Binary files differ