18 files changed, 1473 insertions, 623 deletions
diff --git a/ChangeLog b/ChangeLog
index ccb672e..547430f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2019-02-13  Antonio Diaz Diaz  <antonio@gnu.org>
+
+	* Version 0.11 released.
+	* extract.cc (archive_read): Fixed endless loop with empty lz file.
+	* Implemented multi-threaded '-c, --create' and '-r, --append'.
+	* '--bsolid' is now the default compression granularity.
+	* create.cc (remove_leading_dotslash): Remember more than one prefix.
+	* tarlz.texi: Added new chapter 'Minimum archive sizes'.
+
 2019-01-31  Antonio Diaz Diaz  <antonio@gnu.org>
 
 	* Version 0.10 released.
diff --git a/Makefile.in b/Makefile.in
index 289818f..294d0ac 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -8,7 +8,8 @@ LIBS = -llz -lpthread
 SHELL = /bin/sh
 CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
 
-objs = arg_parser.o lzip_index.o create.o extended.o extract.o list_lz.o main.o
+objs = arg_parser.o lzip_index.o create.o create_lz.o extended.o extract.o \
+       list_lz.o main.o
 
 
 .PHONY : all install install-bin install-info install-man \
@@ -31,6 +32,7 @@ main.o : main.cc
 $(objs)      : Makefile
 arg_parser.o : arg_parser.h
 create.o     : arg_parser.h tarlz.h
+create_lz.o  : arg_parser.h tarlz.h
 extended.o   : tarlz.h
 extract.o    : arg_parser.h lzip_index.h tarlz.h
 list_lz.o    : arg_parser.h lzip_index.h tarlz.h
@@ -104,7 +106,7 @@ uninstall-man :
 
 dist : doc
 	ln -sf $(VPATH) $(DISTNAME)
-	tarlz --solid --owner=root --group=root -9cvf $(DISTNAME).tar.lz \
+	tarlz --solid --anonymous -9cvf $(DISTNAME).tar.lz \
 	  $(DISTNAME)/AUTHORS \
 	  $(DISTNAME)/COPYING \
 	  $(DISTNAME)/ChangeLog \
diff --git a/NEWS b/NEWS
index 223e417..13719c7 100644
--- a/NEWS
+++ b/NEWS
@@ -1,15 +1,15 @@
-Changes in version 0.10:
+Changes in version 0.11:
 
-The new option '--bsolid', which selects per-data-block compression of the
-archive, has been added. This option improves compression efficiency for
-archives with lots of small files.
+An endless loop that occurred when trying to list or extract from an empty
+tar.lz archive has been fixed.
 
-The new option '-B, --data-size', which sets the size of the input data
-blocks for '--bsolid', has been added.
+Multi-threaded '-c, --create' and '-r, --append' have been implemented.
 
-If an extended header is required for any reason (for example a file size
-larger than 8 GiB or a link name longer than 100 bytes), tarlz now moves the
-filename also to the extended header to prevent an ustar tool from trying to
-extract the file or link. This also makes easier during parallel extraction
-or listing the detection of a tar member split between two lzip members at
-the boundary between the extended header and the ustar header.
+The default compression granularity has been changed to '--bsolid'
+(per block compression) instead of '--no-solid' (per file compression).
+
+The message "Removing leading '<prefix>' from member names." is now shown
+once for each <prefix>.
+
+The new chapter 'Minimum archive sizes required for multi-threaded block
+compression' has been added to the manual.
diff --git a/README b/README
index 5a9a673..5de36bc 100644
--- a/README
+++ b/README
@@ -1,21 +1,21 @@
 Description
 
-Tarlz is a combined implementation of the tar archiver and the lzip
-compressor. By default tarlz creates, lists and extracts archives in a
-simplified posix pax format compressed with lzip on a per file basis. Each
-tar member is compressed in its own lzip member, as well as the end-of-file
-blocks. This method adds an indexed lzip layer on top of the tar archive,
-making it possible to decode the archive safely in parallel. The resulting
-multimember tar.lz archive is fully backward compatible with standard tar
-tools like GNU tar, which treat it like any other tar.lz archive. Tarlz can
-append files to the end of such compressed archives.
-
-Tarlz can create tar archives with four levels of compression granularity;
-per file, per directory, appendable solid, and solid.
-
-Of course, compressing each file (or each directory) individually is
-less efficient than compressing the whole tar archive, but it has the
-following advantages:
+Tarlz is a massively parallel (multi-threaded) combined implementation of
+the tar archiver and the lzip compressor. Tarlz creates, lists and extracts
+archives in a simplified posix pax format compressed with lzip, keeping the
+alignment between tar members and lzip members. This method adds an indexed
+lzip layer on top of the tar archive, making it possible to decode the
+archive safely in parallel. The resulting multimember tar.lz archive is
+fully backward compatible with standard tar tools like GNU tar, which treat
+it like any other tar.lz archive. Tarlz can append files to the end of such
+compressed archives.
+
+Tarlz can create tar archives with five levels of compression granularity;
+per file, per block, per directory, appendable solid, and solid.
+
+Of course, compressing each file (or each directory) individually can't
+achieve a compression ratio as high as solidly compressing the whole tar
+archive, but it has the following advantages:
 
    * The resulting multimember tar.lz archive can be decompressed in
      parallel, multiplying the decompression speed.
@@ -48,14 +48,15 @@ potentially much worse that undetected corruption in the data. Even more so
 in the case of pax because the amount of metadata it stores is potentially
 large, making undetected corruption more probable.
 
-Because of the above, tarlz protects the extended records with a CRC in
-a way compatible with standard tar tools.
+Because of the above, tarlz protects the extended records with a CRC in a
+way compatible with standard tar tools.
 
 Tarlz does not understand other tar formats like gnu, oldgnu, star or v7.
 
-The diagram below shows the correspondence between each tar member
-(formed by one or two headers plus optional data) in the tar archive and
-each lzip member in the resulting multimember tar.lz archive:
+The diagram below shows the correspondence between each tar member (formed
+by one or two headers plus optional data) in the tar archive and each lzip
+member in the resulting multimember tar.lz archive, when per file
+compression is used:
 
 tar
 +========+======+=================+===============+========+======+========+
diff --git a/configure b/configure
index a7cccfc..3c7bda9 100755
--- a/configure
+++ b/configure
@@ -6,7 +6,7 @@
 # to copy, distribute and modify it.
 
 pkgname=tarlz
-pkgversion=0.10a
+pkgversion=0.11
 progname=tarlz
 srctrigger=doc/${pkgname}.texi
 
diff --git a/create.cc b/create.cc
index 813923a..1cc7cb3 100644
--- a/create.cc
+++ b/create.cc
@@ -43,18 +43,53 @@
 
 const CRC32 crc32c( true );
 
-int cl_owner = -1;		// global vars needed by add_member
+int cl_owner = -1;			// global vars needed by add_member
 int cl_group = -1;
 int cl_data_size = 0;
-Solidity solidity = no_solid;
+Solidity solidity = bsolid;
 
 namespace {
 
-LZ_Encoder * encoder = 0;	// local vars needed by add_member
+LZ_Encoder * encoder = 0;		// local vars needed by add_member
 const char * archive_namep = 0;
-unsigned long long partial_data_size = 0;	// current block size
-int outfd = -1;
-int gretval = 0;
+unsigned long long partial_data_size = 0;	// size of current block
+Resizable_buffer grbuf( 2 * header_size );	// extended header + data
+int goutfd = -1;
+int error_status = 0;
+
+class File_is_the_archive	// tells whether a file is the archive itself
+  {
+  dev_t archive_dev;		// device and inode recorded by init
+  ino_t archive_ino;
+  bool initialized;		// false until init records a regular file
+
+public:
+  File_is_the_archive() : initialized( false ) {}
+  bool init( const int fd )	// returns false only if fstat fails
+    {
+    struct stat st;
+    if( fstat( fd, &st ) != 0 ) return false;
+    if( S_ISREG( st.st_mode ) )	// dev/ino are recorded for regular files only
+      { archive_dev = st.st_dev; archive_ino = st.st_ino; initialized = true; }
+    return true;
+    }
+  bool operator()( const struct stat & st ) const	// true if same dev and ino
+    {
+    return initialized && archive_dev == st.st_dev && archive_ino == st.st_ino;
+    }
+  } file_is_the_archive;
+
+
+bool option_C_after_relative_filename( const Arg_parser & parser )
+  {		// true if an option 'C' follows a relative file name
+  for( int i = 0; i < parser.arguments(); ++i )
+    if( !parser.code( i ) && parser.argument( i ).size() &&
+        parser.argument( i )[0] != '/' )	// relative_filename
+      while( ++i < parser.arguments() )	// shares i; the outer loop ends too
+        if( parser.code( i ) == 'C' ) return true;
+  return false;
+  }
+
 
 int seek_read( const int fd, uint8_t * const buf, const int size,
                const long long pos )
@@ -151,33 +186,14 @@ bool check_appendable( const int fd, const bool remove_eof )
   }
 
 
-class File_is_the_archive
-  {
-  dev_t archive_dev;
-  ino_t archive_ino;
-  bool initialized;
-
-public:
-  File_is_the_archive() : initialized( false ) {}
-  bool init( const int fd )
-    {
-    struct stat st;
-    if( fstat( fd, &st ) != 0 ) return false;
-    if( S_ISREG( st.st_mode ) )
-      { archive_dev = st.st_dev; archive_ino = st.st_ino; initialized = true; }
-    return true;
-    }
-  bool operator()( const struct stat & st ) const
-    {
-    return initialized && archive_dev == st.st_dev && archive_ino == st.st_ino;
-    }
-  } file_is_the_archive;
-
-
 bool archive_write( const uint8_t * const buf, const int size )
   {
+  static bool flushed = true;		// avoid flushing empty lzip members
+
+  if( size <= 0 && flushed ) return true;
+  flushed = ( size <= 0 );
   if( !encoder )					// uncompressed
-    return ( writeblock( outfd, buf, size ) == size );
+    return ( writeblock( goutfd, buf, size ) == size );
   enum { obuf_size = 65536 };
   uint8_t obuf[obuf_size];
   int sz = 0;
@@ -191,7 +207,7 @@ bool archive_write( const uint8_t * const buf, const int size )
     const int rd = LZ_compress_read( encoder, obuf, obuf_size );
     if( rd < 0 ) internal_error( "library error (LZ_compress_read)." );
     if( rd == 0 && sz >= size ) break;
-    if( writeblock( outfd, obuf, rd ) != rd ) return false;
+    if( writeblock( goutfd, obuf, rd ) != rd ) return false;
     }
   if( LZ_compress_finished( encoder ) == 1 &&
       LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 )
@@ -200,103 +216,17 @@ bool archive_write( const uint8_t * const buf, const int size )
   }
 
 
-void init_tar_header( Tar_header header )	// set magic and version
-  {
-  std::memset( header, 0, header_size );
-  std::memcpy( header + magic_o, ustar_magic, magic_l - 1 );
-  header[version_o] = header[version_o+1] = '0';
-  }
-
-
-unsigned char xdigit( const unsigned value )
-  {
-  if( value <= 9 ) return '0' + value;
-  if( value <= 15 ) return 'A' + value - 10;
-  return 0;
-  }
-
-void print_hex( char * const buf, int size, unsigned long long num )
-  {
-  while( --size >= 0 ) { buf[size] = xdigit( num & 0x0F ); num >>= 4; }
-  }
-
-void print_octal( uint8_t * const buf, int size, unsigned long long num )
-  {
-  while( --size >= 0 ) { buf[size] = '0' + ( num % 8 ); num /= 8; }
-  }
-
 bool write_extended( const Extended & extended )
   {
-  const int path_rec = extended.recsize_path();
-  const int lpath_rec = extended.recsize_linkpath();
-  const int size_rec = extended.recsize_file_size();
-  const unsigned long long edsize = extended.edsize();
-  const unsigned long long bufsize = extended.edsize_pad();
-  if( edsize >= 1ULL << 33 ) return false;	// too much extended data
-  if( bufsize == 0 ) return edsize == 0;	// overflow or no extended data
-  char * const buf = new char[bufsize+1];	// extended records buffer
-  unsigned long long pos = path_rec;		// goto can't cross these
-  const unsigned crc_size = Extended::crc_record.size();
-
-  if( path_rec && snprintf( buf, path_rec + 1, "%d path=%s\n",
-                            path_rec, extended.path().c_str() ) != path_rec )
-    goto error;
-  if( lpath_rec && snprintf( buf + pos, lpath_rec + 1, "%d linkpath=%s\n",
-                     lpath_rec, extended.linkpath().c_str() ) != lpath_rec )
-    goto error;
-  pos += lpath_rec;
-  if( size_rec && snprintf( buf + pos, size_rec + 1, "%d size=%llu\n",
-                            size_rec, extended.file_size() ) != size_rec )
-    goto error;
-  pos += size_rec;
-  std::memcpy( buf + pos, Extended::crc_record.c_str(), crc_size );
-  pos += crc_size;
-  if( pos != edsize ) goto error;
-  print_hex( buf + edsize - 9, 8,
-             crc32c.windowed_crc( (const uint8_t *)buf, edsize - 9, edsize ) );
-  std::memset( buf + edsize, 0, bufsize - edsize );	// wipe padding
-  Tar_header header;				// extended header
-  init_tar_header( header );
-  header[typeflag_o] = tf_extended;		// fill only required fields
-  print_octal( header + size_o, size_l - 1, edsize );
-  print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) );
-  if( !archive_write( header, header_size ) ) goto error;
-  for( pos = 0; pos < bufsize; )	// write extended records to archive
+  const long long ebsize = extended.format_block( grbuf );
+  if( ebsize < 0 ) return false;
+  for( long long pos = 0; pos < ebsize; )    // write extended block to archive
     {
-    int size = std::min( bufsize - pos, 1ULL << 20 );
-    if( !archive_write( (const uint8_t *)buf + pos, size ) ) goto error;
+    int size = std::min( ebsize - pos, 1LL << 20 );
+    if( !archive_write( (const uint8_t *)grbuf() + pos, size ) ) return false;
     pos += size;
     }
-  delete[] buf;
   return true;
-error:
-  delete[] buf;
-  return false;
-  }
-
-
-const char * remove_leading_dotdot( const char * const filename )
-  {
-  static std::string prefix;
-  const char * p = filename;
-
-  for( int i = 0; filename[i]; ++i )
-    if( filename[i] == '.' && filename[i+1] == '.' &&
-        ( i == 0 || filename[i-1] == '/' ) &&
-        ( filename[i+2] == 0 || filename[i+2] == '/' ) ) p = filename + i + 2;
-  while( *p == '/' || ( *p == '.' && p[1] == '/' ) ) ++p;
-  if( p != filename )
-    {
-    std::string msg( filename, p - filename );
-    if( prefix != msg )
-      {
-      prefix = msg;
-      msg = "Removing leading '"; msg += prefix; msg += "' from member names.";
-      show_error( msg.c_str() );
-      }
-    }
-  if( *p == 0 ) p = ".";
-  return p;
   }
 
 
@@ -304,7 +234,7 @@ const char * remove_leading_dotdot( const char * const filename )
 bool store_name( const char * const filename, Extended & extended,
                  Tar_header header, const bool force_extended_name )
   {
-  const char * const stored_name = remove_leading_dotdot( filename );
+  const char * const stored_name = remove_leading_dotslash( filename, true );
 
   if( !force_extended_name )	// try storing filename in the ustar header
     {
@@ -327,18 +257,113 @@ bool store_name( const char * const filename, Extended & extended,
   }
 
 
+// nftw callback: adds one tar member to the archive; nonzero stops the walk
 int add_member( const char * const filename, const struct stat *,
                 const int flag, struct FTW * )
   {
+  unsigned long long file_size = 0;
+  Extended extended;		// metadata for extended records
+  Tar_header header;
+  if( !fill_headers( filename, extended, header, file_size, flag ) ) return 0;
+  const int infd = file_size ? open_instream( filename ) : -1;
+  if( file_size && infd < 0 ) { set_error_status( 1 ); return 0; } // skip it
+  // NOTE(review): infd is not closed by the next three 'return 1' paths
+  if( encoder && solidity == bsolid &&
+      block_is_full( extended, file_size, partial_data_size ) &&
+      !archive_write( 0, 0 ) )		// zero-size write requests a flush
+    { show_error( "Error flushing encoder", errno ); return 1; }
+
+  if( !write_extended( extended ) )
+    { show_error( "Error writing extended header", errno ); return 1; }
+  if( !archive_write( header, header_size ) )
+    { show_error( "Error writing ustar header", errno ); return 1; }
+  if( file_size )
+    {
+    enum { bufsize = 32 * header_size };
+    uint8_t buf[bufsize];
+    unsigned long long rest = file_size;	// bytes of file data still to copy
+    while( rest > 0 )
+      {
+      int size = std::min( rest, (unsigned long long)bufsize );
+      const int rd = readblock( infd, buf, size );
+      rest -= rd;
+      if( rd != size )				// short read
+        {
+        if( verbosity >= 0 )
+          std::fprintf( stderr, "File '%s' ends unexpectedly at pos %llu\n",
+                        filename, file_size - rest );
+        close( infd ); return 1;
+        }
+      if( rest == 0 )				// last read
+        {
+        const int rem = file_size % header_size;
+        if( rem > 0 )		// zero-pad data to a multiple of header_size
+          { const int padding = header_size - rem;
+            std::memset( buf + size, 0, padding ); size += padding; }
+        }
+      if( !archive_write( buf, size ) )
+        { show_error( "Error writing archive", errno ); close( infd );
+          return 1; }
+      }
+    if( close( infd ) != 0 )
+      { show_file_error( filename, "Error closing file", errno ); return 1; }
+    }
+  if( encoder && solidity == no_solid && !archive_write( 0, 0 ) )
+    { show_error( "Error flushing encoder", errno ); return 1; }
+  if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
+  return 0;
+  }
+
+} // end namespace
+
+
+/* Skips any number of leading "./" and '/' strings in filename. If dotdot,
+   first skips the prefix up to the last ".." component. Returns the rest. */
+const char * remove_leading_dotslash( const char * const filename,
+                                      const bool dotdot )
+  {
+  // prevent two threads from modifying the list of prefixes at the same time
+  static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+  static std::vector< std::string > prefixes;	// prefixes already reported
+  const char * p = filename;
+
+  if( dotdot )
+    for( int i = 0; filename[i]; ++i )
+      if( filename[i] == '.' && filename[i+1] == '.' &&
+          ( i == 0 || filename[i-1] == '/' ) &&
+          ( filename[i+2] == 0 || filename[i+2] == '/' ) )
+        p = filename + i + 2;		// just past this ".." component
+  while( *p == '/' || ( *p == '.' && p[1] == '/' ) ) ++p;
+  if( p != filename )
+    {
+    std::string msg( filename, p - filename );	// the prefix being removed
+    unsigned i = 0;
+    xlock( &mutex );
+    while( i < prefixes.size() && prefixes[i] != msg ) ++i;
+    if( i >= prefixes.size() )		// not seen before; report it once
+      {
+      prefixes.push_back( msg );
+      msg.insert( 0, "Removing leading '" ); msg += "' from member names.";
+      show_error( msg.c_str() );
+      }
+    xunlock( &mutex );
+    }
+  if( *p == 0 && *filename != 0 ) p = ".";	// nothing left of a non-empty name
+  return p;
+  }
+
+
+bool fill_headers( const char * const filename, Extended & extended,
+                   Tar_header header, unsigned long long & file_size,
+                   const int flag )
+  {
   struct stat st;
   if( lstat( filename, &st ) != 0 )
     { show_file_error( filename, "Can't stat input file", errno );
-      gretval = 1; return 0; }
+      set_error_status( 1 ); return false; }
   if( file_is_the_archive( st ) )
     { show_file_error( archive_namep, "File is the archive; not dumped." );
-      return 0; }
-  Extended extended;		// metadata for extended records
-  Tar_header header;
+      return false; }
   init_tar_header( header );
   bool force_extended_name = false;
 
@@ -350,15 +375,14 @@ int add_member( const char * const filename, const struct stat *,
   const gid_t gid = ( cl_group >= 0 ) ? (gid_t)cl_group : st.st_gid;
   if( uid >= 2 << 20 || gid >= 2 << 20 )
     { show_file_error( filename, "uid or gid is larger than 2_097_151." );
-      gretval = 1; return 0; }
+      set_error_status( 1 ); return false; }
   print_octal( header + uid_o, uid_l - 1, uid );
   print_octal( header + gid_o, gid_l - 1, gid );
   const long long mtime = st.st_mtime;			// shut up gcc
   if( mtime < 0 || mtime >= 1LL << 33 )
     { show_file_error( filename, "mtime is out of ustar range [0, 8_589_934_591]." );
-      gretval = 1; return 0; }
+      set_error_status( 1 ); return false; }
   print_octal( header + mtime_o, mtime_l - 1, mtime );
-  unsigned long long file_size = 0;
   Typeflag typeflag;
   if( S_ISREG( mode ) ) { typeflag = tf_regular; file_size = st.st_size; }
   else if( S_ISDIR( mode ) )
@@ -366,7 +390,7 @@ int add_member( const char * const filename, const struct stat *,
     typeflag = tf_directory;
     if( flag == FTW_DNR )
       { show_file_error( filename, "Can't open directory", errno );
-        gretval = 1; return 0; }
+        set_error_status( 1 ); return false; }
     }
   else if( S_ISLNK( mode ) )
     {
@@ -384,94 +408,74 @@ int add_member( const char * const filename, const struct stat *,
       }
     if( len != st.st_size )
       { show_file_error( filename, "Error reading link", (len < 0) ? errno : 0 );
-        gretval = 1; return 0; }
+        set_error_status( 1 ); return false; }
     }
   else if( S_ISCHR( mode ) || S_ISBLK( mode ) )
     {
     typeflag = S_ISCHR( mode ) ? tf_chardev : tf_blockdev;
     if( major( st.st_dev ) >= 2 << 20 || minor( st.st_dev ) >= 2 << 20 )
       { show_file_error( filename, "devmajor or devminor is larger than 2_097_151." );
-        gretval = 1; return 0; }
+        set_error_status( 1 ); return false; }
     print_octal( header + devmajor_o, devmajor_l - 1, major( st.st_dev ) );
     print_octal( header + devminor_o, devminor_l - 1, minor( st.st_dev ) );
     }
   else if( S_ISFIFO( mode ) ) typeflag = tf_fifo;
   else { show_file_error( filename, "Unknown file type." );
-         gretval = 2; return 0; }
+         set_error_status( 2 ); return false; }
   header[typeflag_o] = typeflag;
+  errno = 0;
   const struct passwd * const pw = getpwuid( uid );
   if( pw && pw->pw_name )
     std::strncpy( (char *)header + uname_o, pw->pw_name, uname_l - 1 );
+  else { show_file_error( filename, "Can't read user name from database", errno );
+         set_error_status( 1 ); }
+  errno = 0;
   const struct group * const gr = getgrgid( gid );
   if( gr && gr->gr_name )
     std::strncpy( (char *)header + gname_o, gr->gr_name, gname_l - 1 );
+  else { show_file_error( filename, "Can't read group name from database", errno );
+         set_error_status( 1 ); }
   if( file_size >= 1ULL << 33 )
     { extended.file_size( file_size ); force_extended_name = true; }
   else print_octal( header + size_o, size_l - 1, file_size );
   store_name( filename, extended, header, force_extended_name );
   print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) );
+  return true;
+  }
 
-  const int infd = file_size ? open_instream( filename ) : -1;
-  if( file_size && infd < 0 ) { gretval = 1; return 0; }
-  if( encoder && solidity == bsolid )
-    {
-    const unsigned long long member_size =
-      header_size + extended.full_size() + round_up( file_size );
-    const unsigned long long target_size = cl_data_size;
-    if( partial_data_size >= target_size ||
-        ( partial_data_size >= min_data_size &&
-          partial_data_size + member_size / 2 > target_size ) )
-      {
-      partial_data_size = member_size;
-      if( !archive_write( 0, 0 ) )
-        { show_error( "Error flushing encoder", errno ); return 1; }
-      }
-    else partial_data_size += member_size;
-    }
 
-  if( !extended.empty() && !write_extended( extended ) )
-    { show_error( "Error writing extended header", errno ); return 1; }
-  if( !archive_write( header, header_size ) )
-    { show_error( "Error writing ustar header", errno ); return 1; }
-  if( file_size )
-    {
-    enum { bufsize = 32 * header_size };
-    uint8_t buf[bufsize];
-    unsigned long long rest = file_size;
-    while( rest > 0 )
-      {
-      int size = std::min( rest, (unsigned long long)bufsize );
-      const int rd = readblock( infd, buf, size );
-      rest -= rd;
-      if( rd != size )
-        {
-        if( verbosity >= 0 )
-          std::fprintf( stderr, "File '%s' ends unexpectedly at pos %llu\n",
-                        filename, file_size - rest );
-        close( infd ); return 1;
-        }
-      if( rest == 0 )				// last read
-        {
-        const int rem = file_size % header_size;
-        if( rem > 0 )
-          { const int padding = header_size - rem;
-            std::memset( buf + size, 0, padding ); size += padding; }
-        }
-      if( !archive_write( buf, size ) )
-        { show_error( "Error writing archive", errno ); close( infd );
-          return 1; }
-      }
-    if( close( infd ) != 0 )
-      { show_file_error( filename, "Error closing file", errno ); return 1; }
-    }
-  if( encoder && solidity == no_solid && !archive_write( 0, 0 ) )
-    { show_error( "Error flushing encoder", errno ); return 1; }
-  if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
-  return 0;
+bool block_is_full( const Extended & extended,
+                    const unsigned long long file_size,
+                    unsigned long long & partial_data_size )
+  {		// also updates partial_data_size to account for this member
+  const unsigned long long member_size =
+    header_size + extended.full_size() + round_up( file_size );
+  const unsigned long long target_size = cl_data_size;
+  if( partial_data_size >= target_size ||
+      ( partial_data_size >= min_data_size &&
+        partial_data_size + member_size / 2 > target_size ) )
+    { partial_data_size = member_size; return true; }	// count restarts here
+  partial_data_size += member_size; return false;	// member joins the block
+  }
 
-} // end namespace
 
+void set_error_status( const int retval )
+  {
+  // prevent two threads from modifying the error_status at the same time
+  static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+
+  xlock( &mutex );
+  if( error_status < retval ) error_status = retval;	// keep the highest value
+  xunlock( &mutex );
+  }
+
+int final_exit_status( int retval )
+  {	// a zero retval is replaced by the recorded error_status, if nonzero
+  if( !retval && error_status )
+    { show_error( "Exiting with failure status due to previous errors." );
+      retval = error_status; }
+  return retval;
+  }
 
 unsigned ustar_chksum( const uint8_t * const header )
   {
@@ -495,7 +499,8 @@ int concatenate( const std::string & archive_name, const Arg_parser & parser,
   if( archive_name.empty() )
     { show_error( "'--concatenate' is incompatible with '-f -'.", 0, true );
       return 1; }
-  if( ( outfd = open_outstream( archive_name, false ) ) < 0 ) return 1;
+  const int outfd = open_outstream( archive_name, false );
+  if( outfd < 0 ) return 1;
   if( !file_is_the_archive.init( outfd ) )
     { show_file_error( archive_name.c_str(), "Can't stat", errno ); return 1; }
 
@@ -503,6 +508,7 @@ int concatenate( const std::string & archive_name, const Arg_parser & parser,
   for( int i = 0; i < parser.arguments(); ++i )		// copy archives
     {
     if( parser.code( i ) ) continue;			// skip options
+    if( parser.argument( i ).empty() ) continue;	// skip empty names
     const char * const filename = parser.argument( i ).c_str();
     const int infd = open_instream( filename );
     if( infd < 0 )
@@ -531,7 +537,8 @@ int concatenate( const std::string & archive_name, const Arg_parser & parser,
 
 
 int encode( const std::string & archive_name, const Arg_parser & parser,
-            const int filenames, const int level, const bool append )
+            const int filenames, const int level, const int num_workers,
+            const int debug_level, const bool append )
   {
   struct Lzma_options
     {
@@ -557,8 +564,8 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
     if( !filenames )
       { show_error( "Cowardly refusing to create an empty archive.", 0, true );
         return 1; }
-    if( archive_name.empty() ) outfd = STDOUT_FILENO;
-    else if( ( outfd = open_outstream( archive_name ) ) < 0 ) return 1;
+    if( archive_name.empty() ) goutfd = STDOUT_FILENO;
+    else if( ( goutfd = open_outstream( archive_name ) ) < 0 ) return 1;
     }
   else
     {
@@ -570,14 +577,14 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
     if( !compressed )
       { show_error( "'--append' is incompatible with '--uncompressed'.", 0, true );
         return 1; }
-    if( ( outfd = open_outstream( archive_name, false ) ) < 0 ) return 1;
-    if( !check_appendable( outfd, true ) )
+    if( ( goutfd = open_outstream( archive_name, false ) ) < 0 ) return 1;
+    if( !check_appendable( goutfd, true ) )
       { show_error( "This does not look like an appendable tar.lz archive." );
         return 2; }
     }
 
   archive_namep = archive_name.size() ? archive_name.c_str() : "(stdout)";
-  if( !file_is_the_archive.init( outfd ) )
+  if( !file_is_the_archive.init( goutfd ) )
     { show_file_error( archive_namep, "Can't stat", errno ); return 1; }
 
   if( compressed )
@@ -588,12 +595,22 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
       if( level == 0 ) cl_data_size = 1 << 20;
       else cl_data_size = 2 * dictionary_size;
       }
+    /* CWD is not per-thread; multi-threaded --create can't be used if a
+       -C option appears after a relative filename in the command line. */
+    if( solidity != asolid && solidity != solid && num_workers > 0 &&
+        !option_C_after_relative_filename( parser ) )
+      {
+      // show_file_error( archive_namep, "Multi-threaded --create" );
+      return encode_lz( archive_namep, parser, dictionary_size,
+                        option_mapping[level].match_len_limit, num_workers,
+                        goutfd, debug_level );
+      }
     encoder = LZ_compress_open( dictionary_size,
                 option_mapping[level].match_len_limit, LLONG_MAX );
     if( !encoder || LZ_compress_errno( encoder ) != LZ_ok )
       {
       if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error )
-        show_error( "Not enough memory. Try a lower compression level." );
+        show_error( mem_msg2 );
       else
         internal_error( "invalid argument to encoder." );
       return 1;
@@ -601,7 +618,7 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
     }
 
   int retval = 0;
-  for( int i = 0; i < parser.arguments(); ++i )		// write members
+  for( int i = 0; i < parser.arguments(); ++i )		// parse command line
     {
     const int code = parser.code( i );
     const std::string & arg = parser.argument( i );
@@ -610,17 +627,18 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
       { show_file_error( filename, "Error changing working directory", errno );
         retval = 1; break; }
     if( code ) continue;				// skip options
+    if( parser.argument( i ).empty() ) continue;	// skip empty names
     std::string deslashed;		// arg without trailing slashes
     unsigned len = arg.size();
     while( len > 1 && arg[len-1] == '/' ) --len;
     if( len < arg.size() )
       { deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); }
     struct stat st;
-    if( lstat( filename, &st ) != 0 )
+    if( lstat( filename, &st ) != 0 )	// filename from command line
       { show_file_error( filename, "Can't stat input file", errno );
-        if( gretval < 1 ) gretval = 1; }
+        set_error_status( 1 ); }
     else if( ( retval = nftw( filename, add_member, 16, FTW_PHYS ) ) != 0 )
-      break;			// write error
+      break;					// write error
     else if( encoder && solidity == dsolid && !archive_write( 0, 0 ) )
       { show_error( "Error flushing encoder", errno ); retval = 1; }
     }
@@ -630,7 +648,8 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
     enum { bufsize = 2 * header_size };
     uint8_t buf[bufsize];
     std::memset( buf, 0, bufsize );
-    if( encoder && ( solidity == asolid || solidity == bsolid ) &&
+    if( encoder &&
+        ( solidity == asolid || ( solidity == bsolid && partial_data_size ) ) &&
         !archive_write( 0, 0 ) )
       { show_error( "Error flushing encoder", errno ); retval = 1; }
     else if( !archive_write( buf, bufsize ) ||
@@ -640,12 +659,7 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
     }
   if( encoder && LZ_compress_close( encoder ) < 0 )
     { show_error( "LZ_compress_close failed." ); retval = 1; }
-  if( close( outfd ) != 0 && !retval )
+  if( close( goutfd ) != 0 && !retval )
     { show_error( "Error closing archive", errno ); retval = 1; }
-  if( retval && archive_name.size() && !append )
-    std::remove( archive_name.c_str() );
-  if( !retval && gretval )
-    { show_error( "Exiting with failure status due to previous errors." );
-      retval = gretval; }
-  return retval;
+  return final_exit_status( retval );
   }
diff --git a/create_lz.cc b/create_lz.cc
new file mode 100644
index 0000000..48c6a3a
--- /dev/null
+++ b/create_lz.cc
@@ -0,0 +1,560 @@
+/*  Tarlz - Archiver with multimember lzip compression
+    Copyright (C) 2013-2019 Antonio Diaz Diaz.
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <queue>
+#include <string>
+#include <vector>
+#include <pthread.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <ftw.h>
+#include <lzlib.h>
+
+#include "arg_parser.h"
+#include "tarlz.h"
+
+
+namespace {
+
+enum { max_packet_size = 1 << 20 };	// capacity of each opacket data buffer
+class Packet_courier;
+Packet_courier * courierp = 0;		// set by encode_lz; file-scope because add_member is a nftw callback
+unsigned long long partial_data_size = 0;	// size of current block (bsolid)
+
+
+struct Ipacket		// metadata of one tar member, or an end-of-member token
+  {
+  const unsigned long long file_size;	// bytes of file data to read and compress
+  const std::string filename;		// an empty name marks the end of a lzip member
+  const Extended * const extended;	// extended records of the member (owned)
+  const uint8_t * const header;		// ustar header of the member (owned)
+
+  Ipacket() : file_size( 0 ), filename(), extended( 0 ), header( 0 ) {}	// token
+  Ipacket( const char * const member_name, const unsigned long long data_size,
+           const Extended * const records, const uint8_t * const ustar )
+    : file_size( data_size ), filename( member_name ), extended( records ), header( ustar ) {}
+  };
+
+struct Opacket		// a chunk of compressed data, or an end-of-member token
+  {
+  const uint8_t * const data;	// a null pointer marks the end of a lzip member
+  const int size;		// count of valid bytes in data (0 in the token)
+
+  Opacket() : data( 0 ), size( 0 ) {}				// token
+  Opacket( uint8_t * const buffer, const int count ) : data( buffer ), size( count ) {}
+  };
+
+
+class Packet_courier	// moves ipackets grouper->workers and opackets workers->muxer
+  {
+public:
+  unsigned icheck_counter;	// calls to distribute_packet (debug statistics)
+  unsigned iwait_counter;	// waits of workers for an ipacket
+  unsigned ocheck_counter;	// calls to deliver_packet
+  unsigned owait_counter;	// waits of muxer for an opacket
+private:
+  int receive_worker_id;	// worker queue currently receiving packets
+  int deliver_worker_id;	// worker queue currently delivering packets
+  Slot_tally slot_tally;		// limits the number of input packets
+  std::vector< std::queue< const Ipacket * > > ipacket_queues;	// one per worker
+  std::vector< std::queue< const Opacket * > > opacket_queues;	// one per worker
+  int num_working;			// number of workers still running
+  const int num_workers;		// number of workers
+  const unsigned out_slots;		// max output data packets per queue
+  pthread_mutex_t imutex;	// guards ipacket_queues, receive_worker_id, eof
+  pthread_cond_t iav_or_eof;	// input packet available or grouper done
+  pthread_mutex_t omutex;	// guards opacket_queues, deliver_worker_id, num_working
+  pthread_cond_t oav_or_exit;	// output packet available or all workers exited
+  std::vector< pthread_cond_t > slot_av;	// output slot available (one per worker)
+  bool eof;					// grouper done
+
+  Packet_courier( const Packet_courier & );	// declared as private (not copyable)
+  void operator=( const Packet_courier & );	// declared as private (not copyable)
+
+public:
+  Packet_courier( const int workers, const int in_slots, const int oslots )
+    : icheck_counter( 0 ), iwait_counter( 0 ),
+      ocheck_counter( 0 ), owait_counter( 0 ),
+      receive_worker_id( 0 ), deliver_worker_id( 0 ),
+      slot_tally( in_slots ), ipacket_queues( workers ),
+      opacket_queues( workers ), num_working( workers ),
+      num_workers( workers ), out_slots( oslots ), slot_av( workers ),
+      eof( false )
+    {
+    xinit_mutex( &imutex ); xinit_cond( &iav_or_eof );
+    xinit_mutex( &omutex ); xinit_cond( &oav_or_exit );
+    for( unsigned i = 0; i < slot_av.size(); ++i ) xinit_cond( &slot_av[i] );
+    }
+
+  ~Packet_courier()		// destroys the sync objects in reverse order of creation
+    {
+    for( unsigned i = 0; i < slot_av.size(); ++i ) xdestroy_cond( &slot_av[i] );
+    xdestroy_cond( &oav_or_exit ); xdestroy_mutex( &omutex );
+    xdestroy_cond( &iav_or_eof ); xdestroy_mutex( &imutex );
+    }
+
+  /* Receive an ipacket from grouper. Tokens do not take an input slot.
+     If filename.empty() (end of lzip member token), move to next queue. */
+  void receive_packet( const Ipacket * const ipacket )
+    {
+    if( ipacket->filename.size() )
+      slot_tally.get_slot();		// wait for a free slot
+    xlock( &imutex );
+    ipacket_queues[receive_worker_id].push( ipacket );
+    if( ipacket->filename.empty() && ++receive_worker_id >= num_workers )
+      receive_worker_id = 0;		// wrap around to the first worker
+    xbroadcast( &iav_or_eof );		// all workers share this condition
+    xunlock( &imutex );
+    }
+
+  // distribute an ipacket to a worker; returns 0 when its queue is empty and grouper is done
+  const Ipacket * distribute_packet( const int worker_id )
+    {
+    const Ipacket * ipacket = 0;
+    xlock( &imutex );
+    ++icheck_counter;
+    while( ipacket_queues[worker_id].empty() && !eof )
+      {
+      ++iwait_counter;
+      xwait( &iav_or_eof, &imutex );
+      }
+    if( !ipacket_queues[worker_id].empty() )
+      {
+      ipacket = ipacket_queues[worker_id].front();
+      ipacket_queues[worker_id].pop();
+      }
+    xunlock( &imutex );
+    if( ipacket )			// only non-token packets hold a slot
+      { if( ipacket->filename.size() ) slot_tally.leave_slot(); }
+    else				// returning 0 makes the worker exit
+      {
+      // notify muxer when last worker exits
+      xlock( &omutex );
+      if( --num_working == 0 ) xsignal( &oav_or_exit );
+      xunlock( &omutex );
+      }
+    return ipacket;
+    }
+
+  // collect an opacket from a worker; tokens (data == 0) never wait for a slot
+  void collect_packet( const Opacket * const opacket, const int worker_id )
+    {
+    xlock( &omutex );
+    if( opacket->data )
+      {
+      while( opacket_queues[worker_id].size() >= out_slots )
+        xwait( &slot_av[worker_id], &omutex );
+      }
+    opacket_queues[worker_id].push( opacket );
+    if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit );	// muxer waits on this queue only
+    xunlock( &omutex );
+    }
+
+  /* Deliver an opacket to muxer. Returns 0 when no more packets will come.
+     If opacket data == 0, move to next queue and wait again. */
+  const Opacket * deliver_packet()
+    {
+    const Opacket * opacket = 0;
+    xlock( &omutex );
+    ++ocheck_counter;
+    while( true )
+      {
+      while( opacket_queues[deliver_worker_id].empty() && num_working > 0 )
+        {
+        ++owait_counter;
+        xwait( &oav_or_exit, &omutex );
+        }
+      if( opacket_queues[deliver_worker_id].empty() ) break;	// all workers exited
+      opacket = opacket_queues[deliver_worker_id].front();
+      opacket_queues[deliver_worker_id].pop();
+      if( opacket_queues[deliver_worker_id].size() + 1 == out_slots )	// queue was full
+        xsignal( &slot_av[deliver_worker_id] );
+      if( opacket->data ) break;			// data packet; hand it to muxer
+      if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0;
+      delete opacket; opacket = 0;			// token consumed here
+      }
+    xunlock( &omutex );
+    return opacket;
+    }
+
+  void finish()			// grouper has no more packets to send
+    {
+    xlock( &imutex );
+    eof = true;
+    xbroadcast( &iav_or_eof );		// wake up every waiting worker
+    xunlock( &imutex );
+    }
+
+  bool finished()		// all packets delivered to muxer (reads state without locking)
+    {
+    if( !slot_tally.all_free() || !eof || num_working != 0 ) return false;
+    for( int i = 0; i < num_workers; ++i )
+      if( !ipacket_queues[i].empty() ) return false;
+    for( int i = 0; i < num_workers; ++i )
+      if( !opacket_queues[i].empty() ) return false;
+    return true;
+    }
+  };
+
+
+// nftw callback: build the headers of one tar member and queue them as an ipacket
+int add_member( const char * const filename, const struct stat *,
+                const int flag, struct FTW * )
+  {
+  Extended * const records = new( std::nothrow ) Extended;
+  uint8_t * ustar = 0;
+  if( records ) ustar = new( std::nothrow ) Tar_header;
+  if( !ustar )					// out of memory; nonzero stops nftw
+    { show_error( mem_msg ); delete records; return 1; }
+  unsigned long long data_size = 0;		// passed to fill_headers to be set
+  const bool archivable =
+    fill_headers( filename, *records, ustar, data_size, flag );
+  if( !archivable )				// skip this file, keep walking
+    { delete[] ustar; delete records; return 0; }
+
+  // in bsolid mode, end the current group first when block_is_full says so
+  const bool new_block = ( solidity == bsolid ) &&
+                         block_is_full( *records, data_size, partial_data_size );
+  if( new_block ) courierp->receive_packet( new Ipacket );
+  courierp->receive_packet( new Ipacket( filename, data_size, records, ustar ) );
+  if( solidity == no_solid ) courierp->receive_packet( new Ipacket );	// 1 member per group
+  if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
+  return 0;
+  }
+
+
+struct Grouper_arg		// arguments passed to the grouper thread
+  {
+  Packet_courier * courier;	// receives the ipackets produced
+  const Arg_parser * parser;	// parsed command line with the file names
+  };
+
+
+/* Walk the file names given in the command line, queueing one ipacket per
+   tar member (via add_member) plus the end-of-group tokens for the workers. */
+extern "C" void * grouper( void * arg )
+  {
+  const Grouper_arg * const gargs = (const Grouper_arg *)arg;
+  Packet_courier & courier = *gargs->courier;
+  const Arg_parser & parser = *gargs->parser;
+
+  for( int argind = 0; argind < parser.arguments(); ++argind )
+    {
+    const std::string & name = parser.argument( argind );
+    const int code = parser.code( argind );
+    if( code == 'C' && chdir( name.c_str() ) != 0 )
+      { show_file_error( name.c_str(), "Error changing working directory",
+                         errno ); cleanup_and_fail(); }
+    if( code || name.empty() ) continue;	// skip options and empty names
+    // drop trailing slashes, but keep a lone "/"
+    unsigned len = name.size();
+    while( len > 1 && name[len-1] == '/' ) --len;
+    const std::string deslashed( name, 0, len );
+    const char * const filename = deslashed.c_str();
+    struct stat st;
+    if( lstat( filename, &st ) != 0 )		// filename from command line
+      {
+      show_file_error( filename, "Can't stat input file", errno );
+      set_error_status( 1 );
+      }
+    else if( nftw( filename, add_member, 16, FTW_PHYS ) != 0 )
+      cleanup_and_fail();			// write error or oom
+    else if( solidity == dsolid )		// one group per name given
+      courier.receive_packet( new Ipacket );
+    }
+
+  if( solidity == bsolid && partial_data_size )	// close the last, partial block
+    { partial_data_size = 0; courier.receive_packet( new Ipacket ); }
+  courier.finish();				// no more packets to send
+  return 0;
+  }
+
+
+/* Writes ibuf to encoder. To minimize dictionary size, it does not read
+   from encoder until encoder's input buffer is full or finish is true.
+   Sends opacket to courier and allocates new obuf each time obuf is full. */
+void loop_encode( const uint8_t * const ibuf, const int isize,
+                  uint8_t * & obuf, int & opos, Packet_courier & courier,
+                  LZ_Encoder * const encoder, const int worker_id,
+                  const bool finish = false )
+  {
+  int ipos = 0;				// bytes of ibuf already given to encoder
+  if( opos < 0 || opos > max_packet_size )	// opos persists across calls
+    internal_error( "bad buffer index in loop_encode." );
+  while( true )
+    {
+    if( ipos < isize )
+      {
+      const int wr = LZ_compress_write( encoder, ibuf + ipos, isize - ipos );
+      if( wr < 0 ) internal_error( "library error (LZ_compress_write)." );
+      ipos += wr;			// wr may be smaller than the size offered
+      }
+    if( ipos >= isize )					// ibuf is empty
+      { if( finish ) LZ_compress_finish( encoder ); else break; }
+    const int rd =
+      LZ_compress_read( encoder, obuf + opos, max_packet_size - opos );
+    if( rd < 0 )
+      {
+      if( verbosity >= 0 )
+        std::fprintf( stderr, "LZ_compress_read error: %s\n",
+                      LZ_strerror( LZ_compress_errno( encoder ) ) );
+      cleanup_and_fail();
+      }
+    opos += rd;
+    // obuf is full or last opacket in lzip member
+    if( opos >= max_packet_size || LZ_compress_finished( encoder ) == 1 )
+      {
+      if( opos > max_packet_size )
+        internal_error( "opacket size exceeded in worker." );
+      courier.collect_packet( new Opacket( obuf, opos ), worker_id );	// obuf now belongs to the opacket
+      opos = 0; obuf = new( std::nothrow ) uint8_t[max_packet_size];
+      if( !obuf ) { show_error( mem_msg2 ); cleanup_and_fail(); }
+      if( LZ_compress_finished( encoder ) == 1 ) break;	// member complete
+      }
+    }
+  if( ipos > isize ) internal_error( "ipacket size exceeded in worker." );
+  if( ipos < isize ) internal_error( "input not fully consumed in worker." );
+  }
+
+
+struct Worker_arg		// arguments passed to each cworker thread
+  {
+  Packet_courier * courier;	// source of ipackets, destination of opackets
+  int dictionary_size;		// passed to LZ_compress_open
+  int match_len_limit;		// passed to LZ_compress_open
+  int worker_id;		// index of this worker's queues in courier
+  };
+
+
+/* Get ipackets from courier, compress headers and file data, and give the
+   opackets produced to courier. Every end-of-group token is forwarded. */
+extern "C" void * cworker( void * arg )
+  {
+  const Worker_arg & tmp = *(const Worker_arg *)arg;
+  Packet_courier & courier = *tmp.courier;
+  const int dictionary_size = tmp.dictionary_size;
+  const int match_len_limit = tmp.match_len_limit;
+  const int worker_id = tmp.worker_id;
+
+  LZ_Encoder * encoder = 0;		// created when the first member arrives
+  uint8_t * data = 0;			// output buffer being filled (see loop_encode)
+  Resizable_buffer rbuf( 2 * header_size );	// extended header + data
+  if( !rbuf.size() ) { show_error( mem_msg2 ); cleanup_and_fail(); }
+
+  int opos = 0;				// fill level of data, kept across loop_encode calls
+  while( true )
+    {
+    const Ipacket * const ipacket = courier.distribute_packet( worker_id );
+    if( !ipacket ) break;		// no more packets to process
+    if( ipacket->filename.empty() )	// end of group, flush encoder
+      {
+      // always forward the token (even with nothing to flush); muxer only advances on it
+      if( encoder ) loop_encode( 0, 0, data, opos, courier, encoder, worker_id, true );
+      courier.collect_packet( new Opacket, worker_id );	// end of member token
+      if( encoder && LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 )
+        { show_error( "LZ_compress_restart_member failed." ); cleanup_and_fail(); }
+      delete ipacket; continue;
+      }
+
+    const int infd =			// members without data need no input file
+      ipacket->file_size ? open_instream( ipacket->filename.c_str() ) : -1;
+    if( ipacket->file_size && infd < 0 )	// skip the file; archive the rest
+      { delete[] ipacket->header; delete ipacket->extended; delete ipacket;
+        set_error_status( 1 ); continue; }
+
+    if( !encoder )
+      {
+      data = new( std::nothrow ) uint8_t[max_packet_size];
+      encoder = LZ_compress_open( dictionary_size, match_len_limit, LLONG_MAX );
+      if( !data || !encoder || LZ_compress_errno( encoder ) != LZ_ok )
+        {
+        if( !data || !encoder || LZ_compress_errno( encoder ) == LZ_mem_error )
+          show_error( mem_msg2 );
+        else
+          internal_error( "invalid argument to encoder." );
+        cleanup_and_fail();
+        }
+      }
+
+    if( !ipacket->extended->empty() )		// compress extended block
+      {
+      const long long ebsize = ipacket->extended->format_block( rbuf );
+      if( ebsize < 0 )
+        { show_error( "Error formatting extended records." ); cleanup_and_fail(); }
+      /* Limit the size of the extended block to INT_MAX - 1 so that it can
+         be fed to lzlib as one buffer. */
+      if( ebsize >= INT_MAX )
+        { show_error( "Extended records size >= INT_MAX." ); cleanup_and_fail(); }
+      loop_encode( (const uint8_t *)rbuf(), ebsize, data, opos, courier,
+                   encoder, worker_id );
+      }
+    // compress ustar header
+    loop_encode( ipacket->header, header_size, data, opos, courier,
+                 encoder, worker_id );
+    delete[] ipacket->header; delete ipacket->extended;
+
+    if( ipacket->file_size )
+      {
+      enum { bufsize = 32 * header_size };
+      uint8_t buf[bufsize];
+      unsigned long long rest = ipacket->file_size;	// bytes still to be read
+      while( rest > 0 )
+        {
+        int size = std::min( rest, (unsigned long long)bufsize );
+        const int rd = readblock( infd, buf, size );
+        rest -= rd;
+        if( rd != size )		// file shorter than the size in its header
+          {
+          if( verbosity >= 0 )
+            std::fprintf( stderr, "File '%s' ends unexpectedly at pos %llu\n",
+                          ipacket->filename.c_str(), ipacket->file_size - rest );
+          close( infd ); cleanup_and_fail();
+          }
+        if( rest == 0 )				// last read
+          {
+          const int rem = ipacket->file_size % header_size;
+          if( rem > 0 )			// zero-pad the data to a multiple of header_size
+            { const int padding = header_size - rem;
+              std::memset( buf + size, 0, padding ); size += padding; }
+          }
+        // compress size bytes of file
+        loop_encode( buf, size, data, opos, courier, encoder, worker_id );
+        }
+      if( close( infd ) != 0 )
+        { show_file_error( ipacket->filename.c_str(), "Error closing file", errno );
+          cleanup_and_fail(); }
+      }
+    delete ipacket;
+    }
+  if( data ) delete[] data;
+  if( encoder && LZ_compress_close( encoder ) < 0 )
+    { show_error( "LZ_compress_close failed." ); cleanup_and_fail(); }
+  return 0;
+  }
+
+
+/* Get from courier the processed and sorted packets, and write their contents
+   to the output archive. Frees every packet received. Returns false on write error. */
+bool muxer( Packet_courier & courier, const char * const archive_name,
+            const int outfd )
+  {
+  while( true )
+    {
+    const Opacket * const opacket = courier.deliver_packet();
+    if( !opacket ) break;	// queue is empty. all workers exited
+
+    const bool written =
+      ( writeblock( outfd, opacket->data, opacket->size ) == opacket->size );
+    if( !written ) show_file_error( archive_name, "Write error", errno );	// report before freeing
+    delete[] opacket->data; delete opacket;	// free the packet also on error
+    if( !written ) return false;
+    }
+  return true;
+  }
+
+} // end namespace
+
+
+// init the courier, start the grouper and the workers, run the muxer here, then join and write EOA
+int encode_lz( const char * const archive_name, const Arg_parser & parser,
+               const int dictionary_size, const int match_len_limit,
+               const int num_workers, const int outfd, const int debug_level )
+  {
+  const int in_slots = 65536;		// max small files (<=512B) in 64 MiB
+  const int total_in_slots = ( INT_MAX / num_workers >= in_slots ) ?
+                             num_workers * in_slots : INT_MAX;	// avoids int overflow
+  const int out_slots = 64;		// max queued output data packets per worker
+
+  Packet_courier courier( num_workers, total_in_slots, out_slots );
+  courierp = &courier;			// needed by add_member
+
+  Grouper_arg grouper_arg;
+  grouper_arg.courier = &courier;
+  grouper_arg.parser = &parser;
+
+  pthread_t grouper_thread;
+  int errcode = pthread_create( &grouper_thread, 0, grouper, &grouper_arg );
+  if( errcode )
+    { show_error( "Can't create grouper thread", errcode ); return 1; }
+
+  Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
+  pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
+  if( !worker_args || !worker_threads ) { show_error( mem_msg ); return 1; }	// NOTE(review): grouper still running
+  for( int i = 0; i < num_workers; ++i )
+    {
+    worker_args[i].courier = &courier;
+    worker_args[i].dictionary_size = dictionary_size;
+    worker_args[i].match_len_limit = match_len_limit;
+    worker_args[i].worker_id = i;
+    errcode = pthread_create( &worker_threads[i], 0, cworker, &worker_args[i] );
+    if( errcode )
+      { show_error( "Can't create worker threads", errcode ); return 1; }
+    }
+
+  if( !muxer( courier, archive_name, outfd ) ) return 1;	// NOTE(review): returns with threads unjoined and skips final_exit_status
+
+  for( int i = num_workers - 1; i >= 0; --i )
+    {
+    errcode = pthread_join( worker_threads[i], 0 );
+    if( errcode )
+      { show_error( "Can't join worker threads", errcode ); return 1; }
+    }
+  delete[] worker_threads;
+  delete[] worker_args;
+
+  errcode = pthread_join( grouper_thread, 0 );
+  if( errcode )
+    { show_error( "Can't join grouper thread", errcode ); return 1; }
+
+  // write End-Of-Archive records (as a fixed 44-byte block starting with "LZIP")
+  int retval = 0;
+  enum { eof_member_size = 44 };
+  const uint8_t eof_member[eof_member_size] = {
+    0x4C, 0x5A, 0x49, 0x50, 0x01, 0x0C, 0x00, 0x00, 0x6F, 0xFD, 0xFF, 0xFF,
+    0xA3, 0xB7, 0x80, 0x0C, 0x82, 0xDB, 0xFF, 0xFF, 0x9F, 0xF0, 0x00, 0x00,
+    0x2E, 0xAF, 0xB5, 0xEF, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x2C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+  if( writeblock( outfd, eof_member, eof_member_size ) != eof_member_size )
+    { show_error( "Error writing end-of-archive blocks", errno );
+      retval = 1; }
+
+  if( close( outfd ) != 0 && !retval )
+    { show_error( "Error closing archive", errno ); retval = 1; }
+
+  if( debug_level & 1 )			// print the courier's wait statistics
+    std::fprintf( stderr,
+      "any worker tried to consume from grouper %8u times\n"
+      "any worker had to wait                   %8u times\n"
+      "muxer tried to consume from workers      %8u times\n"
+      "muxer had to wait                        %8u times\n",
+      courier.icheck_counter,
+      courier.iwait_counter,
+      courier.ocheck_counter,
+      courier.owait_counter );
+
+  if( !courier.finished() ) internal_error( "courier not finished." );	// sanity check after all joins
+  return final_exit_status( retval );
+  }
diff --git a/doc/tarlz.1 b/doc/tarlz.1
index c30c72f..82462cd 100644
--- a/doc/tarlz.1
+++ b/doc/tarlz.1
@@ -1,20 +1,20 @@
 .\" DO NOT MODIFY THIS FILE!  It was generated by help2man 1.46.1.
-.TH TARLZ "1" "February 2019" "tarlz 0.10a" "User Commands"
+.TH TARLZ "1" "February 2019" "tarlz 0.11" "User Commands"
 .SH NAME
 tarlz \- creates tar archives with multimember lzip compression
 .SH SYNOPSIS
 .B tarlz
 [\fI\,options\/\fR] [\fI\,files\/\fR]
 .SH DESCRIPTION
-Tarlz is a combined implementation of the tar archiver and the lzip
-compressor. By default tarlz creates, lists and extracts archives in a
-simplified posix pax format compressed with lzip on a per file basis. Each
-tar member is compressed in its own lzip member, as well as the end\-of\-file
-blocks. This method adds an indexed lzip layer on top of the tar archive,
-making it possible to decode the archive safely in parallel. The resulting
-multimember tar.lz archive is fully backward compatible with standard tar
-tools like GNU tar, which treat it like any other tar.lz archive. Tarlz can
-append files to the end of such compressed archives.
+Tarlz is a massively parallel (multi\-threaded) combined implementation of
+the tar archiver and the lzip compressor. Tarlz creates, lists and extracts
+archives in a simplified posix pax format compressed with lzip, keeping the
+alignment between tar members and lzip members. This method adds an indexed
+lzip layer on top of the tar archive, making it possible to decode the
+archive safely in parallel. The resulting multimember tar.lz archive is
+fully backward compatible with standard tar tools like GNU tar, which treat
+it like any other tar.lz archive. Tarlz can append files to the end of such
+compressed archives.
 .PP
 The tarlz file format is a safe posix\-style backup format. In case of
 corruption, tarlz can extract all the undamaged members from the tar.lz
@@ -46,7 +46,7 @@ change to directory <dir>
 use archive file <archive>
 .TP
 \fB\-n\fR, \fB\-\-threads=\fR<n>
-set number of decompression threads [2]
+set number of (de)compression threads [2]
 .TP
 \fB\-q\fR, \fB\-\-quiet\fR
 suppress all messages
@@ -70,13 +70,13 @@ set compression level [default 6]
 create solidly compressed appendable archive
 .TP
 \fB\-\-bsolid\fR
-create per\-data\-block compressed archive
+create per block compressed archive (default)
 .TP
 \fB\-\-dsolid\fR
-create per\-directory compressed archive
+create per directory compressed archive
 .TP
 \fB\-\-no\-solid\fR
-create per\-file compressed archive (default)
+create per file compressed archive
 .TP
 \fB\-\-solid\fR
 create solidly compressed archive
diff --git a/doc/tarlz.info b/doc/tarlz.info
index bf1e1f5..288c441 100644
--- a/doc/tarlz.info
+++ b/doc/tarlz.info
@@ -11,7 +11,7 @@ File: tarlz.info,  Node: Top,  Next: Introduction,  Up: (dir)
 Tarlz Manual
 ************
 
-This manual is for Tarlz (version 0.10, 31 January 2019).
+This manual is for Tarlz (version 0.11, 13 February 2019).
 
 * Menu:
 
@@ -20,6 +20,7 @@ This manual is for Tarlz (version 0.10, 31 January 2019).
 * File format::               Detailed format of the compressed archive
 * Amendments to pax format::  The reasons for the differences with pax
 * Multi-threaded tar::        Limitations of parallel tar decoding
+* Minimum archive sizes::     Sizes required for full multi-threaded speed
 * Examples::                  A small tutorial with examples
 * Problems::                  Reporting bugs
 * Concept index::             Index of concepts
@@ -36,23 +37,23 @@ File: tarlz.info,  Node: Introduction,  Next: Invoking tarlz,  Prev: Top,  Up: T
 1 Introduction
 **************
 
-Tarlz is a combined implementation of the tar archiver and the lzip
-compressor. By default tarlz creates, lists and extracts archives in a
-simplified posix pax format compressed with lzip on a per file basis.
-Each tar member is compressed in its own lzip member, as well as the
-end-of-file blocks. This method adds an indexed lzip layer on top of
-the tar archive, making it possible to decode the archive safely in
-parallel. The resulting multimember tar.lz archive is fully backward
-compatible with standard tar tools like GNU tar, which treat it like
-any other tar.lz archive. Tarlz can append files to the end of such
-compressed archives.
+Tarlz is a massively parallel (multi-threaded) combined implementation
+of the tar archiver and the lzip compressor. Tarlz creates, lists and
+extracts archives in a simplified posix pax format compressed with
+lzip, keeping the alignment between tar members and lzip members. This
+method adds an indexed lzip layer on top of the tar archive, making it
+possible to decode the archive safely in parallel. The resulting
+multimember tar.lz archive is fully backward compatible with standard
+tar tools like GNU tar, which treat it like any other tar.lz archive.
+Tarlz can append files to the end of such compressed archives.
 
-   Tarlz can create tar archives with four levels of compression
-granularity; per file, per directory, appendable solid, and solid.
+   Tarlz can create tar archives with five levels of compression
+granularity; per file, per block, per directory, appendable solid, and
+solid.
 
-Of course, compressing each file (or each directory) individually is
-less efficient than compressing the whole tar archive, but it has the
-following advantages:
+Of course, compressing each file (or each directory) individually can't
+achieve a compression ratio as high as compressing solidly the whole tar
+archive, but it has the following advantages:
 
    * The resulting multimember tar.lz archive can be decompressed in
      parallel, multiplying the decompression speed.
@@ -87,17 +88,23 @@ The format for running tarlz is:
 
      tarlz [OPTIONS] [FILES]
 
-On archive creation or appending, tarlz removes leading and trailing
-slashes from filenames, as well as filename prefixes containing a '..'
-component. On extraction, archive members containing a '..' component
-are skipped. Tarlz detects when the archive being created or enlarged
-is among the files to be dumped, appended or concatenated, and skips it.
+On archive creation or appending tarlz archives the files specified, but
+removes from member names any leading and trailing slashes and any
+filename prefixes containing a '..' component. On extraction, leading
+and trailing slashes are also removed from member names, and archive
+members containing a '..' component in the filename are skipped. Tarlz
+detects when the archive being created or enlarged is among the files
+to be dumped, appended or concatenated, and skips it.
 
    On extraction and listing, tarlz removes leading './' strings from
 member names in the archive or given in the command line, so that
 'tarlz -xf foo ./bar baz' extracts members 'bar' and './baz' from
 archive 'foo'.
 
+   If several compression levels or '--*solid' options are given, the
+last setting is used. For example '-9 --solid --uncompressed -1' is
+equivalent to '-1 --solid'.
+
    tarlz supports the following options:
 
 '-h'
@@ -125,7 +132,7 @@ archive 'foo'.
      Set target size of input data blocks for the '--bsolid' option.
      Valid values range from 8 KiB to 1 GiB. Default value is two times
      the dictionary size, except for option '-0' where it defaults to
-     1 MiB.
+     1 MiB.  *Note Minimum archive sizes::.
 
 '-c'
 '--create'
@@ -142,6 +149,11 @@ archive 'foo'.
      relative to the then current working directory, perhaps changed by
      a previous '-C' option.
 
+     Note that a process can only have one current working directory
+     (CWD).  Therefore multi-threading can't be used to create an
+     archive if a '-C' option appears after a relative filename in the
+     command line.
+
 '-f ARCHIVE'
 '--file=ARCHIVE'
      Use archive file ARCHIVE. '-' used as an ARCHIVE argument reads
@@ -149,18 +161,21 @@ archive 'foo'.
 
 '-n N'
 '--threads=N'
-     Set the number of decompression threads, overriding the system's
+     Set the number of (de)compression threads, overriding the system's
      default.  Valid values range from 0 to "as many as your system can
      support". A value of 0 disables threads entirely. If this option
      is not used, tarlz tries to detect the number of processors in the
      system and use it as default value.  'tarlz --help' shows the
-     system's default value. This option currently only has effect when
-     listing the contents of a multimember compressed archive. *Note
+     system's default value. See the note about multi-threaded archive
+     creation in the '-C' option above.  Multi-threaded extraction of
+     files from an archive is not yet implemented.  *Note
      Multi-threaded tar::.
 
      Note that the number of usable threads is limited during
-     decompression to the number of lzip members in the tar.lz archive,
-     which you can find by running 'lzip -lv archive.tar.lz'.
+     compression to ceil( uncompressed_size / data_size ) (*note
+     Minimum archive sizes::), and during decompression to the number
+     of lzip members in the tar.lz archive, which you can find by
+     running 'lzip -lv archive.tar.lz'.
 
 '-q'
 '--quiet'
@@ -180,7 +195,7 @@ archive 'foo'.
 '-t'
 '--list'
      List the contents of an archive. If FILES are given, list only the
-     given FILES.
+     FILES given.
 
 '-v'
 '--verbose'
@@ -189,7 +204,7 @@ archive 'foo'.
 '-x'
 '--extract'
      Extract files from an archive. If FILES are given, extract only
-     the given FILES. Else extract all the files in the archive.
+     the FILES given. Else extract all the files in the archive.
 
 '-0 .. -9'
      Set the compression level. The default compression level is '-6'.
@@ -214,38 +229,43 @@ archive 'foo'.
      solid compression. All the files being added to the archive are
      compressed into a single lzip member, but the end-of-file blocks
      are compressed into a separate lzip member. This creates a solidly
-     compressed appendable archive.
+     compressed appendable archive.  Solid archives can't be created
+     nor decoded in parallel.
 
 '--bsolid'
-     When creating or appending to a compressed archive, compress tar
-     members together in a lzip member until they approximate a target
-     uncompressed size.  The size can't be exact because each solidly
-     compressed data block must contain an integer number of tar
-     members. This option improves compression efficiency for archives
-     with lots of small files. *Note --data-size::, to set the target
+     When creating or appending to a compressed archive, use block
+     compression.  Tar members are compressed together in a lzip member
+     until they approximate a target uncompressed size. The size can't
+     be exact because each solidly compressed data block must contain
+     an integer number of tar members. Block compression is the default
+     because it improves compression ratio for archives with many files
+     smaller than the block size. This option allows tarlz revert to
+     default behavior if, for example, it is invoked through an alias
+     like 'tar='tarlz --solid''. *Note --data-size::, to set the target
      block size.
 
 '--dsolid'
-     When creating or appending to a compressed archive, use solid
-     compression for each directory especified in the command line. The
-     end-of-file blocks are compressed into a separate lzip member. This
-     creates a compressed appendable archive with a separate lzip
-     member for each top-level directory.
+     When creating or appending to a compressed archive, compress each
+     file specified in the command line separately in its own lzip
+     member, and use solid compression for each directory specified in
+     the command line. The end-of-file blocks are compressed into a
+     separate lzip member. This creates a compressed appendable archive
+     with a separate lzip member for each file or top-level directory
+     specified.
 
 '--no-solid'
      When creating or appending to a compressed archive, compress each
-     file separately. The end-of-file blocks are compressed into a
-     separate lzip member. This creates a compressed appendable archive
-     with a separate lzip member for each file. This option allows
-     tarlz revert to default behavior if, for example, tarlz is invoked
-     through an alias like 'tar='tarlz --solid''.
+     file separately in its own lzip member. The end-of-file blocks are
+     compressed into a separate lzip member. This creates a compressed
+     appendable archive with a lzip member for each file.
 
 '--solid'
      When creating or appending to a compressed archive, use solid
-     compression. The files being added to the archive, along with the
+     compression.  The files being added to the archive, along with the
      end-of-file blocks, are compressed into a single lzip member. The
      resulting archive is not appendable. No more files can be later
-     appended to the archive.
+     appended to the archive. Solid archives can't be created nor
+     decoded in parallel.
 
 '--anonymous'
      Equivalent to '--owner=root --group=root'.
@@ -341,9 +361,9 @@ blocks are either compressed in a separate lzip member or compressed
 along with the tar members contained in the last lzip member.
 
    The diagram below shows the correspondence between each tar member
-(formed by one or two headers plus optional data) in the tar archive and
-each lzip member in the resulting multimember tar.lz archive: *Note
-File format: (lzip)File format.
+(formed by one or two headers plus optional data) in the tar archive
+and each lzip member in the resulting multimember tar.lz archive, when
+per file compression is used: *Note File format: (lzip)File format.
 
 tar
 +========+======+=================+===============+========+======+========+
@@ -612,12 +632,12 @@ wasteful for a backup format.
 
 There is no portable way to tell what charset a text string is coded
 into.  Therefore, tarlz stores all fields representing text strings
-as-is, without conversion to UTF-8 nor any other transformation. This
-prevents accidental double UTF-8 conversions. If the need arises this
-behavior will be adjusted with a command line option in the future.
+unmodified, without conversion to UTF-8 nor any other transformation.
+This prevents accidental double UTF-8 conversions. If the need arises
+this behavior will be adjusted with a command line option in the future.
 
 
-File: tarlz.info,  Node: Multi-threaded tar,  Next: Examples,  Prev: Amendments to pax format,  Up: Top
+File: tarlz.info,  Node: Multi-threaded tar,  Next: Minimum archive sizes,  Prev: Amendments to pax format,  Up: Top
 
 5 Limitations of parallel tar decoding
 **************************************
@@ -659,15 +679,53 @@ sequential '--list' because, in addition to using several processors,
 it only needs to decompress part of each lzip member. See the following
 example listing the Silesia corpus on a dual core machine:
 
-     tarlz -9 -cf silesia.tar.lz silesia
+     tarlz -9 --no-solid -cf silesia.tar.lz silesia
      time lzip -cd silesia.tar.lz | tar -tf -            (5.032s)
      time plzip -cd silesia.tar.lz | tar -tf -           (3.256s)
      time tarlz -tf silesia.tar.lz                       (0.020s)
 
 
-File: tarlz.info,  Node: Examples,  Next: Problems,  Prev: Multi-threaded tar,  Up: Top
+File: tarlz.info,  Node: Minimum archive sizes,  Next: Examples,  Prev: Multi-threaded tar,  Up: Top
+
+6 Minimum archive sizes required for multi-threaded block compression
+*********************************************************************
+
+When creating or appending to a compressed archive using multi-threaded
+block compression, tarlz puts tar members together in blocks and
+compresses as many blocks simultaneously as worker threads are chosen,
+creating a multimember compressed archive.
+
+   For this to work as expected (and roughly multiply the compression
+speed by the number of available processors), the uncompressed archive
+must be at least as large as the number of worker threads times the
+block size (*note --data-size::). Else some processors will not get any
+data to compress, and compression will be proportionally slower. The
+maximum speed increase achievable on a given file is limited by the
+ratio (uncompressed_size / data_size). For example, a tarball the size
+of gcc or linux will scale up to 10 or 12 processors at level -9.
+
+   The following table shows the minimum uncompressed archive size
+needed for full use of N processors at a given compression level, using
+the default data size for each level:
+
+Processors   2         4         8         16        64        256
+------------------------------------------------------------------
+Level                                                          
+-0           2 MiB     4 MiB     8 MiB     16 MiB    64 MiB    256 MiB
+-1           4 MiB     8 MiB     16 MiB    32 MiB    128 MiB   512 MiB
+-2           6 MiB     12 MiB    24 MiB    48 MiB    192 MiB   768 MiB
+-3           8 MiB     16 MiB    32 MiB    64 MiB    256 MiB   1 GiB
+-4           12 MiB    24 MiB    48 MiB    96 MiB    384 MiB   1.5 GiB
+-5           16 MiB    32 MiB    64 MiB    128 MiB   512 MiB   2 GiB
+-6           32 MiB    64 MiB    128 MiB   256 MiB   1 GiB     4 GiB
+-7           64 MiB    128 MiB   256 MiB   512 MiB   2 GiB     8 GiB
+-8           96 MiB    192 MiB   384 MiB   768 MiB   3 GiB     12 GiB
+-9           128 MiB   256 MiB   512 MiB   1 GiB     4 GiB     16 GiB
+
+
+File: tarlz.info,  Node: Examples,  Next: Problems,  Prev: Minimum archive sizes,  Up: Top
 
-6 A small tutorial with examples
+7 A small tutorial with examples
 ********************************
 
 Example 1: Create a multimember compressed archive 'archive.tar.lz'
@@ -725,7 +783,7 @@ Example 8: Copy the contents of directory 'sourcedir' to the directory
 
 File: tarlz.info,  Node: Problems,  Next: Concept index,  Prev: Examples,  Up: Top
 
-7 Reporting bugs
+8 Reporting bugs
 ****************
 
 There are probably bugs in tarlz. There are certainly errors and
@@ -754,6 +812,7 @@ Concept index
 * getting help:                          Problems.              (line 6)
 * introduction:                          Introduction.          (line 6)
 * invoking:                              Invoking tarlz.        (line 6)
+* minimum archive sizes:                 Minimum archive sizes. (line 6)
 * options:                               Invoking tarlz.        (line 6)
 * usage:                                 Invoking tarlz.        (line 6)
 * version:                               Invoking tarlz.        (line 6)
@@ -762,18 +821,19 @@ Concept index
 
 Tag Table:
 Node: Top223
-Node: Introduction1013
-Node: Invoking tarlz3125
-Ref: --data-size4717
-Node: File format11536
-Ref: key_crc3216321
-Node: Amendments to pax format21738
-Ref: crc3222262
-Ref: flawed-compat23287
-Node: Multi-threaded tar25649
-Node: Examples28164
-Node: Problems29830
-Node: Concept index30356
+Node: Introduction1089
+Node: Invoking tarlz3218
+Ref: --data-size5097
+Node: File format12673
+Ref: key_crc3217493
+Node: Amendments to pax format22910
+Ref: crc3223434
+Ref: flawed-compat24459
+Node: Multi-threaded tar26826
+Node: Minimum archive sizes29365
+Node: Examples31495
+Node: Problems33164
+Node: Concept index33690
 
 End Tag Table
 
diff --git a/doc/tarlz.texi b/doc/tarlz.texi
index 2ab37fb..6026fe3 100644
--- a/doc/tarlz.texi
+++ b/doc/tarlz.texi
@@ -6,8 +6,8 @@
 @finalout
 @c %**end of header
 
-@set UPDATED 31 January 2019
-@set VERSION 0.10
+@set UPDATED 13 February 2019
+@set VERSION 0.11
 
 @dircategory Data Compression
 @direntry
@@ -40,6 +40,7 @@ This manual is for Tarlz (version @value{VERSION}, @value{UPDATED}).
 * File format::               Detailed format of the compressed archive
 * Amendments to pax format::  The reasons for the differences with pax
 * Multi-threaded tar::        Limitations of parallel tar decoding
+* Minimum archive sizes::     Sizes required for full multi-threaded speed
 * Examples::                  A small tutorial with examples
 * Problems::                  Reporting bugs
 * Concept index::             Index of concepts
@@ -56,25 +57,24 @@ to copy, distribute and modify it.
 @chapter Introduction
 @cindex introduction
 
-@uref{http://www.nongnu.org/lzip/tarlz.html,,Tarlz} is a combined
-implementation of the tar archiver and the
-@uref{http://www.nongnu.org/lzip/lzip.html,,lzip} compressor. By default
-tarlz creates, lists and extracts archives in a simplified posix pax format
-compressed with lzip on a per file basis. Each tar member is compressed in
-its own lzip member, as well as the end-of-file blocks. This method adds an
-indexed lzip layer on top of the tar archive, making it possible to decode
-the archive safely in parallel. The resulting multimember tar.lz archive is
-fully backward compatible with standard tar tools like GNU tar, which treat
-it like any other tar.lz archive. Tarlz can append files to the end of such
-compressed archives.
-
-Tarlz can create tar archives with four levels of compression granularity;
-per file, per directory, appendable solid, and solid.
+@uref{http://www.nongnu.org/lzip/tarlz.html,,Tarlz} is a massively parallel
+(multi-threaded) combined implementation of the tar archiver and the
+@uref{http://www.nongnu.org/lzip/lzip.html,,lzip} compressor. Tarlz creates,
+lists and extracts archives in a simplified posix pax format compressed with
+lzip, keeping the alignment between tar members and lzip members. This
+method adds an indexed lzip layer on top of the tar archive, making it
+possible to decode the archive safely in parallel. The resulting multimember
+tar.lz archive is fully backward compatible with standard tar tools like GNU
+tar, which treat it like any other tar.lz archive. Tarlz can append files to
+the end of such compressed archives.
+
+Tarlz can create tar archives with five levels of compression granularity:
+per file, per block, per directory, appendable solid, and solid.
 
 @noindent
-Of course, compressing each file (or each directory) individually is
-less efficient than compressing the whole tar archive, but it has the
-following advantages:
+Of course, compressing each file (or each directory) individually can't
+achieve a compression ratio as high as solidly compressing the whole tar
+archive, but it has the following advantages:
 
 @itemize @bullet
 @item
@@ -120,18 +120,23 @@ tarlz [@var{options}] [@var{files}]
 @end example
 
 @noindent
-On archive creation or appending, tarlz removes leading and trailing
-slashes from filenames, as well as filename prefixes containing a
-@samp{..} component. On extraction, archive members containing a
-@samp{..} component are skipped. Tarlz detects when the archive being
-created or enlarged is among the files to be dumped, appended or
-concatenated, and skips it.
+On archive creation or appending, tarlz archives the files specified, but
+removes from member names any leading and trailing slashes and any filename
+prefixes containing a @samp{..} component. On extraction, leading and
+trailing slashes are also removed from member names, and archive members
+containing a @samp{..} component in the filename are skipped. Tarlz detects
+when the archive being created or enlarged is among the files to be dumped,
+appended or concatenated, and skips it.
 
 On extraction and listing, tarlz removes leading @samp{./} strings from
 member names in the archive or given in the command line, so that
 @w{@code{tarlz -xf foo ./bar baz}} extracts members @samp{bar} and
 @samp{./baz} from archive @samp{foo}.
 
+If several compression levels or @samp{--*solid} options are given, the last
+setting is used. For example, @w{@samp{-9 --solid --uncompressed -1}} is
+equivalent to @samp{-1 --solid}.
+
 tarlz supports the following options:
 
 @table @code
@@ -160,6 +165,7 @@ specified. Tarlz can't concatenate uncompressed tar archives.
 Set target size of input data blocks for the @samp{--bsolid} option. Valid
 values range from @w{8 KiB} to @w{1 GiB}. Default value is two times the
 dictionary size, except for option @samp{-0} where it defaults to @w{1 MiB}.
+@xref{Minimum archive sizes}.
 
 @item -c
 @itemx --create
@@ -176,6 +182,10 @@ extraction. Listing ignores any @samp{-C} options specified. @var{dir}
 is relative to the then current working directory, perhaps changed by a
 previous @samp{-C} option.
 
+Note that a process can only have one current working directory (CWD).
+Therefore multi-threading can't be used to create an archive if a @samp{-C}
+option appears after a relative filename in the command line.
+
 @item -f @var{archive}
 @itemx --file=@var{archive}
 Use archive file @var{archive}. @samp{-} used as an @var{archive}
@@ -183,17 +193,19 @@ argument reads from standard input or writes to standard output.
 
 @item -n @var{n}
 @itemx --threads=@var{n}
-Set the number of decompression threads, overriding the system's default.
+Set the number of (de)compression threads, overriding the system's default.
 Valid values range from 0 to "as many as your system can support". A value
 of 0 disables threads entirely. If this option is not used, tarlz tries to
 detect the number of processors in the system and use it as default value.
-@w{@samp{tarlz --help}} shows the system's default value. This option
-currently only has effect when listing the contents of a multimember
-compressed archive. @xref{Multi-threaded tar}.
+@w{@samp{tarlz --help}} shows the system's default value. See the note about
+multi-threaded archive creation in the @samp{-C} option above.
+Multi-threaded extraction of files from an archive is not yet implemented.
+@xref{Multi-threaded tar}.
 
-Note that the number of usable threads is limited during decompression to
-the number of lzip members in the tar.lz archive, which you can find by
-running @w{@code{lzip -lv archive.tar.lz}}.
+Note that the number of usable threads is limited during compression to
+@w{ceil( uncompressed_size / data_size )} (@pxref{Minimum archive sizes}),
+and during decompression to the number of lzip members in the tar.lz
+archive, which you can find by running @w{@code{lzip -lv archive.tar.lz}}.
 
 @item -q
 @itemx --quiet
@@ -213,7 +225,7 @@ to an uncompressed tar archive.
 @item -t
 @itemx --list
 List the contents of an archive. If @var{files} are given, list only the
-given @var{files}.
+@var{files} given.
 
 @item -v
 @itemx --verbose
@@ -222,7 +234,7 @@ Verbosely list files processed.
 @item -x
 @itemx --extract
 Extract files from an archive. If @var{files} are given, extract only
-the given @var{files}. Else extract all the files in the archive.
+the @var{files} given. Else extract all the files in the archive.
 
 @item -0 .. -9
 Set the compression level. The default compression level is @samp{-6}.
@@ -245,40 +257,42 @@ it creates, reducing the amount of memory required for decompression.
 
 @item --asolid
 When creating or appending to a compressed archive, use appendable solid
-compression. All the files being added to the archive are compressed
-into a single lzip member, but the end-of-file blocks are compressed
-into a separate lzip member. This creates a solidly compressed
-appendable archive.
+compression. All the files being added to the archive are compressed into a
+single lzip member, but the end-of-file blocks are compressed into a
+separate lzip member. This creates a solidly compressed appendable archive.
+Solid archives can be neither created nor decoded in parallel.
 
 @item --bsolid
-When creating or appending to a compressed archive, compress tar members
-together in a lzip member until they approximate a target uncompressed size.
-The size can't be exact because each solidly compressed data block must
-contain an integer number of tar members. This option improves compression
-efficiency for archives with lots of small files. @xref{--data-size}, to set
-the target block size.
+When creating or appending to a compressed archive, use block compression.
+Tar members are compressed together in a lzip member until they approximate
+a target uncompressed size. The size can't be exact because each solidly
+compressed data block must contain an integer number of tar members. Block
+compression is the default because it improves compression ratio for
+archives with many files smaller than the block size. This option allows
+tarlz to revert to default behavior if, for example, it is invoked through an
+alias like @code{tar='tarlz --solid'}. @xref{--data-size}, to set the target
+block size.
 
 @item --dsolid
-When creating or appending to a compressed archive, use solid
-compression for each directory especified in the command line. The
-end-of-file blocks are compressed into a separate lzip member. This
-creates a compressed appendable archive with a separate lzip member for
-each top-level directory.
+When creating or appending to a compressed archive, compress each file
+specified in the command line separately in its own lzip member, and use
+solid compression for each directory specified in the command line. The
+end-of-file blocks are compressed into a separate lzip member. This creates
+a compressed appendable archive with a separate lzip member for each file or
+top-level directory specified.
 
 @item --no-solid
 When creating or appending to a compressed archive, compress each file
-separately. The end-of-file blocks are compressed into a separate lzip
-member. This creates a compressed appendable archive with a separate
-lzip member for each file. This option allows tarlz revert to default
-behavior if, for example, tarlz is invoked through an alias like
-@code{tar='tarlz --solid'}.
+separately in its own lzip member. The end-of-file blocks are compressed
+into a separate lzip member. This creates a compressed appendable archive
+with a lzip member for each file.
 
 @item --solid
-When creating or appending to a compressed archive, use solid
-compression. The files being added to the archive, along with the
-end-of-file blocks, are compressed into a single lzip member. The
-resulting archive is not appendable. No more files can be later appended
-to the archive.
+When creating or appending to a compressed archive, use solid compression.
+The files being added to the archive, along with the end-of-file blocks, are
+compressed into a single lzip member. The resulting archive is not
+appendable. No more files can be later appended to the archive. Solid
+archives can be neither created nor decoded in parallel.
 
 @item --anonymous
 Equivalent to @samp{--owner=root --group=root}.
@@ -388,11 +402,11 @@ binary zeros, interpreted as an end-of-archive indicator. These EOF
 blocks are either compressed in a separate lzip member or compressed
 along with the tar members contained in the last lzip member.
 
-The diagram below shows the correspondence between each tar member
-(formed by one or two headers plus optional data) in the tar archive and
-each
+The diagram below shows the correspondence between each tar member (formed
+by one or two headers plus optional data) in the tar archive and each
 @uref{http://www.nongnu.org/lzip/manual/lzip_manual.html#File-format,,lzip member}
-in the resulting multimember tar.lz archive:
+in the resulting multimember tar.lz archive, when per-file compression is
+used:
 @ifnothtml
 @xref{File format,,,lzip}.
 @end ifnothtml
@@ -672,10 +686,10 @@ format.
 @section Avoid misconversions to/from UTF-8
 
 There is no portable way to tell what charset a text string is coded into.
-Therefore, tarlz stores all fields representing text strings as-is, without
-conversion to UTF-8 nor any other transformation. This prevents accidental
-double UTF-8 conversions. If the need arises this behavior will be adjusted
-with a command line option in the future.
+Therefore, tarlz stores all fields representing text strings unmodified,
+without conversion to UTF-8 nor any other transformation. This prevents
+accidental double UTF-8 conversions. If the need arises this behavior will
+be adjusted with a command line option in the future.
 
 
 @node Multi-threaded tar
@@ -717,13 +731,51 @@ it only needs to decompress part of each lzip member. See the following
 example listing the Silesia corpus on a dual core machine:
 
 @example
-tarlz -9 -cf silesia.tar.lz silesia
+tarlz -9 --no-solid -cf silesia.tar.lz silesia
 time lzip -cd silesia.tar.lz | tar -tf -            (5.032s)
 time plzip -cd silesia.tar.lz | tar -tf -           (3.256s)
 time tarlz -tf silesia.tar.lz                       (0.020s)
 @end example
 
 
+@node Minimum archive sizes
+@chapter Minimum archive sizes required for multi-threaded block compression
+@cindex minimum archive sizes
+
+When creating or appending to a compressed archive using multi-threaded
+block compression, tarlz puts tar members together in blocks and compresses
+as many blocks simultaneously as there are worker threads, creating a
+multimember compressed archive.
+
+For this to work as expected (and roughly multiply the compression speed by
+the number of available processors), the uncompressed archive must be at
+least as large as the number of worker threads times the block size
+(@pxref{--data-size}). Else some processors will not get any data to
+compress, and compression will be proportionally slower. The maximum speed
+increase achievable on a given file is limited by the ratio
+@w{(uncompressed_size / data_size)}. For example, a tarball the size of gcc
+or linux will scale up to 10 or 12 processors at level -9.
+
+The following table shows the minimum uncompressed archive size needed for
+full use of N processors at a given compression level, using the default
+data size for each level:
+
+@multitable {Processors} {512 MiB} {512 MiB} {512 MiB} {512 MiB} {512 MiB} {512 MiB}
+@headitem Processors @tab 2 @tab 4 @tab 8 @tab 16 @tab 64 @tab 256
+@item Level
+@item -0 @tab   2 MiB @tab   4 MiB @tab   8 MiB @tab  16 MiB @tab  64 MiB @tab 256 MiB
+@item -1 @tab   4 MiB @tab   8 MiB @tab  16 MiB @tab  32 MiB @tab 128 MiB @tab 512 MiB
+@item -2 @tab   6 MiB @tab  12 MiB @tab  24 MiB @tab  48 MiB @tab 192 MiB @tab 768 MiB
+@item -3 @tab   8 MiB @tab  16 MiB @tab  32 MiB @tab  64 MiB @tab 256 MiB @tab   1 GiB
+@item -4 @tab  12 MiB @tab  24 MiB @tab  48 MiB @tab  96 MiB @tab 384 MiB @tab 1.5 GiB
+@item -5 @tab  16 MiB @tab  32 MiB @tab  64 MiB @tab 128 MiB @tab 512 MiB @tab   2 GiB
+@item -6 @tab  32 MiB @tab  64 MiB @tab 128 MiB @tab 256 MiB @tab   1 GiB @tab   4 GiB
+@item -7 @tab  64 MiB @tab 128 MiB @tab 256 MiB @tab 512 MiB @tab   2 GiB @tab   8 GiB
+@item -8 @tab  96 MiB @tab 192 MiB @tab 384 MiB @tab 768 MiB @tab   3 GiB @tab  12 GiB
+@item -9 @tab 128 MiB @tab 256 MiB @tab 512 MiB @tab   1 GiB @tab   4 GiB @tab  16 GiB
+@end multitable
+
+
 @node Examples
 @chapter A small tutorial with examples
 @cindex examples
diff --git a/extended.cc b/extended.cc
index 4b9e067..5440de7 100644
--- a/extended.cc
+++ b/extended.cc
@@ -19,10 +19,12 @@
 
 #include <cctype>
 #include <climits>
+#include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include <string>
 #include <vector>
+#include <pthread.h>
 #include <stdint.h>
 
 #include "tarlz.h"
@@ -38,13 +40,13 @@ unsigned decimal_digits( unsigned long long value )
   }
 
 
-int record_size( const unsigned keyword_size, const unsigned long value_size )
+unsigned long long record_size( const unsigned keyword_size,
+                                const unsigned long value_size )
   {
   // size = ' ' + keyword + '=' + value + '\n'
   unsigned long long size = 1 + keyword_size + 1 + value_size + 1;
   const unsigned d1 = decimal_digits( size );
   size += decimal_digits( d1 + size );
-  if( size >= INT_MAX ) size = 0;		// overflows snprintf size
   return size;
   }
 
@@ -89,45 +91,120 @@ uint32_t parse_record_crc( const char * const ptr )
 
 const std::string Extended::crc_record( "22 GNU.crc32=00000000\n" );
 
-int Extended::recsize_linkpath() const
+void Extended::calculate_sizes() const
   {
-  if( recsize_linkpath_ < 0 ) recsize_linkpath_ =
-    linkpath_.size() ? record_size( 8, linkpath_.size() ) : 0;
-  return recsize_linkpath_;
+  linkpath_recsize_ = linkpath_.size() ? record_size( 8, linkpath_.size() ) : 0;
+  path_recsize_ = path_.size() ? record_size( 4, path_.size() ) : 0;
+  file_size_recsize_ =
+    ( file_size_ > 0 ) ? record_size( 4, decimal_digits( file_size_ ) ) : 0;
+  edsize_ = linkpath_recsize_ + path_recsize_ + file_size_recsize_ +
+            crc_record.size();
+  padded_edsize_ = round_up( edsize_ );
+  full_size_ = header_size + padded_edsize_;
   }
 
-int Extended::recsize_path() const
+
+unsigned char xdigit( const unsigned value )
   {
-  if( recsize_path_ < 0 )
-    recsize_path_ = path_.size() ? record_size( 4, path_.size() ) : 0;
-  return recsize_path_;
+  if( value <= 9 ) return '0' + value;
+  if( value <= 15 ) return 'A' + value - 10;
+  return 0;
   }
 
-int Extended::recsize_file_size() const
+void print_hex( char * const buf, int size, unsigned long long num )
   {
-  if( recsize_file_size_ < 0 ) recsize_file_size_ =
-    ( file_size_ > 0 ) ? record_size( 4, decimal_digits( file_size_ ) ) : 0;
-  return recsize_file_size_;
+  while( --size >= 0 ) { buf[size] = xdigit( num & 0x0F ); num >>= 4; }
+  }
+
+void print_decimal( char * const buf, int size, unsigned long long num )	// writes exactly 'size' decimal digits of num into buf; no terminator is added
+  { while( --size >= 0 ) { buf[size] = '0' + ( num % 10 ); num /= 10; } }	// fills right to left, so short values are zero-padded on the left
+
+
+bool print_record( char * const buf, const unsigned long long size,
+                   const char * keyword, const std::string & value )
+  {
+  // Writes the record "size keyword=value\n" into buf (string value); no bounds checks, no terminator
+  unsigned long long pos = decimal_digits( size );	// width of the leading length field
+  print_decimal( buf, pos, size ); buf[pos++] = ' ';
+  while( *keyword ) { buf[pos++] = *keyword; ++keyword; } buf[pos++] = '=';
+  std::memcpy( buf + pos, value.c_str(), value.size() );	// value copied verbatim
+  pos += value.size(); buf[pos++] = '\n';
+  return pos == size;	// true only if the bytes written match the declared record size
+  }
+
+bool print_record( char * const buf, const int size,
+                   const char * keyword, const unsigned long long value )
+  {
+  // Writes the record "size keyword=value\n" into buf (numeric value in decimal); no bounds checks, no terminator
+  int pos = decimal_digits( size );	// width of the leading length field
+  print_decimal( buf, pos, size ); buf[pos++] = ' ';
+  while( *keyword ) { buf[pos++] = *keyword; ++keyword; } buf[pos++] = '=';
+  const int vd =  decimal_digits( value );	// number of digits needed for value
+  print_decimal( buf + pos, vd, value ); pos += vd; buf[pos++] = '\n';
+  return pos == size;	// true only if the bytes written match the declared record size
+  }
+
+
+// Formats the extended header and records into rbuf. Returns the extended block size (0 if no extended data), or -1 if error.
+long long Extended::format_block( Resizable_buffer & rbuf ) const
+  {
+  if( empty() ) return 0;			// no extended data
+  const unsigned long long bufsize = full_size();	// recalculate sizes
+  if( edsize_ <= 0 ) return 0;			// no extended data
+  if( edsize_ >= 1LL << 33 ) return -1;		// too much extended data (8 GiB or more)
+  if( !rbuf.resize( bufsize ) ) return -1;	// extended block buffer
+  uint8_t * const header = (uint8_t *)rbuf();	// extended header
+  char * const buf = rbuf() + header_size;	// extended records
+  init_tar_header( header );
+  header[typeflag_o] = tf_extended;		// fill only required fields
+  print_octal( header + size_o, size_l - 1, edsize_ );	// size field holds the unpadded records size
+  print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) );	// checksum is stored last, after the other fields are set
+
+  if( path_recsize_ && !print_record( buf, path_recsize_, "path", path_ ) )	// records are emitted in the order path, linkpath, size, crc
+    return -1;
+  long long pos = path_recsize_;	// running offset into the records area
+  if( linkpath_recsize_ &&
+      !print_record( buf + pos, linkpath_recsize_, "linkpath", linkpath_ ) )
+    return -1;
+  pos += linkpath_recsize_;
+  if( file_size_recsize_ &&
+      !print_record( buf + pos, file_size_recsize_, "size", file_size_ ) )
+    return -1;
+  pos += file_size_recsize_;
+  const unsigned crc_size = Extended::crc_record.size();
+  std::memcpy( buf + pos, Extended::crc_record.c_str(), crc_size );	// CRC record copied with its all-zero placeholder value
+  pos += crc_size;
+  if( pos != edsize_ ) return -1;	// written bytes must match the size stored in the header
+  print_hex( buf + edsize_ - 9, 8,	// overwrite the 8 placeholder digits just before the final newline
+             crc32c.windowed_crc( (const uint8_t *)buf, edsize_ - 9, edsize_ ) );
+  if( padded_edsize_ > edsize_ )			// wipe padding
+    std::memset( buf + edsize_, 0, padded_edsize_ - edsize_ );
+  crc_present_ = true;
+  return bufsize;	// header plus padded records
+  }
 
 
 bool Extended::parse( const char * const buf, const unsigned long long edsize,
                       const bool permissive )
   {
-  reset();
+  reset(); full_size_ = -1;			// invalidate cached sizes
   for( unsigned long long pos = 0; pos < edsize; )	// parse records
     {
     const char * tail;
     const unsigned long long rsize =
       parse_decimal( buf + pos, &tail, edsize - pos );
-    if( rsize == 0 || rsize > edsize - pos || tail[0] != ' ' ||
-        buf[pos+rsize-1] != '\n' ) return false;
+    if( rsize == 0 || rsize > edsize - pos ||
+        tail[0] != ' ' || buf[pos+rsize-1] != '\n' ) return false;
     ++tail;	// point to keyword
     // rest = length of (keyword + '=' + value) without the final newline
     const unsigned long long rest = ( buf + ( pos + rsize - 1 ) ) - tail;
     if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 )
-      { if( path_.size() && !permissive ) return false;
-        path_.assign( tail + 5, rest - 5 ); }
+      {
+      if( path_.size() && !permissive ) return false;
+      path_.assign( tail + 5, rest - 5 );
+      // this also truncates path_ at the first embedded null character
+      path_.assign( remove_leading_dotslash( path_.c_str() ) );
+      }
     else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 )
       { if( linkpath_.size() && !permissive ) return false;
         linkpath_.assign( tail + 9, rest - 9 ); }
@@ -143,14 +220,18 @@ bool Extended::parse( const char * const buf, const unsigned long long edsize,
       {
       if( crc_present_ && !permissive ) return false;
       if( rsize != crc_record.size() ) return false;
+      crc_present_ = true;
       const uint32_t stored_crc = parse_record_crc( tail + 10 );
       const uint32_t computed_crc =
         crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize );
-      crc_present_ = true;
-      if( stored_crc != computed_crc ) return false;
+      if( stored_crc != computed_crc )
+        {
+        if( verbosity >= 2 )
+          std::fprintf( stderr, "CRC32C = %08X\n", (unsigned)computed_crc );
+        return false;
+        }
       }
     pos += rsize;
     }
-  full_size_ = header_size + round_up( edsize );
   return true;
   }
diff --git a/extract.cc b/extract.cc
index f85cf67..63f58a5 100644
--- a/extract.cc
+++ b/extract.cc
@@ -44,7 +44,6 @@
 namespace {
 
 Resizable_buffer grbuf( initial_line_length );
-int gretval = 0;
 bool has_lz_ext;				// global var for archive_read
 
 void skip_warn( const bool reset = false )	// avoid duplicate warnings
@@ -118,16 +117,14 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
     if( !islz && !istar && !iseof )		// corrupt or invalid format
       {
       show_error( "This does not look like a POSIX tar archive." );
-      if( has_lz_ext ) islz = true;
-      if( verbosity >= 2 && !islz && rd == size )
-        std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( buf ) );
+      if( has_lz_ext && rd >= min_member_size ) islz = true;
       if( !islz ) return 1;
       }
     if( !islz )						// uncompressed
       { if( rd == size ) return 0; fatal = true; return 2; }
     decoder = LZ_decompress_open();			// compressed
     if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
-      { show_error( "Not enough memory." );
+      { show_error( mem_msg );
         LZ_decompress_close( decoder ); fatal = true; return 2; }
     if( LZ_decompress_write( decoder, buf, rd ) != rd )
       internal_error( "library error (LZ_decompress_write)." );
@@ -154,7 +151,7 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
       {
       if( LZ_decompress_sync_to_member( decoder ) < 0 )
         internal_error( "library error (LZ_decompress_sync_to_member)." );
-      skip_warn(); gretval = 2; return 1;
+      skip_warn(); set_error_status( 2 ); return 1;
       }
     if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
       { LZ_decompress_close( decoder );
@@ -271,8 +268,8 @@ void format_member_name( const Extended & extended, const Tar_header header,
                 extended.file_size(), 1900 + tm->tm_year, 1 + tm->tm_mon,
                 tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path().c_str(),
                 link_string, !islink ? "" : extended.linkpath().c_str() );
-      if( (int)rbuf.size() > len + offset ) break;
-      else rbuf.resize( len + offset + 1 );
+      if( (int)rbuf.size() > len + offset || !rbuf.resize( len + offset + 1 ) )
+        break;
       }
     }
   else
@@ -458,25 +455,6 @@ int extract_member( const int infd, const Extended & extended,
 
 } // end namespace
 
-// Removes any amount of leading "./" and '/' strings.
-const char * remove_leading_slash( const char * const filename )
-  {
-  static bool first_post = true;
-  const char * p = filename;
-
-  while( *p == '/' || ( *p == '.' && p[1] == '/' ) ) ++p;
-  if( p != filename && first_post )
-    {
-    first_post = false;
-    std::string msg( "Removing leading '" );
-    msg.append( filename, p - filename );
-    msg += "' from member names.";
-    show_error( msg.c_str() );
-    }
-  if( *p == 0 ) p = ".";
-  return p;
-  }
-
 
 // return true if dir is a parent directory of name
 bool compare_prefix_dir( const char * const dir, const char * const name )
@@ -587,19 +565,21 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
         { show_file_error( dir, "Error changing working directory", errno );
           return 1; }
       }
-    if( !code ) name_pending[i] = true;
+    if( !code && parser.argument( i ).size() ) name_pending[i] = true;
     }
 
-  if( listing && num_workers > 0 )		// multi-threaded --list
+  // multi-threaded --list is faster even with 1 thread and 1 file in archive
+  if( listing && num_workers > 0 )
     {
-    const Lzip_index lzip_index( infd, true, false );
+    const Lzip_index lzip_index( infd, true, false );	// only regular files
     const long members = lzip_index.members();
-    if( lzip_index.retval() == 0 && ( members >= 3 ||
-        ( members >= 2 && lzip_index.dblock( members - 1 ).size() > 1024 ) ) )
-      { //show_file_error( archive_name.c_str(), "Is compressed seekable" );
-        return list_lz( parser, name_pending, lzip_index, filenames,
-                     debug_level, infd, std::min( (long)num_workers, members ),
-                     missing_crc, permissive ); }
+    if( lzip_index.retval() == 0 && members >= 2 )	// one file + eof
+      {
+      // show_file_error( archive_name.c_str(), "Is compressed seekable" );
+      return list_lz( parser, name_pending, lzip_index, filenames, debug_level,
+                      infd, std::min( (long)num_workers, members ),
+                      missing_crc, permissive );
+      }
     lseek( infd, 0, SEEK_SET );
     }
 
@@ -619,7 +599,9 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
     if( ret != 0 || !verify_ustar_chksum( header ) )
       {
       if( ret == 0 && block_is_zero( header, header_size ) ) break;	// EOF
-      skip_warn(); gretval = 2; continue;
+      if( verbosity >= 2 )
+        std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( header ) );
+      skip_warn(); set_error_status( 2 ); continue;
       }
     skip_warn( true );			// reset warning
 
@@ -632,7 +614,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
       Extended dummy;		// global headers are parsed and ignored
       if( !parse_records( infd, dummy, header, true ) )
         { show_error( "Error in global extended records. Skipping to next header." );
-          gretval = 2; }
+          set_error_status( 2 ); }
       continue;
       }
     if( typeflag == tf_extended )
@@ -642,7 +624,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
                       /*" Use --permissive.", 0, true*/ ); return 2; }
       if( !parse_records( infd, extended, header, permissive ) )
         { show_error( "Error in extended records. Skipping to next header." );
-          extended.reset(); gretval = 2; }
+          extended.reset(); set_error_status( 2 ); }
       else if( !extended.crc_present() && missing_crc )
         { show_error( "Missing CRC in extended records.", 0, true ); return 2; }
       prev_extended = true;
@@ -674,17 +656,17 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
         { stored_name[len] = header[name_o+i]; ++len; }
       while( len > 0 && stored_name[len-1] == '/' ) --len;	// trailing '/'
       stored_name[len] = 0;
-      extended.path( remove_leading_slash( stored_name ) );
+      extended.path( remove_leading_dotslash( stored_name ) );
       }
     const char * const filename = extended.path().c_str();
 
     bool skip = filenames > 0;
     if( skip )
       for( int i = 0; i < parser.arguments(); ++i )
-        if( parser.code( i ) == 0 )
+        if( !parser.code( i ) && parser.argument( i ).size() )
           {
           const char * const name =
-            remove_leading_slash( parser.argument( i ).c_str() );
+            remove_leading_dotslash( parser.argument( i ).c_str() );
           if( compare_prefix_dir( name, filename ) ||
               compare_tslash( name, filename ) )
             { skip = false; name_pending[i] = false; break; }
@@ -705,13 +687,10 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
     }
 
   for( int i = 0; i < parser.arguments(); ++i )
-    if( parser.code( i ) == 0 && name_pending[i] )
+    if( !parser.code( i ) && parser.argument( i ).size() && name_pending[i] )
       {
       show_file_error( parser.argument( i ).c_str(), "Not found in archive." );
-      if( gretval < 1 ) gretval = 1;
+      set_error_status( 1 );
       }
-  if( !retval && gretval )
-    { show_error( "Exiting with failure status due to previous errors." );
-      retval = gretval; }
-  return retval;
+  return final_exit_status( retval );
   }
diff --git a/list_lz.cc b/list_lz.cc
index 79d500c..23b6e7c 100644
--- a/list_lz.cc
+++ b/list_lz.cc
@@ -75,22 +75,6 @@ int pwriteblock( const int fd, const uint8_t * const buf, const int size,
   }
 
 
-namespace {
-
-// This can be called from any thread, main thread or sub-threads alike,
-// since they all call common helper functions that call cleanup_and_fail()
-// in case of an error.
-//
-void cleanup_and_fail( const int retval = 2 )
-  {
-  // only one thread can delete and exit
-  static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-
-  pthread_mutex_lock( &mutex );		// ignore errors to avoid loop
-  std::exit( retval );
-  }
-
-
 void xinit_mutex( pthread_mutex_t * const mutex )
   {
   const int errcode = pthread_mutex_init( mutex, 0 );
@@ -161,6 +145,8 @@ void xbroadcast( pthread_cond_t * const cond )
   }
 
 
+namespace {
+
 struct Packet			// member name and metadata or error message
   {
   enum Status { ok, member_done, error };
@@ -262,8 +248,8 @@ public:
     return true;
     }
 
-  // deliver a packet to muxer
-  // if packet.status == Packet::member_done, move to next queue
+  /* Deliver a packet to muxer.
+     If packet.status == Packet::member_done, move to next queue. */
   Packet * deliver_packet()
     {
     Packet * opacket = 0;
@@ -425,9 +411,9 @@ struct Worker_arg
   };
 
 
-       // read lzip members from archive, list their tar members, and
-       // give the produced packets to courier.
-extern "C" void * dworker_l( void * arg )
+/* Read lzip members from archive, list their tar members, and give the
+   packets produced to courier. */
+extern "C" void * tworker( void * arg )
   {
   const Worker_arg & tmp = *(const Worker_arg *)arg;
   const Lzip_index & lzip_index = *tmp.lzip_index;
@@ -441,12 +427,12 @@ extern "C" void * dworker_l( void * arg )
   const int missing_crc = tmp.missing_crc;
   const bool permissive = tmp.permissive;
 
+  Resizable_buffer rbuf( initial_line_length );
   LZ_Decoder * const decoder = LZ_decompress_open();
-  if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
-    { show_error( "Not enough memory." ); cleanup_and_fail(); }
+  if( !rbuf.size() || !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
+    { show_error( mem_msg ); cleanup_and_fail(); }
 
   const long long cdata_size = lzip_index.cdata_size();
-  Resizable_buffer rbuf( initial_line_length );
   bool master = false;
   for( long i = worker_id; !master && i < lzip_index.members(); i += num_workers )
     {
@@ -498,7 +484,7 @@ extern "C" void * dworker_l( void * arg )
         {
         if( prev_extended )
           { show_error( "Format violation: global header after extended header." );
-            cleanup_and_fail(); }
+            cleanup_and_fail( 2 ); }
         Extended dummy;		// global headers are parsed and ignored
         const int ret = parse_records_lz( decoder, infd, file_pos, member_end,
                           cdata_size, data_pos, dummy, header, &msg, true );
@@ -572,17 +558,17 @@ extern "C" void * dworker_l( void * arg )
           { stored_name[len] = header[name_o+i]; ++len; }
         while( len > 0 && stored_name[len-1] == '/' ) --len;	// trailing '/'
         stored_name[len] = 0;
-        extended.path( remove_leading_slash( stored_name ) );
+        extended.path( remove_leading_dotslash( stored_name ) );
         }
       const char * const filename = extended.path().c_str();
 
       bool skip = filenames > 0;
       if( skip )
         for( int i = 0; i < parser.arguments(); ++i )
-          if( parser.code( i ) == 0 )
+          if( !parser.code( i ) && parser.argument( i ).size() )
             {
             const char * const name =
-              remove_leading_slash( parser.argument( i ).c_str() );
+              remove_leading_dotslash( parser.argument( i ).c_str() );
             if( compare_prefix_dir( name, filename ) ||
                 compare_tslash( name, filename ) )
               { skip = false; name_pending[i] = false; break; }
@@ -602,7 +588,7 @@ extern "C" void * dworker_l( void * arg )
       else if( retval > 0 )
         { show_error( msg );
           show_error( "Error is not recoverable: exiting now." );
-          cleanup_and_fail(); }
+          cleanup_and_fail( 2 ); }
       }
     }
   if( LZ_decompress_close( decoder ) < 0 )
@@ -617,9 +603,9 @@ done:
   }
 
 
-     // get from courier the processed and sorted packets, and print
-     // the member lines on stdout or the diagnostics on stderr.
-void muxer( Packet_courier & courier )
+/* Get from courier the processed and sorted packets, and print
+   the member lines on stdout or the diagnostics on stderr. */
+bool muxer( Packet_courier & courier )
   {
   while( true )
     {
@@ -627,14 +613,15 @@ void muxer( Packet_courier & courier )
     if( !opacket ) break;	// queue is empty. all workers exited
 
     if( opacket->status == Packet::error )
-      { show_error( opacket->line.c_str() ); cleanup_and_fail(); }
+      { show_error( opacket->line.c_str() ); return false; }
     if( opacket->line.size() )
       { std::fputs( opacket->line.c_str(), stdout );
         std::fflush( stdout ); }
     delete opacket;
     }
   if( !courier.mastership_granted() )	// no worker found EOF blocks
-    { show_error( "Archive ends unexpectedly." ); cleanup_and_fail(); }
+    { show_error( "Archive ends unexpectedly." ); return false; }
+  return true;
   }
 
 } // end namespace
@@ -651,8 +638,7 @@ int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
 
   Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
   pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
-  if( !worker_args || !worker_threads )
-    { show_error( "Not enough memory." ); cleanup_and_fail(); }
+  if( !worker_args || !worker_threads ) { show_error( mem_msg ); return 1; }
   for( int i = 0; i < num_workers; ++i )
     {
     worker_args[i].lzip_index = &lzip_index;
@@ -666,25 +652,25 @@ int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
     worker_args[i].missing_crc = missing_crc;
     worker_args[i].permissive = permissive;
     const int errcode =
-      pthread_create( &worker_threads[i], 0, dworker_l, &worker_args[i] );
+      pthread_create( &worker_threads[i], 0, tworker, &worker_args[i] );
     if( errcode )
-      { show_error( "Can't create worker threads", errcode ); cleanup_and_fail(); }
+      { show_error( "Can't create worker threads", errcode ); return 1; }
     }
 
-  muxer( courier );
+  if( !muxer( courier ) ) return 2;
 
   for( int i = num_workers - 1; i >= 0; --i )
     {
     const int errcode = pthread_join( worker_threads[i], 0 );
     if( errcode )
-      { show_error( "Can't join worker threads", errcode ); cleanup_and_fail(); }
+      { show_error( "Can't join worker threads", errcode ); return 1; }
     }
   delete[] worker_threads;
   delete[] worker_args;
 
   int retval = 0;
   for( int i = 0; i < parser.arguments(); ++i )
-    if( parser.code( i ) == 0 && name_pending[i] )
+    if( !parser.code( i ) && parser.argument( i ).size() && name_pending[i] )
       {
       show_file_error( parser.argument( i ).c_str(), "Not found in archive." );
       retval = 1;
diff --git a/lzip_index.cc b/lzip_index.cc
index 1b7e576..f724bd1 100644
--- a/lzip_index.cc
+++ b/lzip_index.cc
@@ -19,10 +19,12 @@
 
 #include <algorithm>
 #include <cerrno>
+#include <climits>
 #include <cstdio>
 #include <cstring>
 #include <string>
 #include <vector>
+#include <pthread.h>
 #include <stdint.h>
 #include <unistd.h>
 
diff --git a/main.cc b/main.cc
index 25ff394..976bbd0 100644
--- a/main.cc
+++ b/main.cc
@@ -67,15 +67,15 @@ enum Mode { m_none, m_append, m_concatenate, m_create, m_extract, m_list };
 
 void show_help( const long num_online )
   {
-  std::printf( "Tarlz is a combined implementation of the tar archiver and the lzip\n"
-               "compressor. By default tarlz creates, lists and extracts archives in a\n"
-               "simplified posix pax format compressed with lzip on a per file basis. Each\n"
-               "tar member is compressed in its own lzip member, as well as the end-of-file\n"
-               "blocks. This method adds an indexed lzip layer on top of the tar archive,\n"
-               "making it possible to decode the archive safely in parallel. The resulting\n"
-               "multimember tar.lz archive is fully backward compatible with standard tar\n"
-               "tools like GNU tar, which treat it like any other tar.lz archive. Tarlz can\n"
-               "append files to the end of such compressed archives.\n"
+  std::printf( "Tarlz is a massively parallel (multi-threaded) combined implementation of\n"
+               "the tar archiver and the lzip compressor. Tarlz creates, lists and extracts\n"
+               "archives in a simplified posix pax format compressed with lzip, keeping the\n"
+               "alignment between tar members and lzip members. This method adds an indexed\n"
+               "lzip layer on top of the tar archive, making it possible to decode the\n"
+               "archive safely in parallel. The resulting multimember tar.lz archive is\n"
+               "fully backward compatible with standard tar tools like GNU tar, which treat\n"
+               "it like any other tar.lz archive. Tarlz can append files to the end of such\n"
+               "compressed archives.\n"
                "\nThe tarlz file format is a safe posix-style backup format. In case of\n"
                "corruption, tarlz can extract all the undamaged members from the tar.lz\n"
                "archive, skipping over the damaged members, just like the standard\n"
@@ -91,7 +91,7 @@ void show_help( const long num_online )
                "  -c, --create               create a new archive\n"
                "  -C, --directory=<dir>      change to directory <dir>\n"
                "  -f, --file=<archive>       use archive file <archive>\n"
-               "  -n, --threads=<n>          set number of decompression threads [%ld]\n"
+               "  -n, --threads=<n>          set number of (de)compression threads [%ld]\n"
                "  -q, --quiet                suppress all messages\n"
                "  -r, --append               append files to the end of an archive\n"
                "  -t, --list                 list the contents of an archive\n"
@@ -99,9 +99,9 @@ void show_help( const long num_online )
                "  -x, --extract              extract files from an archive\n"
                "  -0 .. -9                   set compression level [default 6]\n"
                "      --asolid               create solidly compressed appendable archive\n"
-               "      --bsolid               create per-data-block compressed archive\n"
-               "      --dsolid               create per-directory compressed archive\n"
-               "      --no-solid             create per-file compressed archive (default)\n"
+               "      --bsolid               create per block compressed archive (default)\n"
+               "      --dsolid               create per directory compressed archive\n"
+               "      --no-solid             create per file compressed archive\n"
                "      --solid                create solidly compressed archive\n"
                "      --anonymous            equivalent to '--owner=root --group=root'\n"
                "      --owner=<owner>        use <owner> name/ID for files added\n"
@@ -239,6 +239,20 @@ int open_outstream( const std::string & name, const bool create )
   }
 
 
+// Terminate the program with exit status 'retval'.
+// This can be called from any thread, main thread or sub-threads alike,
+// since they all call common helper functions that call cleanup_and_fail()
+// in case of an error.
+void cleanup_and_fail( const int retval )
+  {
+  // never unlocked here: only the first thread to take it goes on to exit
+  static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+
+  pthread_mutex_lock( &mutex );		// ignore errors to avoid loop
+  std::exit( retval );
+  }
+
+
 void show_error( const char * const msg, const int errcode, const bool help )
   {
   if( verbosity < 0 ) return;
@@ -342,7 +356,8 @@ int main( const int argc, const char * const argv[] )
   for( int argind = 0; argind < parser.arguments(); ++argind )
     {
     const int code = parser.code( argind );
-    if( !code ) { ++filenames; continue; }		// skip non-options
+    if( !code )						// skip non-options
+      { if( parser.argument( argind ).size() ) ++filenames; continue; }
     const std::string & sarg = parser.argument( argind );
     const char * const arg = sarg.c_str();
     switch( code )
@@ -394,7 +409,7 @@ int main( const int argc, const char * const argv[] )
     case m_none:   show_error( "Missing operation.", 0, true ); return 2;
     case m_append:
     case m_create: return encode( archive_name, parser, filenames, level,
-                                  program_mode == m_append );
+                          num_workers, debug_level, program_mode == m_append );
     case m_concatenate: return concatenate( archive_name, parser, filenames );
     case m_extract:
     case m_list:   return decode( archive_name, parser, filenames, num_workers,
diff --git a/tarlz.h b/tarlz.h
index d34374a..6baa1d8 100644
--- a/tarlz.h
+++ b/tarlz.h
@@ -41,6 +41,16 @@ const uint8_t ustar_magic[magic_l] =
 inline bool verify_ustar_magic( const uint8_t * const header )
   { return std::memcmp( header + magic_o, ustar_magic, magic_l ) == 0; }
 
+inline void init_tar_header( Tar_header header )    // set magic and version
+  {
+  std::memset( header, 0, header_size );		// zero the whole header
+  std::memcpy( header + magic_o, ustar_magic, magic_l - 1 );	// last byte stays 0
+  header[version_o] = header[version_o+1] = '0';	// version field is "00"
+  }
+
+inline void print_octal( uint8_t * const buf, int size, unsigned long long num )
+  { while( --size >= 0 ) { buf[size] = '0' + ( num % 8 ); num /= 8; } }	// fills buf[0..size-1], zero padded
+
 
 // Round "size" to the next multiple of header size (512).
 //
@@ -52,30 +62,65 @@ inline unsigned long long round_up( const unsigned long long size )
   }
 
 
+enum { initial_line_length = 1000 };	// must be >= 77 for 'mode user/group'
+
+class Resizable_buffer		// malloc'ed char buffer that can only grow
+  {
+  char * p;				// 0 if the initial malloc failed
+  unsigned long size_;			// size_ < LONG_MAX
+
+public:
+  explicit Resizable_buffer( const unsigned long initial_size )
+    : p( (char *)std::malloc( initial_size ) ), size_( p ? initial_size : 0 ) {}
+  ~Resizable_buffer() { if( p ) std::free( p ); p = 0; size_ = 0; }
+
+  bool resize( const unsigned long long new_size )	// false if it can't grow
+    {
+    if( new_size >= LONG_MAX ) return false;		// keep size_ < LONG_MAX
+    if( size_ < new_size )				// never shrinks
+      {
+      char * const tmp = (char *)std::realloc( p, new_size );
+      if( !tmp ) return false;			// p and size_ are left unchanged
+      p = tmp; size_ = new_size;
+      }
+    return true;
+    }
+  char * operator()() const { return p; }		// pointer to the buffer
+  unsigned long size() const { return size_; }	// 0 after a failed initial malloc
+  };
+
+
 class Extended		// stores metadata from/for extended records
   {
-  std::string linkpath_;
+  std::string linkpath_;		// these are the real metadata
   std::string path_;
   unsigned long long file_size_;
 
-  mutable long long full_size_;		// cached sizes
-  mutable int recsize_linkpath_;
-  mutable int recsize_path_;
-  mutable int recsize_file_size_;
+  // cached sizes; if full_size_ < 0 they must be recalculated
+  mutable long long edsize_;		// extended data size
+  mutable long long padded_edsize_;	// edsize rounded up
+  mutable long long full_size_;		// header + padded edsize
+  mutable long long linkpath_recsize_;
+  mutable long long path_recsize_;
+  mutable int file_size_recsize_;
+
+  // true if CRC present in parsed or formatted records
+  mutable bool crc_present_;
 
-  bool crc_present_;		// true if CRC present in parsed records
+  void calculate_sizes() const;
 
 public:
   static const std::string crc_record;
 
   Extended()
-    : file_size_( 0 ), full_size_( -1 ), recsize_linkpath_( -1 ),
-      recsize_path_( -1 ), recsize_file_size_( -1 ), crc_present_( false ) {}
+    : file_size_( 0 ), edsize_( 0 ), padded_edsize_( 0 ), full_size_( 0 ),
+      linkpath_recsize_( 0 ), path_recsize_( 0 ), file_size_recsize_( 0 ),
+      crc_present_( false ) {}
 
   void reset()
-    { linkpath_.clear(); path_.clear(); file_size_ = 0; full_size_ = -1;
-      recsize_linkpath_ = -1; recsize_path_ = -1; recsize_file_size_ = -1;
-      crc_present_ = false; }
+    { linkpath_.clear(); path_.clear(); file_size_ = 0; edsize_ = 0;
+      padded_edsize_ = 0; full_size_ = 0; linkpath_recsize_ = 0;
+      path_recsize_ = 0; file_size_recsize_ = 0; crc_present_ = false; }
 
   bool empty() const
     { return linkpath_.empty() && path_.empty() && file_size_ == 0; }
@@ -84,27 +129,16 @@ public:
   const std::string & path() const { return path_; }
   unsigned long long file_size() const { return file_size_; }
 
-  void linkpath( const char * const lp )
-    { linkpath_ = lp; full_size_ = -1; recsize_linkpath_ = -1; }
-  void path( const char * const p )
-    { path_ = p; full_size_ = -1; recsize_path_ = -1; }
+  void linkpath( const char * const lp ) { linkpath_ = lp; full_size_ = -1; }
+  void path( const char * const p ) { path_ = p; full_size_ = -1; }
   void file_size( const unsigned long long fs )
-    { file_size_ = fs; full_size_ = -1; recsize_file_size_ = -1; }
-
-  int recsize_linkpath() const;
-  int recsize_path() const;
-  int recsize_file_size() const;
-  unsigned long long edsize() const		// extended data size
-    { return empty() ? 0 : recsize_linkpath() + recsize_path() +
-                           recsize_file_size() + crc_record.size(); }
-  unsigned long long edsize_pad() const		// edsize rounded up
-    { return round_up( edsize() ); }
+    { file_size_ = fs; full_size_ = -1; }
+
   unsigned long long full_size() const
-    { if( full_size_ < 0 )
-        full_size_ = ( empty() ? 0 : header_size + edsize_pad() );
-      return full_size_; }
+    { if( full_size_ < 0 ) calculate_sizes(); return full_size_; }
 
   bool crc_present() const { return crc_present_; }
+  long long format_block( Resizable_buffer & rbuf ) const;
   bool parse( const char * const buf, const unsigned long long edsize,
               const bool permissive );
   };
@@ -253,37 +287,12 @@ public:
 
 extern const CRC32 crc32c;
 
-
-enum { initial_line_length = 1000 };	// must be >= 77
-
-class Resizable_buffer
-  {
-  char * p;
-  unsigned size_;
-
-public:
-  explicit Resizable_buffer( const unsigned initial_size )
-    : p( (char *)std::malloc( initial_size ) ), size_( p ? initial_size : 0 ) {}
-  ~Resizable_buffer() { if( p ) std::free( p ); p = 0; size_ = 0; }
-
-  bool resize( const unsigned new_size )
-    {
-    if( size_ < new_size )
-      {
-      char * const tmp = (char *)std::realloc( p, new_size );
-      if( !tmp ) return false;
-      p = tmp; size_ = new_size;
-      }
-    return true;
-    }
-  char * operator()() const { return p; }
-  unsigned size() const { return size_; }
-  };
-
 const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
 const char * const bad_dict_msg = "Invalid dictionary size in member header.";
 const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
 const char * const trailing_msg = "Trailing data not allowed.";
+const char * const mem_msg = "Not enough memory.";
+const char * const mem_msg2 = "Not enough memory. Try a lower compression level.";
 
 // defined in create.cc
 enum Solidity { no_solid, bsolid, dsolid, asolid, solid };
@@ -291,19 +300,34 @@ extern int cl_owner;
 extern int cl_group;
 extern int cl_data_size;
 extern Solidity solidity;
+const char * remove_leading_dotslash( const char * const filename,
+                                      const bool dotdot = false );
+bool fill_headers( const char * const filename, Extended & extended,
+                   Tar_header header, unsigned long long & file_size,
+                   const int flag );
+bool block_is_full( const Extended & extended,
+                    const unsigned long long file_size,
+                    unsigned long long & partial_data_size );
+void set_error_status( const int retval );
+int final_exit_status( int retval );
 unsigned ustar_chksum( const uint8_t * const header );
 bool verify_ustar_chksum( const uint8_t * const header );
 class Arg_parser;
 int concatenate( const std::string & archive_name, const Arg_parser & parser,
                  const int filenames );
 int encode( const std::string & archive_name, const Arg_parser & parser,
-            const int filenames, const int level, const bool append );
+            const int filenames, const int level, const int num_workers,
+            const int debug_level, const bool append );
+
+// defined in create_lz.cc
+int encode_lz( const char * const archive_name, const Arg_parser & parser,
+               const int dictionary_size, const int match_len_limit,
+               const int num_workers, const int outfd, const int debug_level );
 
 // defined in extract.cc
 bool block_is_zero( const uint8_t * const buf, const int size );
 void format_member_name( const Extended & extended, const Tar_header header,
                          Resizable_buffer & rbuf, const bool long_format );
-const char * remove_leading_slash( const char * const filename );
 bool compare_prefix_dir( const char * const dir, const char * const name );
 bool compare_tslash( const char * const name1, const char * const name2 );
 int readblock( const int fd, uint8_t * const buf, const int size );
@@ -315,6 +339,15 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
             const bool permissive );
 
 // defined in list_lz.cc
+void xinit_mutex( pthread_mutex_t * const mutex );
+void xinit_cond( pthread_cond_t * const cond );
+void xdestroy_mutex( pthread_mutex_t * const mutex );
+void xdestroy_cond( pthread_cond_t * const cond );
+void xlock( pthread_mutex_t * const mutex );
+void xunlock( pthread_mutex_t * const mutex );
+void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex );
+void xsignal( pthread_cond_t * const cond );
+void xbroadcast( pthread_cond_t * const cond );
 class Lzip_index;
 int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
              const Lzip_index & lzip_index, const int filenames,
@@ -325,8 +358,45 @@ int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
 extern int verbosity;
 int open_instream( const std::string & name );
 int open_outstream( const std::string & name, const bool create = true );
+void cleanup_and_fail( const int retval = 1 );	// terminate the program
 void show_error( const char * const msg, const int errcode = 0,
                  const bool help = false );
 void show_file_error( const char * const filename, const char * const msg,
                       const int errcode = 0 );
 void internal_error( const char * const msg );
+
+
+class Slot_tally		// counts the free slots out of a fixed total
+  {
+  const int num_slots;				// total slots
+  int num_free;					// remaining free slots
+  pthread_mutex_t mutex;			// held while num_free is modified
+  pthread_cond_t slot_av;			// slot available
+
+  Slot_tally( const Slot_tally & );		// declared as private (no copying)
+  void operator=( const Slot_tally & );		// declared as private (no copying)
+
+public:
+  explicit Slot_tally( const int slots )	// all slots start free
+    : num_slots( slots ), num_free( slots )
+    { xinit_mutex( &mutex ); xinit_cond( &slot_av ); }
+
+  ~Slot_tally() { xdestroy_cond( &slot_av ); xdestroy_mutex( &mutex ); }
+
+  bool all_free() { return ( num_free == num_slots ); }	// reads num_free without taking the mutex
+
+  void get_slot()				// wait for a free slot
+    {
+    xlock( &mutex );
+    while( num_free <= 0 ) xwait( &slot_av, &mutex );	// recheck after every wakeup
+    --num_free;
+    xunlock( &mutex );
+    }
+
+  void leave_slot()				// return a slot to the tally
+    {
+    xlock( &mutex );		// NOTE(review): signals once, only on 0 -> 1;
+    if( ++num_free == 1 ) xsignal( &slot_av );	// num_free was 0
+    xunlock( &mutex );		// presumably a single thread calls get_slot() - confirm
+    }
+  };
diff --git a/testsuite/check.sh b/testsuite/check.sh
index e1e3f60..9899c15 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -72,7 +72,8 @@ lzlib_1_11() { [ ${lwarn} = 0 ] &&
 # test_bad2.tar.lz:  byte at offset 6000 changed from 0x56 to 0x46
 # test3.tar:         3 members (foo bar baz) + 2 zeroed 512-byte blocks
 # test3_dir.tar.lz:  like test3.tar.lz but members /dir/foo /dir/bar /dir/baz
-# test3_dot.tar.lz:  like test3.tar.lz but members ./foo ./bar ./baz
+# test3_dot.tar.lz:  3 times 3 members ./foo ././bar ./././baz
+#                    the 3 central members with filename in extended header
 # test3_bad1.tar:    byte at offset  259 changed from 't' to '0' (magic)
 # test3_bad2.tar:    byte at offset 1283 changed from 't' to '0' (magic)
 # test3_bad3.tar:    byte at offset 2559 changed from 0x00 to 0x20 (padding)
@@ -131,6 +132,12 @@ rm -f test.tar || framework_failure
 [ $? = 1 ] || test_failed $LINENO
 "${TARLZ}" -q -x -C nx_dir "${test3_lz}"
 [ $? = 1 ] || test_failed $LINENO
+touch empty.tar.lz empty.tlz			# list an empty (zero-length) lz file
+"${TARLZ}" -q -tf empty.tar.lz
+[ $? = 2 ] || test_failed $LINENO		# listing must exit with status 2
+"${TARLZ}" -q -tf empty.tlz			# same check with the '.tlz' suffix
+[ $? = 2 ] || test_failed $LINENO
+rm -f empty.tar.lz empty.tlz || framework_failure
 "${TARLZ}" -q -cr
 [ $? = 1 ] || test_failed $LINENO
 "${TARLZ}" -q -ct
@@ -180,6 +187,11 @@ cmp cfoo foo || test_failed $LINENO
 cmp cbar bar || test_failed $LINENO
 cmp cbaz baz || test_failed $LINENO
 rm -f foo bar baz || framework_failure
+"${TARLZ}" -xf "${test3_lz}" foo/ bar// baz/// || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO	# names given with trailing '/' must still extract
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure	# remove the extracted files before the next test
 "${TARLZ}" -xf "${test3}" --missing-crc || test_failed $LINENO
 cmp cfoo foo || test_failed $LINENO
 cmp cbar bar || test_failed $LINENO
@@ -330,41 +342,20 @@ cmp out.tar.lz aout.tar.lz || test_failed $LINENO
 "${TARLZ}" -q -rf aout.tar.lz aout.tar.lz || test_failed $LINENO
 cmp out.tar.lz aout.tar.lz || test_failed $LINENO
 rm -f aout.tar.lz || framework_failure
-
-# test --append
-"${TARLZ}" --dsolid -0 -cf aout.tar.lz foo bar baz || test_failed $LINENO
+# variations of --create that must reproduce out.tar.lz
+"${TARLZ}" -0 -cf aout.tar.lz foo bar baz -C / || test_failed $LINENO	# trailing -C
 cmp out.tar.lz aout.tar.lz || test_failed $LINENO
 rm -f aout.tar.lz || framework_failure
-"${TARLZ}" -0 -q -cf aout.tar.lz foo/ ./bar ./baz/ || test_failed $LINENO
+"${TARLZ}" -0 -C / -cf aout.tar.lz -C "${objdir}"/tmp foo bar baz ||
+	test_failed $LINENO		# a later -C selects the directory holding the files
 cmp out.tar.lz aout.tar.lz || test_failed $LINENO
 rm -f aout.tar.lz || framework_failure
-"${TARLZ}" -0 -cf aout.tar.lz foo || test_failed $LINENO
-"${TARLZ}" -0 -rf aout.tar.lz bar baz || test_failed $LINENO
+"${TARLZ}" --asolid -0 -cf aout.tar.lz foo bar baz || test_failed $LINENO	# --asolid
 cmp out.tar.lz aout.tar.lz || test_failed $LINENO
 rm -f aout.tar.lz || framework_failure
-touch aout.tar.lz || framework_failure		# append to empty file
-"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO
-cmp out.tar.lz aout.tar.lz || test_failed $LINENO
-"${TARLZ}" -0 -rf aout.tar.lz || test_failed $LINENO	# append nothing
-cmp out.tar.lz aout.tar.lz || test_failed $LINENO
-"${TARLZ}" -0 -rf aout.tar.lz -C nx_dir || test_failed $LINENO
-cmp out.tar.lz aout.tar.lz || test_failed $LINENO
-"${TARLZ}" -0 -q -rf aout.tar.lz nx_file
-[ $? = 1 ] || test_failed $LINENO
-cmp out.tar.lz aout.tar.lz || test_failed $LINENO
-cat "${eof_lz}" > aout.tar.lz || framework_failure	# append to empty archive
-"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO
+"${TARLZ}" -0 -q -cf aout.tar.lz foo/ ./bar ./baz/ || test_failed $LINENO
 cmp out.tar.lz aout.tar.lz || test_failed $LINENO
-rm -f foo bar baz || framework_failure
-"${TARLZ}" -xf out.tar.lz foo/ bar// baz/// || test_failed $LINENO
-cmp cfoo foo || test_failed $LINENO
-cmp cbar bar || test_failed $LINENO
-cmp cbaz baz || test_failed $LINENO
-rm -f foo bar baz || framework_failure
-"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
-cmp cfoo foo || test_failed $LINENO
-cmp cbar bar || test_failed $LINENO
-cmp cbaz baz || test_failed $LINENO
+rm -f aout.tar.lz || framework_failure
 mkdir dir1 || framework_failure
 "${TARLZ}" -C dir1 -xf out.tar.lz || test_failed $LINENO
 cmp cfoo dir1/foo || test_failed $LINENO
@@ -397,17 +388,39 @@ rm -f foo dir1/bar baz || framework_failure
 	test_failed $LINENO
 cmp out.tar.lz aout.tar.lz || test_failed $LINENO
 "${TARLZ}" -0 -cf aout.tar.lz dir1/foo dir1/baz || test_failed $LINENO
-rm -rf dir1 || framework_failure
+rm -rf dir1 bar || framework_failure
 "${TARLZ}" -xf aout.tar.lz dir1 || test_failed $LINENO
 cmp cfoo dir1/foo || test_failed $LINENO
 cmp cbaz dir1/baz || test_failed $LINENO
 rm -rf dir1 || framework_failure
 rm -f out.tar.lz aout.tar.lz || framework_failure
 
-# append to solid archive
+# test --append
 cat cfoo > foo || framework_failure
 cat cbar > bar || framework_failure
 cat cbaz > baz || framework_failure
+"${TARLZ}" -0 -cf out.tar.lz foo bar baz || test_failed $LINENO	# reference, default options
+"${TARLZ}" -0 -cf nout.tar.lz foo bar baz --no-solid || test_failed $LINENO	# reference, --no-solid
+"${TARLZ}" -0 -cf aout.tar.lz foo || test_failed $LINENO
+"${TARLZ}" -0 -rf aout.tar.lz bar baz --no-solid || test_failed $LINENO
+cmp nout.tar.lz aout.tar.lz || test_failed $LINENO	# create + append must equal one-step create
+rm -f nout.tar.lz aout.tar.lz || framework_failure
+touch aout.tar.lz || framework_failure		# append to empty file
+"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+"${TARLZ}" -0 -rf aout.tar.lz || test_failed $LINENO	# append nothing
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+"${TARLZ}" -0 -rf aout.tar.lz -C nx_dir || test_failed $LINENO	# -C nx_dir, no files: must succeed
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+"${TARLZ}" -0 -q -rf aout.tar.lz nx_file	# appending nx_file must fail ...
+[ $? = 1 ] || test_failed $LINENO		# ... with exit status 1
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO	# ... and leave the archive unchanged
+cat "${eof_lz}" > aout.tar.lz || framework_failure	# append to empty archive
+"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+rm -f out.tar.lz aout.tar.lz || framework_failure
+
+# append to solid archive
 "${TARLZ}" --solid -0 -cf out.tar.lz foo || test_failed $LINENO
 cat out.tar.lz > aout.tar.lz || framework_failure
 for i in --asolid --bsolid --dsolid --solid -0 ; do
@@ -434,11 +447,12 @@ rm -f foo bar baz || framework_failure
 
 # test directories and links
 mkdir dir1 || framework_failure
-"${TARLZ}" -0 -cf out.tar dir1 || test_failed $LINENO
+"${TARLZ}" -0 -cf out.tar.lz dir1 || test_failed $LINENO
 rmdir dir1 || framework_failure
-"${TARLZ}" -xf out.tar || test_failed $LINENO
+"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
 [ -d dir1 ] || test_failed $LINENO
 rmdir dir1
+rm -f out.tar.lz || framework_failure
 mkdir dir1 || framework_failure
 "${TARLZ}" --uncompressed -cf out.tar dir1 || test_failed $LINENO
 rmdir dir1 || framework_failure
@@ -463,9 +477,9 @@ if ln dummy_file dummy_link 2> /dev/null &&
 	ln dir1/dir2/dir3/in "${path_106}" || framework_failure
 	ln -s in dir1/dir2/dir3/link || framework_failure
 	ln -s "${name_100}" dir1/dir2/dir3/link_100 || framework_failure
-	"${TARLZ}" -0 -cf out.tar dir1 || test_failed $LINENO
+	"${TARLZ}" -0 -cf out.tar.lz dir1 || test_failed $LINENO
 	rm -rf dir1 || framework_failure
-	"${TARLZ}" -xf out.tar || test_failed $LINENO
+	"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
 	cmp "${in}" dir1/dir2/dir3/in || test_failed $LINENO
 	cmp "${in}" dir1/dir2/dir3/"${name_100}" || test_failed $LINENO
 	cmp "${in}" "${path_100}" || test_failed $LINENO
@@ -475,12 +489,13 @@ if ln dummy_file dummy_link 2> /dev/null &&
 	rm -f dir1/dir2/dir3/in || framework_failure
 	cmp "${in}" dir1/dir2/dir3/link 2> /dev/null && test_failed $LINENO
 	cmp "${in}" dir1/dir2/dir3/link_100 || test_failed $LINENO
-	"${TARLZ}" -xf out.tar || test_failed $LINENO
-	rm -f out.tar || framework_failure
+	"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
+	rm -f out.tar.lz || framework_failure
 	cmp "${in}" dir1/dir2/dir3/in || test_failed $LINENO
 	cmp "${in}" dir1/dir2/dir3/link || test_failed $LINENO
-	"${TARLZ}" -0 -q -c ../tmp/dir1 > /dev/null || test_failed $LINENO
-	rm -rf dir1 || framework_failure
+	"${TARLZ}" -0 -q -c ../tmp/dir1 | "${TARLZ}" -x || test_failed $LINENO
+	diff -r tmp/dir1 dir1 || test_failed $LINENO	# piped archive was extracted under ./tmp/dir1
+	rm -rf tmp/dir1 dir1 || framework_failure	# NOTE(review): ./tmp itself is left behind; confirm later cleanup
 else
 	printf "\nwarning: skipping link test: 'ln' does not work on your system."
 fi
@@ -496,9 +511,13 @@ if [ "${ln_works}" = yes ] ; then
 	mkdir dir2 || framework_failure
 	"${TARLZ}" -C dir2 -xf "${t155_lz}" || test_failed $LINENO
 	diff -r dir1 dir2 || test_failed $LINENO
+	"${TARLZ}" -cf out.tar.lz dir2 || test_failed $LINENO
+	rm -rf dir2 || framework_failure	# dir2 was just archived in out.tar.lz; remove it
+	"${TARLZ}" -xf out.tar.lz || test_failed $LINENO	# recreate dir2 from the archive
+	diff -r dir1 dir2 || test_failed $LINENO	# round trip must still match dir1
 	rmdir dir2 2> /dev/null && test_failed $LINENO
 	rmdir dir1 2> /dev/null && test_failed $LINENO
-	rm -rf dir2 dir1 || framework_failure
+	rm -rf out.tar.lz dir2 dir1 || framework_failure
 fi
 
 "${TARLZ}" -tvf "${testdir}"/ug32chars.tar.lz | grep -q \
diff --git a/testsuite/test3_dot.tar.lz b/testsuite/test3_dot.tar.lz
index 3ff8960..8fd3d1f 100644
--- a/testsuite/test3_dot.tar.lz
+++ b/testsuite/test3_dot.tar.lz