From aa4d2adf37f7449dd1a99df517de0a9ee97867bd Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Wed, 23 Jan 2019 18:42:00 +0100
Subject: Adding upstream version 0.9.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 extract.cc | 335 +++++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 239 insertions(+), 96 deletions(-)

(limited to 'extract.cc')
diff --git a/extract.cc b/extract.cc
index 58cda61..e25f5b6 100644
--- a/extract.cc
+++ b/extract.cc
@@ -1,5 +1,5 @@
 /*  Tarlz - Archiver with multimember lzip compression
-    Copyright (C) 2013-2018 Antonio Diaz Diaz.
+    Copyright (C) 2013-2019 Antonio Diaz Diaz.
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -18,7 +18,9 @@
 #define _FILE_OFFSET_BITS 64
 
 #include <algorithm>
+#include <cctype>
 #include <cerrno>
+#include <climits>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
@@ -36,13 +38,15 @@
 
 #include "arg_parser.h"
 #include "lzip.h"
+#include "lzip_index.h"
 #include "tarlz.h"
 
 
 namespace {
 
+Resizable_buffer grbuf( initial_line_length );
 int gretval = 0;
-bool has_lz_ext;			// global var for archive_read
+bool has_lz_ext;				// global var for archive_read
 
 void skip_warn( const bool reset = false )	// avoid duplicate warnings
   {
@@ -83,13 +87,6 @@ bool make_path( const std::string & name )
   }
 
 
-inline bool block_is_zero( const uint8_t * const buf, const int size )
-  {
-  for( int i = 0; i < size; ++i ) if( buf[i] != 0 ) return false;
-  return true;
-  }
-
-
 // Return value: 0 = OK, 1 = damaged member, 2 = fatal error.
 // If sizep and error, return in *sizep the number of bytes read.
 // The first 6 bytes of the archive must be intact for islz to be meaningful.
@@ -114,6 +111,7 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
       { show_error( "Error reading archive", errno ); fatal = true; return 2; }
     const Lzip_header & header = (*(const Lzip_header *)buf);
     bool islz = ( rd >= min_member_size && header.verify_magic() &&
+                  header.verify_version() &&
                   isvalid_ds( header.dictionary_size() ) );
     const bool istar = ( rd == size && verify_ustar_chksum( buf ) );
     const bool iseof =
@@ -160,8 +158,8 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
       skip_warn(); gretval = 2; return 1;
       }
     if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
-     { LZ_decompress_close( decoder );
-       show_error( "Archive ends unexpectedly." ); fatal = true; return 2; }
+      { LZ_decompress_close( decoder );
+        show_error( "Archive ends unexpectedly." ); fatal = true; return 2; }
     sz += rd; if( sizep ) *sizep = sz;
     if( sz == size && LZ_decompress_finished( decoder ) == 1 &&
         LZ_decompress_close( decoder ) < 0 )
@@ -185,12 +183,14 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
   }
 
 
-const char * mode_string( const Tar_header header )
+enum { mode_string_size = 10,
+       group_string_size = 1 + uname_l + 1 + gname_l + 1 };	// 67
+
+void format_mode_string( const Tar_header header, char buf[mode_string_size] )
   {
-  static char buf[11];
   const Typeflag typeflag = (Typeflag)header[typeflag_o];
 
-  std::memcpy( buf, "----------", sizeof buf - 1 );
+  std::memcpy( buf, "----------", mode_string_size );
   switch( typeflag )
     {
     case tf_regular: break;
@@ -203,7 +203,7 @@ const char * mode_string( const Tar_header header )
     case tf_hiperf: buf[0] = 'C'; break;
     default: buf[0] = '?';
     }
-  const mode_t mode = strtoul( header + mode_o, 0, 8 );		// 12 bits
+  const mode_t mode = parse_octal( header + mode_o, mode_l );	// 12 bits
   const bool setuid = mode & S_ISUID;
   const bool setgid = mode & S_ISGID;
   const bool sticky = mode & S_ISVTX;
@@ -219,46 +219,79 @@ const char * mode_string( const Tar_header header )
   if( mode & S_IWOTH ) buf[8] = 'w';
   if( mode & S_IXOTH ) buf[9] = sticky ? 't' : 'x';
   else if( sticky ) buf[9] = 'T';
-  return buf;
   }
 
 
-const char * user_group_string( const Tar_header header )
+int format_user_group_string( const Tar_header header,
+                              char buf[group_string_size] )
   {
-  enum { bufsize = uname_l + 1 + gname_l + 1 };
-  static char buf[bufsize];
-
+  int len;
   if( header[uname_o] && header[gname_o] )
-    snprintf( buf, bufsize, "%.32s/%.32s", header + uname_o, header + gname_o );
+    len = snprintf( buf, group_string_size,
+                    " %.32s/%.32s", header + uname_o, header + gname_o );
   else
     {
-    const int uid = strtoul( header + uid_o, 0, 8 );
-    const int gid = strtoul( header + gid_o, 0, 8 );
-    snprintf( buf, bufsize, "%u/%u", uid, gid );
+    const unsigned uid = parse_octal( header + uid_o, uid_l );
+    const unsigned gid = parse_octal( header + gid_o, gid_l );
+    len = snprintf( buf, group_string_size, " %u/%u", uid, gid );
     }
-  return buf;
+  return len;
   }
 
+} // end namespace
 
-void show_member_name( const Extended & extended, const Tar_header header,
-                       const int vlevel )
+bool block_is_zero( const uint8_t * const buf, const int size )
   {
-  if( verbosity < vlevel ) return;
-  if( verbosity > vlevel )
+  for( int i = 0; i < size; ++i ) if( buf[i] != 0 ) return false;
+  return true;
+  }
+
+
+void format_member_name( const Extended & extended, const Tar_header header,
+                         Resizable_buffer & rbuf, const bool long_format )
+  {
+  if( long_format )
     {
-    const time_t mtime = strtoull( header + mtime_o, 0, 8 );	// 33 bits
-    const struct tm * const tm = localtime( &mtime );
+    format_mode_string( header, rbuf() );
+    const int group_string_len =
+      format_user_group_string( header, rbuf() + mode_string_size );
+    const int offset = mode_string_size + group_string_len;
+    const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
+    struct tm tms;
+    const struct tm * tm = localtime_r( &mtime, &tms );
+    if( !tm )
+      { time_t z = 0; tm = localtime_r( &z, &tms ); if( !tm ) tm = &tms; }
     const Typeflag typeflag = (Typeflag)header[typeflag_o];
     const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
     const char * const link_string = !islink ? "" :
                          ( ( typeflag == tf_link ) ? " link to " : " -> " );
-    std::printf( "%s %s %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n",
-                 mode_string( header ), user_group_string( header ),
-                 extended.size, 1900 + tm->tm_year, 1 + tm->tm_mon,
-                 tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path.c_str(),
-                 link_string, !islink ? "" : extended.linkpath.c_str() );
+    for( int i = 0; i < 2; ++i )
+      {
+      const int len = snprintf( rbuf() + offset, rbuf.size() - offset,
+                  " %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n",
+                  extended.size, 1900 + tm->tm_year, 1 + tm->tm_mon,
+                  tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path.c_str(),
+                  link_string, !islink ? "" : extended.linkpath.c_str() );
+      if( (int)rbuf.size() > len + offset ) break;
+      else rbuf.resize( len + offset + 1 );
+      }
+    }
+  else
+    {
+    if( rbuf.size() < extended.path.size() + 2 )
+      rbuf.resize( extended.path.size() + 2 );
+    snprintf( rbuf(), rbuf.size(), "%s\n", extended.path.c_str() );
     }
-  else std::printf( "%s\n", extended.path.c_str() );
+  }
+
+namespace {
+
+void show_member_name( const Extended & extended, const Tar_header header,
+                       const int vlevel, Resizable_buffer & rbuf )
+  {
+  if( verbosity < vlevel ) return;
+  format_member_name( extended, header, rbuf, verbosity > vlevel );
+  std::fputs( rbuf(), stdout );
   std::fflush( stdout );
   }
 
@@ -266,7 +299,7 @@ void show_member_name( const Extended & extended, const Tar_header header,
 int list_member( const int infd, const Extended & extended,
                  const Tar_header header, const bool skip )
   {
-  if( !skip ) show_member_name( extended, header, 0 );
+  if( !skip ) show_member_name( extended, header, 0, grbuf );
 
   const unsigned bufsize = 32 * header_size;
   uint8_t buf[bufsize];
@@ -304,13 +337,13 @@ int extract_member( const int infd, const Extended & extended,
     show_file_error( filename, "Contains a '..' component, skipping." );
     return list_member( infd, extended, header, true );
     }
-  const mode_t mode = strtoul( header + mode_o, 0, 8 );		// 12 bits
-  const time_t mtime = strtoull( header + mtime_o, 0, 8 );	// 33 bits
+  const mode_t mode = parse_octal( header + mode_o, mode_l );	 // 12 bits
+  const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
   const Typeflag typeflag = (Typeflag)header[typeflag_o];
   const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
   int outfd = -1;
 
-  show_member_name( extended, header, 1 );
+  show_member_name( extended, header, 1, grbuf );
   std::remove( filename );
   make_path( filename );
   switch( typeflag )
@@ -352,8 +385,9 @@ int extract_member( const int infd, const Extended & extended,
     case tf_chardev:
     case tf_blockdev:
       {
-      const unsigned dev = makedev( strtoul( header + devmajor_o, 0, 8 ),
-                                    strtoul( header + devminor_o, 0, 8 ) );
+      const unsigned dev =
+        makedev( parse_octal( header + devmajor_o, devmajor_l ),
+                 parse_octal( header + devminor_o, devminor_l ) );
       const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode;
       if( mknod( filename, dmode, dev ) != 0 )
         {
@@ -376,8 +410,8 @@ int extract_member( const int infd, const Extended & extended,
       return 2;
     }
 
-  const uid_t uid = (uid_t)strtoul( header + uid_o, 0, 8 );
-  const gid_t gid = (gid_t)strtoul( header + gid_o, 0, 8 );
+  const uid_t uid = (uid_t)parse_octal( header + uid_o, uid_l );
+  const gid_t gid = (gid_t)parse_octal( header + gid_o, gid_l );
   if( !islink && chown( filename, uid, gid ) != 0 &&
       errno != EPERM && errno != EINVAL )
     {
@@ -423,6 +457,7 @@ int extract_member( const int infd, const Extended & extended,
   return 0;
   }
 
+} // end namespace
 
 // Removes any amount of leading "./" and '/' strings.
 const char * remove_leading_slash( const char * const filename )
@@ -464,78 +499,163 @@ bool compare_tslash( const char * const name1, const char * const name2 )
   return ( !*p && !*q );
   }
 
-} // end namespace
+namespace {
 
+// Parses an unsigned decimal from at most 'size' bytes of 'ptr' (no NUL
+// needed). On error or overflow returns 0 and sets *tailp = ptr.
+unsigned long long parse_decimal( const char * const ptr,
+                const char ** const tailp, const unsigned long long size )
+  {
+  unsigned long long result = 0, i = 0;
+  while( i < size && std::isspace( (unsigned char)ptr[i] ) ) ++i;
+  if( i >= size || !std::isdigit( (unsigned char)ptr[i] ) )	// no digits
+    { if( tailp ) *tailp = ptr; return 0; }
+  for( ; i < size && std::isdigit( (unsigned char)ptr[i] ); ++i )
+    {
+    const unsigned digit = ptr[i] - '0';
+    if( result > ( (unsigned long long)LLONG_MAX - digit ) / 10 )  // overflow
+      { if( tailp ) *tailp = ptr; return 0; }
+    result = result * 10 + digit;
+    }
+  if( tailp ) *tailp = ptr + i;		// first byte after the digits
+  return result;
+  }
 
-bool Extended::parse( const int infd, const Tar_header header,
-                      const bool permissive )
+
+uint32_t parse_record_crc( const char * const ptr )
   {
-  const unsigned long long edsize = strtoull( header + size_o, 0, 8 );
+  uint32_t crc = 0;
+  for( int i = 0; i < 8; ++i )
+    {
+    crc <<= 4;
+    if( ptr[i] >= '0' && ptr[i] <= '9' ) crc += ptr[i] - '0';
+    else if( ptr[i] >= 'A' && ptr[i] <= 'F' ) crc += ptr[i] + 10 - 'A';
+    else if( ptr[i] >= 'a' && ptr[i] <= 'f' ) crc += ptr[i] + 10 - 'a';
+    else { crc = 0; break; }		// invalid digit in crc string
+    }
+  return crc;
+  }
+
+
+bool parse_records( const int infd, Extended & extended,
+                    const Tar_header header, const bool permissive )
+  {
+  const unsigned long long edsize = parse_octal( header + size_o, size_l );
   const unsigned long long bufsize = round_up( edsize );
   if( bufsize == 0 || edsize == 0 || edsize >= 1ULL << 33 )
     return false;				// overflow or no extended data
   char * const buf = new char[bufsize];		// extended records buffer
-  if( archive_read( infd, (uint8_t *)buf, bufsize ) != 0 ) goto error;
+  const bool ret = ( archive_read( infd, (uint8_t *)buf, bufsize ) == 0 &&
+                     extended.parse( buf, edsize, permissive ) );
+  delete[] buf;
+  return ret;
+  }
+
+} // end namespace
+
+
+/* Reads up to 'size' bytes from 'fd' into 'buf', retrying after EINTR.
+   Returns the number of bytes actually read. If (returned value < size)
+   and (errno == 0), EOF was reached; otherwise errno holds the error. */
+int readblock( const int fd, uint8_t * const buf, const int size )
+  {
+  int sz = 0;						// bytes read so far
+  errno = 0;
+  while( sz < size )
+    {
+    const int n = read( fd, buf + sz, size - sz );
+    if( n > 0 ) sz += n;
+    else if( n == 0 ) break;				// EOF
+    else if( errno != EINTR ) break;			// real error, errno is kept
+    errno = 0;						// reset after partial read or EINTR
+    }
+  return sz;
+  }
+
+
+/* Writes 'size' bytes from 'buf' to 'fd', retrying after EINTR and after
+   short writes. Returns the number of bytes actually written.
+   If (returned value < size), it is always an error. */
+int writeblock( const int fd, const uint8_t * const buf, const int size )
+  {
+  int sz = 0;						// bytes written so far
+  errno = 0;
+  while( sz < size )
+    {
+    const int n = write( fd, buf + sz, size - sz );
+    if( n > 0 ) sz += n;
+    else if( n < 0 && errno != EINTR ) break;		// real error, errno is kept
+    errno = 0;						// reset before the next write
+    }
+  return sz;
+  }
+
+
+unsigned long long parse_octal( const uint8_t * const ptr, const int size )
+  {		// value of the octal digits in ptr[0..size); 0 if there are none
+  unsigned long long result = 0;
+  int i = 0;
+  while( i < size && std::isspace( ptr[i] ) ) ++i;	// skip leading whitespace
+  for( ; i < size && ptr[i] >= '0' && ptr[i] <= '7'; ++i )	// stop at non-octal
+    { result <<= 3; result += ptr[i] - '0'; }		// no overflow check
+  return result;
+  }
+
+
+bool Extended::parse( const char * const buf, const unsigned long long edsize,
+                      const bool permissive )
+  {
   for( unsigned long long pos = 0; pos < edsize; )	// parse records
     {
-    char * tail;
-    const unsigned long long rsize = strtoull( buf + pos, &tail, 10 );
+    const char * tail;
+    const unsigned long long rsize =
+      parse_decimal( buf + pos, &tail, edsize - pos );
     if( rsize == 0 || rsize > edsize - pos || tail[0] != ' ' ||
-        buf[pos+rsize-1] != '\n' ) goto error;
+        buf[pos+rsize-1] != '\n' ) return false;
     ++tail;	// point to keyword
-    // length of (keyword + '=' + value) without the final newline
-    const unsigned long long rest = ( buf + pos + rsize - 1 ) - tail;
+    // rest = length of (keyword + '=' + value) without the final newline
+    const unsigned long long rest = ( buf + ( pos + rsize - 1 ) ) - tail;
     if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 )
-      { if( path.size() && !permissive ) goto error;
+      { if( path.size() && !permissive ) return false;
         path.assign( tail + 5, rest - 5 ); }
     else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 )
-      { if( linkpath.size() && !permissive ) goto error;
+      { if( linkpath.size() && !permissive ) return false;
         linkpath.assign( tail + 9, rest - 9 ); }
     else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 )
       {
-      if( size != 0 && !permissive ) goto error;
-      size = 0;
-      for( unsigned long long i = 5; i < rest; ++i )
-        {
-        if( tail[i] < '0' || tail[i] > '9' ) goto error;
-        const unsigned long long prev = size;
-        size = size * 10 + ( tail[i] - '0' );
-        if( size < prev ) goto error;		// overflow
-        }
-      if( size < 1ULL << 33 ) goto error;	// size fits in ustar header
+      if( size != 0 && !permissive ) return false;
+      size = parse_decimal( tail + 5, &tail, rest - 5 );
+      // parse error or size fits in ustar header
+      if( size < 1ULL << 33 || tail != buf + ( pos + rsize - 1 ) ) return false;
       }
     else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 )
       {
-      if( crc_present && !permissive ) goto error;
-      if( rsize != 22 ) goto error;
-      char * t;
-      const uint32_t stored_crc = strtoul( tail + 10, &t, 16 );
-      if( t - tail - 10 != 8 || t[0] != '\n' ) goto error;
+      if( crc_present && !permissive ) return false;
+      if( rsize != 22 ) return false;
+      const uint32_t stored_crc = parse_record_crc( tail + 10 );
       const uint32_t computed_crc =
         crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize );
       crc_present = true;
-      if( stored_crc != computed_crc ) goto error;
+      if( stored_crc != computed_crc ) return false;
       }
     pos += rsize;
     }
-  delete[] buf;
   return true;
-error:
-  delete[] buf;
-  return false;
   }
 
 
 int decode( const std::string & archive_name, const Arg_parser & parser,
-            const int filenames, const bool keep_damaged, const bool listing,
-            const bool missing_crc, const bool permissive )
+            const int filenames, const int num_workers, const int debug_level,
+            const bool keep_damaged, const bool listing, const bool missing_crc,
+            const bool permissive )
   {
   const int infd = archive_name.size() ?
                    open_instream( archive_name ) : STDIN_FILENO;
   if( infd < 0 ) return 1;
 
-  // execute -C options and mark filenames to be extracted or listed
-  std::vector< bool > name_pending( parser.arguments(), false );
+  // Execute -C options and mark filenames to be extracted or listed.
+  // name_pending is of type char instead of bool to allow concurrent update.
+  std::vector< char > name_pending( parser.arguments(), false );
   for( int i = 0; i < parser.arguments(); ++i )
     {
     const int code = parser.code( i );
@@ -549,34 +669,57 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
     if( !code ) name_pending[i] = true;
     }
 
-  has_lz_ext =
+  if( listing && num_workers > 0 )		// multi-threaded --list
+    {
+    const Lzip_index lzip_index( infd, true, false );
+    const long members = lzip_index.members();
+    if( lzip_index.retval() == 0 && ( members >= 3 ||
+        ( members >= 2 && lzip_index.dblock( members - 1 ).size() > 1024 ) ) )
+      { //show_file_error( archive_name.c_str(), "Is compressed seekable" );
+        return list_lz( parser, name_pending, lzip_index, filenames,
+                     debug_level, infd, std::min( (long)num_workers, members ),
+                     missing_crc, permissive ); }
+    lseek( infd, 0, SEEK_SET );
+    }
+
+  has_lz_ext =				// global var for archive_read
     ( archive_name.size() > 3 &&
       archive_name.compare( archive_name.size() - 3, 3, ".lz" ) == 0 ) ||
     ( archive_name.size() > 4 &&
       archive_name.compare( archive_name.size() - 4, 4, ".tlz" ) == 0 );
-  Extended extended;		// metadata from extended records
+  Extended extended;			// metadata from extended records
   int retval = 0;
-  bool prev_extended = false;	// prev header was extended
-  while( true )			// process one member per iteration
+  bool prev_extended = false;		// prev header was extended
+  while( true )				// process one tar member per iteration
     {
-    uint8_t buf[header_size];
-    const int ret = archive_read( infd, buf, header_size );
+    Tar_header header;
+    const int ret = archive_read( infd, header, header_size );
     if( ret == 2 ) return 2;
-    if( ret != 0 || !verify_ustar_chksum( buf ) )
+    if( ret != 0 || !verify_ustar_chksum( header ) )
       {
-      if( ret == 0 && block_is_zero( buf, header_size ) ) break;	// EOF
+      if( ret == 0 && block_is_zero( header, header_size ) ) break;	// EOF
       skip_warn(); gretval = 2; continue;
       }
-    skip_warn( true );		// reset warning
+    skip_warn( true );			// reset warning
 
-    const char * const header = (const char *)buf;
     const Typeflag typeflag = (Typeflag)header[typeflag_o];
+    if( typeflag == tf_global )
+      {
+      if( prev_extended )
+        { show_error( "Format violation: global header after extended header." );
+          return 2; }
+      Extended dummy;		// global headers are parsed and ignored
+      if( !parse_records( infd, dummy, header, true ) )
+        { show_error( "Error in global extended records. Skipping to next header." );
+          gretval = 2; }
+      continue;
+      }
     if( typeflag == tf_extended )
       {
       if( prev_extended && !permissive )
         { show_error( "Format violation: consecutive extended headers found."
-                      /*" Use --permissive."*/, 0, true ); return 2; }
-      if( !extended.parse( infd, header, permissive ) )
+                      /*" Use --permissive.", 0, true*/ ); return 2; }
+      if( !parse_records( infd, extended, header, permissive ) )
         { show_error( "Error in extended records. Skipping to next header." );
           extended.reset(); gretval = 2; }
       else if( !extended.crc_present && missing_crc )
@@ -586,7 +729,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
       }
     prev_extended = false;
 
-    if( extended.linkpath.empty() )
+    if( extended.linkpath.empty() )	// copy linkpath from ustar header
       {
       for( int i = 0; i < linkname_l && header[linkname_o+i]; ++i )
         extended.linkpath += header[linkname_o+i];
@@ -595,7 +738,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
         extended.linkpath.resize( extended.linkpath.size() - 1 );
       }
 
-    if( extended.path.empty() )
+    if( extended.path.empty() )		// copy path from ustar header
       {
       char stored_name[prefix_l+1+name_l+1];
       int len = 0;
@@ -624,7 +767,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
 
     if( extended.size == 0 &&
         ( typeflag == tf_regular || typeflag == tf_hiperf ) )
-      extended.size = strtoull( header + size_o, 0, 8 );
+      extended.size = parse_octal( header + size_o, size_l );
 
     if( listing || skip )
       retval = list_member( infd, extended, header, skip );
-- 
cgit v1.2.3