From cfb04b6a293e8e21107bcc938021275d0647637c Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Wed, 27 Jan 2021 16:59:05 +0100
Subject: Merging upstream version 1.9.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 decompress.cc | 220 +++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 133 insertions(+), 87 deletions(-)

(limited to 'decompress.cc')
diff --git a/decompress.cc b/decompress.cc
index 19cb1df..6765582 100644
--- a/decompress.cc
+++ b/decompress.cc
@@ -1,19 +1,19 @@
-/*  Plzip - Massively parallel implementation of lzip
-    Copyright (C) 2009 Laszlo Ersek.
-    Copyright (C) 2009-2019 Antonio Diaz Diaz.
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+/* Plzip - Massively parallel implementation of lzip
+   Copyright (C) 2009 Laszlo Ersek.
+   Copyright (C) 2009-2021 Antonio Diaz Diaz.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
 #define _FILE_OFFSET_BITS 64
@@ -27,7 +27,6 @@
 #include <cstring>
 #include <string>
 #include <vector>
-#include <pthread.h>
 #include <stdint.h>
 #include <unistd.h>
 #include <sys/stat.h>
@@ -37,8 +36,9 @@
 #include "lzip_index.h"
 
 
-// This code is based on a patch by Hannes Domani, ssbssa@yahoo.de
-// to be able to compile plzip under MS Windows (with MINGW compiler).
+/* This code is based on a patch by Hannes Domani, <ssbssa@yahoo.de> to make
+   possible compiling plzip under MS Windows (with MINGW compiler).
+*/
 #if defined(__MSVCRT__) && defined(WITH_MINGW)
 #include <windows.h>
 #warning "Parallel I/O is not guaranteed to work on Windows."
@@ -76,9 +76,9 @@ ssize_t pwrite( int fd, const void *buf, size_t count, uint64_t offset )
 #endif	// __MSVCRT__
 
 
-// Returns the number of bytes really read.
-// If (returned value < size) and (errno == 0), means EOF was reached.
-//
+/* Returns the number of bytes really read.
+   If (returned value < size) and (errno == 0), means EOF was reached.
+*/
 int preadblock( const int fd, uint8_t * const buf, const int size,
                 const long long pos )
   {
@@ -96,9 +96,9 @@ int preadblock( const int fd, uint8_t * const buf, const int size,
   }
 
 
-// Returns the number of bytes really written.
-// If (returned value < size), it is always an error.
-//
+/* Returns the number of bytes really written.
+   If (returned value < size), it is always an error.
+*/
 int pwriteblock( const int fd, const uint8_t * const buf, const int size,
                  const long long pos )
   {
@@ -115,18 +115,39 @@ int pwriteblock( const int fd, const uint8_t * const buf, const int size,
   }
 
 
-int decompress_read_error( struct LZ_Decoder * const decoder,
-                           const Pretty_print & pp, const int worker_id )
+void decompress_error( struct LZ_Decoder * const decoder,
+                       const Pretty_print & pp,
+                       Shared_retval & shared_retval, const int worker_id )
   {
   const LZ_Errno errcode = LZ_decompress_errno( decoder );
+  const int retval = ( errcode == LZ_header_error || errcode == LZ_data_error ||
+                       errcode == LZ_unexpected_eof ) ? 2 : 1;
+  if( !shared_retval.set_value( retval ) ) return;
   pp();
   if( verbosity >= 0 )
-    std::fprintf( stderr, "LZ_decompress_read error in worker %d: %s\n",
-                  worker_id, LZ_strerror( errcode ) );
-  if( errcode == LZ_header_error || errcode == LZ_unexpected_eof ||
-      errcode == LZ_data_error )
-    return 2;
-  return 1;
+    std::fprintf( stderr, "%s in worker %d\n", LZ_strerror( errcode ),
+                  worker_id );
+  }
+
+
+void show_results( const unsigned long long in_size,
+                   const unsigned long long out_size,
+                   const unsigned dictionary_size, const bool testing )
+  {
+  if( verbosity >= 2 )
+    {
+    if( verbosity >= 4 ) show_header( dictionary_size );
+    if( out_size == 0 || in_size == 0 )
+      std::fputs( "no data compressed. ", stderr );
+    else
+      std::fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved. ",
+                    (double)out_size / in_size,
+                    ( 100.0 * in_size ) / out_size,
+                    100.0 - ( ( 100.0 * in_size ) / out_size ) );
+    if( verbosity >= 3 )
+      std::fprintf( stderr, "%9llu out, %8llu in. ", out_size, in_size );
+    }
+  if( verbosity >= 1 ) std::fputs( testing ? "ok\n" : "done\n", stderr );
   }
 
 
@@ -136,32 +157,38 @@ struct Worker_arg
   {
   const Lzip_index * lzip_index;
   const Pretty_print * pp;
+  Shared_retval * shared_retval;
   int worker_id;
   int num_workers;
   int infd;
   int outfd;
+  bool nocopy;		// avoid copying decompressed data when testing
   };
 
 
-       // read members from file, decompress their contents, and
-       // write the produced data to file.
+/* Read members from input file, decompress their contents, and write to
+   output file the data produced.
+*/
 extern "C" void * dworker( void * arg )
   {
   const Worker_arg & tmp = *(const Worker_arg *)arg;
   const Lzip_index & lzip_index = *tmp.lzip_index;
   const Pretty_print & pp = *tmp.pp;
+  Shared_retval & shared_retval = *tmp.shared_retval;
   const int worker_id = tmp.worker_id;
   const int num_workers = tmp.num_workers;
   const int infd = tmp.infd;
   const int outfd = tmp.outfd;
+  const bool nocopy = tmp.nocopy;
   const int buffer_size = 65536;
 
   uint8_t * const ibuffer = new( std::nothrow ) uint8_t[buffer_size];
-  uint8_t * const obuffer = new( std::nothrow ) uint8_t[buffer_size];
+  uint8_t * const obuffer =
+    nocopy ? 0 : new( std::nothrow ) uint8_t[buffer_size];
   LZ_Decoder * const decoder = LZ_decompress_open();
-  if( !ibuffer || !obuffer || !decoder ||
+  if( !ibuffer || ( !nocopy && !obuffer ) || !decoder ||
       LZ_decompress_errno( decoder ) != LZ_ok )
-    { pp( "Not enough memory." ); cleanup_and_fail(); }
+    { if( shared_retval.set_value( 1 ) ) { pp( mem_msg ); } goto done; }
 
   for( long i = worker_id; i < lzip_index.members(); i += num_workers )
     {
@@ -172,6 +199,7 @@ extern "C" void * dworker( void * arg )
 
     while( member_rest > 0 )
       {
+      if( shared_retval() ) goto done;	// other worker found a problem
       while( LZ_decompress_write_size( decoder ) > 0 )
         {
         const int size = std::min( LZ_decompress_write_size( decoder ),
@@ -179,7 +207,8 @@ extern "C" void * dworker( void * arg )
         if( size > 0 )
           {
           if( preadblock( infd, ibuffer, size, member_pos ) != size )
-            { pp(); show_error( "Read error", errno ); cleanup_and_fail(); }
+            { if( shared_retval.set_value( 1 ) )
+                { pp(); show_error( "Read error", errno ); } goto done; }
           member_pos += size;
           member_rest -= size;
           if( LZ_decompress_write( decoder, ibuffer, size ) != size )
@@ -191,17 +220,18 @@ extern "C" void * dworker( void * arg )
         {
         const int rd = LZ_decompress_read( decoder, obuffer, buffer_size );
         if( rd < 0 )
-          cleanup_and_fail( decompress_read_error( decoder, pp, worker_id ) );
+          { decompress_error( decoder, pp, shared_retval, worker_id );
+            goto done; }
         if( rd > 0 && outfd >= 0 )
           {
           const int wr = pwriteblock( outfd, obuffer, rd, data_pos );
           if( wr != rd )
             {
-            pp();
-            if( verbosity >= 0 )
-              std::fprintf( stderr, "Write error in worker %d: %s\n",
-                            worker_id, std::strerror( errno ) );
-            cleanup_and_fail();
+            if( shared_retval.set_value( 1 ) ) { pp();
+              if( verbosity >= 0 )
+                std::fprintf( stderr, "Write error in worker %d: %s\n",
+                              worker_id, std::strerror( errno ) ); }
+            goto done;
             }
           }
         if( rd > 0 )
@@ -221,98 +251,114 @@ extern "C" void * dworker( void * arg )
       }
     show_progress( lzip_index.mblock( i ).size() );
     }
-
-  delete[] obuffer; delete[] ibuffer;
-  if( LZ_decompress_member_position( decoder ) != 0 )
-    { pp( "Error, some data remains in decoder." ); cleanup_and_fail(); }
-  if( LZ_decompress_close( decoder ) < 0 )
-    { pp( "LZ_decompress_close failed." ); cleanup_and_fail(); }
+done:
+  if( obuffer ) { delete[] obuffer; } delete[] ibuffer;
+  if( LZ_decompress_member_position( decoder ) != 0 &&
+      shared_retval.set_value( 1 ) )
+    pp( "Error, some data remains in decoder." );
+  if( LZ_decompress_close( decoder ) < 0 && shared_retval.set_value( 1 ) )
+    pp( "LZ_decompress_close failed." );
   return 0;
   }
 
 } // end namespace
 
 
-    // start the workers and wait for them to finish.
+// start the workers and wait for them to finish.
 int decompress( const unsigned long long cfile_size, int num_workers,
                 const int infd, const int outfd, const Pretty_print & pp,
                 const int debug_level, const int in_slots,
                 const int out_slots, const bool ignore_trailing,
-                const bool loose_trailing, const bool infd_isreg )
+                const bool loose_trailing, const bool infd_isreg,
+                const bool one_to_one )
   {
   if( !infd_isreg )
     return dec_stream( cfile_size, num_workers, infd, outfd, pp, debug_level,
                        in_slots, out_slots, ignore_trailing, loose_trailing );
 
   const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing );
-  if( lzip_index.retval() == 1 )
+  if( lzip_index.retval() == 1 )	// decompress as stream if seek fails
     {
     lseek( infd, 0, SEEK_SET );
     return dec_stream( cfile_size, num_workers, infd, outfd, pp, debug_level,
                        in_slots, out_slots, ignore_trailing, loose_trailing );
     }
-  if( lzip_index.retval() != 0 )
-    { show_file_error( pp.name(), lzip_index.error().c_str() );
-      return lzip_index.retval(); }
+  if( lzip_index.retval() != 0 )	// corrupt or invalid input file
+    {
+    if( lzip_index.bad_magic() )
+      show_file_error( pp.name(), lzip_index.error().c_str() );
+    else pp( lzip_index.error().c_str() );
+    return lzip_index.retval();
+    }
 
-  if( num_workers > lzip_index.members() )
-    num_workers = lzip_index.members();
-  if( verbosity >= 1 ) pp();
-  show_progress( 0, cfile_size, &pp );			// init
+  if( num_workers > lzip_index.members() ) num_workers = lzip_index.members();
 
   if( outfd >= 0 )
     {
     struct stat st;
-    if( fstat( outfd, &st ) != 0 || !S_ISREG( st.st_mode ) ||
+    if( !one_to_one || fstat( outfd, &st ) != 0 || !S_ISREG( st.st_mode ) ||
         lseek( outfd, 0, SEEK_CUR ) < 0 )
-    return dec_stdout( num_workers, infd, outfd, pp, debug_level, out_slots,
-                       lzip_index );
+      {
+      if( debug_level & 2 ) std::fputs( "decompress file to stdout.\n", stderr );
+      if( verbosity >= 1 ) pp();
+      show_progress( 0, cfile_size, &pp );			// init
+      return dec_stdout( num_workers, infd, outfd, pp, debug_level, out_slots,
+                         lzip_index );
+      }
     }
 
+  if( debug_level & 2 ) std::fputs( "decompress file to file.\n", stderr );
+  if( verbosity >= 1 ) pp();
+  show_progress( 0, cfile_size, &pp );			// init
+
   Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
   pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
   if( !worker_args || !worker_threads )
-    { pp( "Not enough memory." ); cleanup_and_fail(); }
-  for( int i = 0; i < num_workers; ++i )
+    { pp( mem_msg ); delete[] worker_threads; delete[] worker_args; return 1; }
+
+#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012
+  const bool nocopy = ( outfd < 0 && LZ_api_version() >= 1012 );
+#else
+  const bool nocopy = false;
+#endif
+
+  Shared_retval shared_retval;
+  int i = 0;				// number of workers started
+  for( ; i < num_workers; ++i )
     {
     worker_args[i].lzip_index = &lzip_index;
     worker_args[i].pp = &pp;
+    worker_args[i].shared_retval = &shared_retval;
     worker_args[i].worker_id = i;
     worker_args[i].num_workers = num_workers;
     worker_args[i].infd = infd;
     worker_args[i].outfd = outfd;
+    worker_args[i].nocopy = nocopy;
     const int errcode =
       pthread_create( &worker_threads[i], 0, dworker, &worker_args[i] );
     if( errcode )
-      { show_error( "Can't create worker threads", errcode ); cleanup_and_fail(); }
+      { if( shared_retval.set_value( 1 ) )
+          { show_error( "Can't create worker threads", errcode ); } break; }
     }
 
-  for( int i = num_workers - 1; i >= 0; --i )
+  while( --i >= 0 )
     {
     const int errcode = pthread_join( worker_threads[i], 0 );
-    if( errcode )
-      { show_error( "Can't join worker threads", errcode ); cleanup_and_fail(); }
+    if( errcode && shared_retval.set_value( 1 ) )
+      show_error( "Can't join worker threads", errcode );
     }
   delete[] worker_threads;
   delete[] worker_args;
 
-  if( verbosity >= 2 )
-    {
-    if( verbosity >= 4 ) show_header( lzip_index.dictionary_size( 0 ) );
-    const unsigned long long in_size = lzip_index.cdata_size();
-    const unsigned long long out_size = lzip_index.udata_size();
-    if( out_size == 0 || in_size == 0 )
-      std::fputs( "no data compressed.  ", stderr );
-    else
-      std::fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved.  ",
-                    (double)out_size / in_size,
-                    ( 100.0 * in_size ) / out_size,
-                    100.0 - ( ( 100.0 * in_size ) / out_size ) );
-    if( verbosity >= 3 )
-      std::fprintf( stderr, "decompressed %9llu, compressed %8llu.  ",
-                    out_size, in_size );
-    }
-  if( verbosity >= 1 ) std::fputs( (outfd < 0) ? "ok\n" : "done\n", stderr );
+  if( shared_retval() ) return shared_retval();	// some thread found a problem
+
+  if( verbosity >= 1 )
+    show_results( lzip_index.cdata_size(), lzip_index.udata_size(),
+                  lzip_index.dictionary_size(), outfd < 0 );
+
+  if( debug_level & 1 )
+    std::fprintf( stderr,
+      "workers started                           %8u\n", num_workers );
 
   return 0;
   }
-- 
cgit v1.2.3