summaryrefslogtreecommitdiffstats
path: root/zutils.cc
diff options
context:
space:
mode:
Diffstat (limited to 'zutils.cc')
-rw-r--r--zutils.cc292
1 files changed, 292 insertions, 0 deletions
diff --git a/zutils.cc b/zutils.cc
new file mode 100644
index 0000000..74b7351
--- /dev/null
+++ b/zutils.cc
@@ -0,0 +1,292 @@
+/* Zutils - Utilities dealing with compressed files
+ Copyright (C) 2009-2019 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <cerrno>
+#include <csignal>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+#include "rc.h"
+#include "zutils.h"
+
+
+namespace {
+
+// Magic byte sequences identifying each supported compressed format.
+// The first magic byte must be different among formats.
+enum { bzip2_magic_size = 3 };
+const uint8_t bzip2_magic[bzip2_magic_size] =
+  { 0x42, 0x5A, 0x68 };				// "BZh"
+
+enum { gzip_magic_size = 2 };
+const uint8_t gzip_magic[gzip_magic_size] =
+  { 0x1F, 0x8B };
+
+enum { lzip_magic_size = 4 };
+const uint8_t lzip_magic[lzip_magic_size] =
+  { 0x4C, 0x5A, 0x49, 0x50 };			// "LZIP"
+
+enum { xz_magic_size = 5 };
+const uint8_t xz_magic[xz_magic_size] =
+  { 0xFD, 0x37, 0x7A, 0x58, 0x5A };		// 0xFD, "7zXZ"
+
+
+// Polls child process 'pid' without blocking (WNOHANG).
+// Returns -1 if the child has not terminated yet, the exit status of the
+// child if it exited normally, or 2 if it terminated in any other way.
+// If waitpid fails for a reason other than EINTR, a diagnostic naming
+// 'name' is printed (unless verbosity < 0) and the calling process is
+// terminated with status 2.
+//
+int child_status( const pid_t pid, const char * const name )
+  {
+  int wstatus;
+  for( ; ; )
+    {
+    const int rc = waitpid( pid, &wstatus, WNOHANG );
+    if( rc == -1 && errno != EINTR )		// unrecoverable waitpid error
+      {
+      if( verbosity >= 0 )
+        std::fprintf( stderr, "%s: Error checking status of '%s': %s\n",
+                      program_name, name, std::strerror( errno ) );
+      _exit( 2 );
+      }
+    if( rc == 0 ) return -1;			// child still running
+    if( rc == pid ) break;			// child terminated
+    // interrupted by a signal; retry
+    }
+  return WIFEXITED( wstatus ) ? WEXITSTATUS( wstatus ) : 2;
+  }
+
+} // end namespace
+
+
+// Reads up to 'size' bytes from 'fd' into 'buf', retrying after short
+// reads and after reads interrupted by a signal (EINTR).
+// Returns the number of bytes really read.
+// If (returned value < size) and (errno == 0), EOF was reached;
+// otherwise errno holds the error that stopped the reading.
+//
+int readblock( const int fd, uint8_t * const buf, const int size )
+  {
+  int done = 0;
+  // errno is cleared before every read so that a nonzero value on return
+  // always belongs to the read that ended the loop
+  for( errno = 0; done < size; errno = 0 )
+    {
+    const int n = read( fd, buf + done, size - done );
+    if( n > 0 ) { done += n; continue; }
+    if( n == 0 || errno != EINTR ) break;	// EOF or real error
+    }
+  return done;
+  }
+
+
+// Writes 'size' bytes from 'buf' to 'fd', retrying after short writes
+// and after writes interrupted by a signal (EINTR).
+// Returns the number of bytes really written.
+// If (returned value < size), it is always an error and errno holds the
+// error that stopped the writing.
+//
+int writeblock( const int fd, const uint8_t * const buf, const int size )
+  {
+  int done = 0;
+  for( errno = 0; done < size; errno = 0 )
+    {
+    const int n = write( fd, buf + done, size - done );
+    if( n < 0 )
+      { if( errno != EINTR ) break; }		// real error; keep errno
+    else
+      done += n;				// n == 0 just retries
+    }
+  return done;
+  }
+
+
+// Copies to 'outfd' the 'magic_size' bytes at 'magic_data' (if any)
+// followed by everything that can be read from 'infd' until EOF.
+// 'filename' is only used in the read error message; an empty filename
+// means stdin and is shown as "-".
+// Returns true on success. On a read or write error a diagnostic is
+// shown and false is returned.
+//
+bool feed_data( const std::string & filename, const int infd, const int outfd,
+                const uint8_t * magic_data, const int magic_size )
+  {
+  if( magic_size != 0 &&
+      writeblock( outfd, magic_data, magic_size ) != magic_size )
+    { show_error( "Write error", errno ); return false; }
+
+  enum { buffer_size = 4096 };
+  uint8_t buffer[buffer_size];
+  int rd;
+  do {
+    rd = readblock( infd, buffer, buffer_size );
+    if( rd != buffer_size && errno )		// short read caused by an error
+      {
+      const char * const shown_name =
+        filename.empty() ? "-" : filename.c_str();
+      show_file_error( shown_name, "Read error", errno );
+      return false;
+      }
+    if( rd > 0 && writeblock( outfd, buffer, rd ) != rd )
+      { show_error( "Write error", errno ); return false; }
+    }
+  while( rd >= buffer_size );			// a short read means EOF
+  return true;
+  }
+
+
+// Checks the children recorded in 'children' (slot 0 is the data feeder,
+// slot 1 the compressor; a pid of 0 means the slot is unused).
+// If 'finished' is true, waits for each child and checks its status.
+// If 'finished' is false, a child that is still running is sent SIGTERM
+// and waited for (its status is then ignored), while a child that has
+// already terminated has its status checked.
+// Returns true if no checked status was nonzero.
+//
+bool good_status( const Children & children, const bool finished )
+  {
+  bool all_ok = true;
+  for( int i = 0; i < 2; ++i )
+    {
+    const pid_t pid = children.pid[i];
+    if( !pid ) continue;			// unused slot
+    const char * const name =
+      ( i == 0 ) ? "data feeder" : children.compressor_name;
+    if( finished )
+      {
+      if( wait_for_child( pid, name ) != 0 ) all_ok = false;
+      continue;
+      }
+    const int status = child_status( pid, name );
+    if( status < 0 )				// child not terminated
+      { kill( pid, SIGTERM ); wait_for_child( pid, name ); }
+    else if( status != 0 ) all_ok = false;	// child status != 0
+    }
+  return all_ok;
+  }
+
+
+// Sets up the child process(es) that deliver the contents of '*infdp'
+// through a pipe, and stores the read end of that pipe in '*infdp'.
+// If 'format_index' is negative the format is detected with test_format,
+// and the bytes consumed by the detection are re-sent by the feeder.
+// For a compressed format two children are forked: a feeder copying the
+// input into a pipe, and the compressor (run with "-d", or "-dq" when
+// verbosity < 0) reading that pipe and writing into a second one.
+// For uncompressed data only the feeder is forked.
+// On success 'children' receives the pids (0 for a missing compressor)
+// and true is returned.
+// On failure a diagnostic is shown, every pipe descriptor created here
+// is closed, '*infdp' is left unchanged and false is returned.
+//
+bool set_data_feeder( const std::string & filename, int * const infdp,
+                      Children & children, int format_index )
+  {
+  const uint8_t * magic_data = 0;
+  int magic_size = 0;
+  if( format_index < 0 )
+    format_index = test_format( *infdp, &magic_data, &magic_size );
+  children.compressor_name = get_compressor_name( format_index );
+
+  if( children.compressor_name )		// compressed
+    {
+    int fda[2];				// pipe from feeder
+    int fda2[2];			// pipe from compressor
+    if( pipe( fda ) < 0 )
+      { show_error( "Can't create pipe", errno ); return false; }
+    if( pipe( fda2 ) < 0 )
+      {
+      show_error( "Can't create pipe", errno );
+      close( fda[0] ); close( fda[1] );	// don't leak the first pipe
+      return false;
+      }
+    const int old_infd = *infdp;
+    const pid_t pid = fork();
+    if( pid == 0 )			// child 1 (compressor feeder)
+      {
+      if( close( fda[0] ) != 0 ||
+          close( fda2[0] ) != 0 || close( fda2[1] ) != 0 ||
+          !feed_data( filename, old_infd, fda[1], magic_data, magic_size ) )
+        _exit( 2 );
+      if( close( fda[1] ) != 0 )
+        { show_close_error(); _exit( 2 ); }
+      _exit( 0 );
+      }
+    if( pid < 0 )			// parent
+      {
+      // report first; the close calls below may overwrite errno
+      show_fork_error( "data feeder" );
+      close( fda[0] ); close( fda[1] ); close( fda2[0] ); close( fda2[1] );
+      return false;
+      }
+
+    const pid_t pid2 = fork();
+    if( pid2 == 0 )			// child 2 (compressor)
+      {
+      if( dup2( fda[0], STDIN_FILENO ) >= 0 &&
+          dup2( fda2[1], STDOUT_FILENO ) >= 0 &&
+          close( fda[0] ) == 0 && close( fda[1] ) == 0 &&
+          close( fda2[0] ) == 0 && close( fda2[1] ) == 0 )
+        {
+        const std::vector< std::string > & compressor_args =
+          get_compressor_args( format_index );
+        const int size = compressor_args.size();
+        // argv = name + configured args + decompress option + terminator
+        const char ** const argv = new const char *[size+3];
+        argv[0] = children.compressor_name;
+        for( int i = 0; i < size; ++i )
+          argv[i+1] = compressor_args[i].c_str();
+        argv[size+1] = ( verbosity >= 0 ) ? "-d" : "-dq";
+        argv[size+2] = 0;
+        execvp( argv[0], (char **)argv );
+        }
+      // reached only if the descriptor setup or execvp failed
+      show_exec_error( children.compressor_name );
+      _exit( 2 );
+      }
+    if( pid2 < 0 )			// parent
+      {
+      show_fork_error( children.compressor_name );
+      close( fda[0] ); close( fda[1] ); close( fda2[0] ); close( fda2[1] );
+      // stop and reap the already running feeder so it is not left behind
+      kill( pid, SIGTERM );
+      while( waitpid( pid, 0, 0 ) == -1 && errno == EINTR ) {}
+      return false;
+      }
+
+    close( fda[0] ); close( fda[1] ); close( fda2[1] );
+    *infdp = fda2[0];			// caller now reads decompressed data
+    children.pid[0] = pid;
+    children.pid[1] = pid2;
+    }
+  else					// uncompressed
+    {
+    int fda[2];				// pipe from feeder
+    if( pipe( fda ) < 0 )
+      { show_error( "Can't create pipe", errno ); return false; }
+    const int old_infd = *infdp;
+    const pid_t pid = fork();
+    if( pid == 0 )			// child (feeder)
+      {
+      if( close( fda[0] ) != 0 ||
+          !feed_data( filename, old_infd, fda[1], magic_data, magic_size ) )
+        _exit( 2 );
+      if( close( fda[1] ) != 0 )
+        { show_close_error(); _exit( 2 ); }
+      _exit( 0 );
+      }
+    if( pid < 0 )			// parent
+      {
+      show_fork_error( "data feeder" );
+      close( fda[0] ); close( fda[1] );
+      return false;
+      }
+    close( fda[1] );
+    *infdp = fda[0];			// caller now reads from the feeder
+    children.pid[0] = pid;
+    children.pid[1] = 0;
+    }
+  return true;
+  }
+
+
+// Detects the compressed format of the data readable from 'infd' by
+// reading its first bytes one at a time and comparing them with the
+// known magic sequences.
+// If a full magic sequence matches, '*magic_datap' and '*magic_sizep'
+// are set to that sequence and the corresponding fmt_* value is returned.
+// Otherwise '*magic_datap' and '*magic_sizep' describe the bytes actually
+// consumed from 'infd' (kept in a static buffer, so the pointer stays
+// valid after returning) and -1 is returned.
+//
+int test_format( const int infd, const uint8_t ** const magic_datap,
+                 int * const magic_sizep )
+  {
+  enum { buf_size = 5 };
+  static uint8_t buf[buf_size];
+  int i = 0;					// number of bytes consumed
+  if( readblock( infd, buf, 1 ) == 1 )
+    {
+    ++i;
+    // the first byte selects the only candidate format
+    const uint8_t * magic = 0;
+    int magic_size = 0;
+    int format = -1;
+    if( buf[0] == bzip2_magic[0] )
+      { magic = bzip2_magic; magic_size = bzip2_magic_size; format = fmt_bz2; }
+    else if( buf[0] == gzip_magic[0] )
+      { magic = gzip_magic; magic_size = gzip_magic_size; format = fmt_gz; }
+    else if( buf[0] == lzip_magic[0] )
+      { magic = lzip_magic; magic_size = lzip_magic_size; format = fmt_lz; }
+    else if( buf[0] == xz_magic[0] )
+      { magic = xz_magic; magic_size = xz_magic_size; format = fmt_xz; }
+
+    if( magic )
+      {
+      bool match = true;
+      for( int k = 1; k < magic_size && match; ++k )
+        {
+        if( readblock( infd, &buf[i], 1 ) != 1 ) { match = false; break; }
+        // a mismatching byte has been consumed too, so it is counted
+        if( buf[i++] != magic[k] ) match = false;
+        }
+      if( match )
+        { *magic_datap = magic; *magic_sizep = magic_size; return format; }
+      }
+    }
+  *magic_datap = buf; *magic_sizep = i;
+  return -1;
+  }