diff options
Diffstat (limited to 'zutils.cc')
-rw-r--r-- | zutils.cc | 292 |
1 files changed, 292 insertions, 0 deletions
diff --git a/zutils.cc b/zutils.cc new file mode 100644 index 0000000..2509d10 --- /dev/null +++ b/zutils.cc @@ -0,0 +1,292 @@ +/* Zutils - Utilities dealing with compressed files + Copyright (C) 2009-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <string> +#include <vector> +#include <stdint.h> +#include <unistd.h> +#include <sys/wait.h> + +#include "rc.h" +#include "zutils.h" + + +namespace { + +inline bool isvalid_ds( const uint8_t ds ) // lzip valid dictionary_size + { + enum { min_dictionary_size = 1 << 12, + max_dictionary_size = 1 << 29 }; + unsigned dictionary_size = ( 1 << ( ds & 0x1F ) ); + if( dictionary_size > min_dictionary_size ) + dictionary_size -= ( dictionary_size / 16 ) * ( ( ds >> 5 ) & 7 ); + return ( dictionary_size >= min_dictionary_size && + dictionary_size <= max_dictionary_size ); + } + + +/* Return -1 if child not terminated, 2 in case of error, or exit status of + child process 'pid'. Return 0 if child was terminated by SIGPIPE. +*/ +int child_status( const pid_t pid, const char * const name ) + { + int status; + while( true ) + { + const int tmp = waitpid( pid, &status, WNOHANG ); + if( tmp == -1 && errno != EINTR ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Error checking status of '%s': %s\n", + program_name, name, std::strerror( errno ) ); + _exit( 2 ); + } + if( tmp == 0 ) return -1; // child not terminated + if( tmp == pid ) break; // child terminated + } + if( WIFEXITED( status ) ) return WEXITSTATUS( status ); + if( WIFSIGNALED( status ) && WTERMSIG( status ) == SIGPIPE ) return 0; + return 2; + } + +} // end namespace + + +/* Return the number of bytes really read. + If (value returned < size) and (errno == 0), means EOF was reached. +*/ +int readblock( const int fd, uint8_t * const buf, const int size ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = read( fd, buf + sz, size - sz ); + if( n > 0 ) sz += n; + else if( n == 0 ) break; // EOF + else if( errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +/* Return the number of bytes really written. + If (value returned < size), it is always an error. +*/ +int writeblock( const int fd, const uint8_t * const buf, const int size ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = write( fd, buf + sz, size - sz ); + if( n > 0 ) sz += n; + else if( n < 0 && errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +// filename == "-" means stdin. +// +bool feed_data( const std::string & filename, const int infd, const int outfd, + const uint8_t * magic_data, const int magic_size ) + { + if( magic_size && writeblock( outfd, magic_data, magic_size ) != magic_size ) + { show_error( "Write error", errno ); return false; } + enum { buffer_size = 4096 }; + uint8_t buffer[buffer_size]; + while( true ) + { + const int size = readblock( infd, buffer, buffer_size ); + if( size != buffer_size && errno ) + { show_file_error( name_or_stdin( filename.c_str() ), "Read error", + errno ); return false; } + if( size > 0 && writeblock( outfd, buffer, size ) != size ) + { show_error( "Write error", errno ); return false; } + if( size < buffer_size ) break; + } + return true; + } + + +bool good_status( const Children & children, const bool finished ) + { + bool error = false; + for( int i = 0; i < 2; ++i ) + { + const pid_t pid = children.pid[i]; + if( pid ) + { + const char * const name = + ( i == 0 ) ? "data feeder" : children.compressor_name; + // even if compressor finished, trailing data may remain in data feeder + if( i == 0 || !finished ) + { + const int tmp = child_status( pid, name ); // 0 if SIGPIPE + if( tmp < 0 ) // child not terminated + { kill( pid, SIGTERM ); wait_for_child( pid, name ); } + else if( tmp != 0 ) error = true; // child status != 0 + } + else + if( wait_for_child( pid, name ) != 0 ) error = true; + } + } + return !error; + } + + +bool set_data_feeder( const std::string & filename, int * const infdp, + Children & children, int format_index ) + { + uint8_t magic_data[magic_buf_size]; + int magic_size = 0; + if( format_index < 0 ) + format_index = test_format( *infdp, magic_data, &magic_size ); + children.compressor_name = get_compressor_name( format_index ); + + if( children.compressor_name ) // compressed + { + int fda[2]; // pipe from feeder + int fda2[2]; // pipe from compressor + if( pipe( fda ) < 0 || pipe( fda2 ) < 0 ) + { show_error( "Can't create pipe", errno ); return false; } + const int old_infd = *infdp; + *infdp = fda2[0]; + const pid_t pid = fork(); + if( pid == 0 ) // child 1 (compressor feeder) + { + if( close( fda[0] ) != 0 || + close( fda2[0] ) != 0 || close( fda2[1] ) != 0 || + !feed_data( filename, old_infd, fda[1], magic_data, magic_size ) ) + _exit( 2 ); + if( close( fda[1] ) != 0 ) { show_close_error(); _exit( 2 ); } + _exit( 0 ); + } + if( pid < 0 ) // parent + { show_fork_error( "data feeder" ); return false; } + + const pid_t pid2 = fork(); + if( pid2 == 0 ) // child 2 (compressor) + { + if( dup2( fda[0], STDIN_FILENO ) >= 0 && + dup2( fda2[1], STDOUT_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 && + close( fda2[0] ) == 0 && close( fda2[1] ) == 0 ) + { + const std::vector< std::string > & compressor_args = + get_compressor_args( format_index ); + const int size = compressor_args.size(); + const char ** const argv = new const char *[size+3]; + argv[0] = children.compressor_name; + for( int i = 0; i < size; ++i ) + argv[i+1] = compressor_args[i].c_str(); + argv[size+1] = ( verbosity >= 0 ) ? "-d" : "-dq"; + argv[size+2] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( children.compressor_name ); + _exit( 2 ); + } + if( pid2 < 0 ) // parent + { show_fork_error( children.compressor_name ); return false; } + + close( fda[0] ); close( fda[1] ); close( fda2[1] ); + children.pid[0] = pid; + children.pid[1] = pid2; + } + else // uncompressed + { + int fda[2]; // pipe from feeder + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return false; } + const int old_infd = *infdp; + *infdp = fda[0]; + const pid_t pid = fork(); + if( pid == 0 ) // child (feeder) + { + if( close( fda[0] ) != 0 || + !feed_data( filename, old_infd, fda[1], magic_data, magic_size ) ) + _exit( 2 ); + if( close( fda[1] ) != 0 ) { show_close_error(); _exit( 2 ); } + _exit( 0 ); + } + if( pid < 0 ) // parent + { show_fork_error( "data feeder" ); return false; } + close( fda[1] ); + children.pid[0] = pid; + children.pid[1] = 0; + } + return true; + } + + +// Return format_index, or -1 if uncompressed or read error. +// +int test_format( const int infd, uint8_t magic_data[], + int * const magic_sizep ) + { + enum { bzip2_magic_size = 3, + gzip_magic_size = 2, + lzip_magic_size = 5, + xz_magic_size = 5, + zstd_magic_size = 4, + compress_magic_size = 2 }; + const uint8_t bzip2_magic[bzip2_magic_size] = + { 0x42, 0x5A, 0x68 }; // "BZh" + const uint8_t gzip_magic[gzip_magic_size] = + { 0x1F, 0x8B }; + const uint8_t compress_magic[compress_magic_size] = + { 0x1F, 0x9D }; + const uint8_t lzip_magic[lzip_magic_size] = + { 0x4C, 0x5A, 0x49, 0x50, 0x01 }; // "LZIP\001" + const uint8_t xz_magic[xz_magic_size] = + { 0xFD, 0x37, 0x7A, 0x58, 0x5A }; // 0xFD, "7zXZ" + const uint8_t zstd_magic[zstd_magic_size] = + { 0x28, 0xB5, 0x2F, 0xFD }; // 0xFD2FB528 LE + + *magic_sizep = readblock( infd, magic_data, magic_buf_size ); + if( *magic_sizep < magic_buf_size ) + { if( errno ) return -1; // read error + for( int i = *magic_sizep; i < magic_buf_size; ++i ) magic_data[i] = 0; } + // test formats in search order + if( std::memcmp( magic_data, lzip_magic, lzip_magic_size ) == 0 && + isvalid_ds( magic_data[lzip_magic_size] ) ) + return fmt_lz; + if( std::memcmp( magic_data, bzip2_magic, bzip2_magic_size ) == 0 && + magic_data[3] >= '1' && magic_data[3] <= '9' && + ( std::memcmp( magic_data + 4, "1AY&SY", 6 ) == 0 || + std::memcmp( magic_data + 4, "\x17rE8P\x90", 6 ) == 0 ) ) + return fmt_bz2; + if( std::memcmp( magic_data, gzip_magic, gzip_magic_size ) == 0 || + std::memcmp( magic_data, compress_magic, compress_magic_size ) == 0 ) + return fmt_gz; + if( std::memcmp( magic_data, zstd_magic, zstd_magic_size ) == 0 ) + return fmt_zst; + if( std::memcmp( magic_data, xz_magic, xz_magic_size ) == 0 ) + return fmt_xz; + return -1; + } |