summaryrefslogtreecommitdiffstats
path: root/zutils.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- zutils.cc 141
1 files changed, 66 insertions, 75 deletions
diff --git a/zutils.cc b/zutils.cc
index 74b7351..6090c0a 100644
--- a/zutils.cc
+++ b/zutils.cc
@@ -1,18 +1,18 @@
-/* Zutils - Utilities dealing with compressed files
- Copyright (C) 2009-2019 Antonio Diaz Diaz.
+/* Zutils - Utilities dealing with compressed files
+ Copyright (C) 2009-2020 Antonio Diaz Diaz.
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 2 of the License, or
- (at your option) any later version.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
@@ -34,24 +34,21 @@
namespace {
-// first magic byte must be different among formats
-enum { bzip2_magic_size = 3,
- gzip_magic_size = 2,
- lzip_magic_size = 4,
- xz_magic_size = 5 };
-const uint8_t bzip2_magic[bzip2_magic_size] =
- { 0x42, 0x5A, 0x68 }; // "BZh"
-const uint8_t gzip_magic[gzip_magic_size] =
- { 0x1F, 0x8B };
-const uint8_t lzip_magic[lzip_magic_size] =
- { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP"
-const uint8_t xz_magic[xz_magic_size] =
- { 0xFD, 0x37, 0x7A, 0x58, 0x5A }; // 0xFD, "7zXZ"
+inline bool isvalid_ds( const uint8_t ds ) // true if coded byte 'ds' yields a valid lzip dictionary size
+ {
+ enum { min_dictionary_size = 1 << 12, // 4 KiB
+ max_dictionary_size = 1 << 29 }; // 512 MiB
+ unsigned dictionary_size = ( 1 << ( ds & 0x1F ) ); // low 5 bits: exponent of the power-of-2 base size
+ if( dictionary_size > min_dictionary_size ) // a base size at or below the minimum is left unreduced
+ dictionary_size -= ( dictionary_size / 16 ) * ( ( ds >> 5 ) & 7 ); // high 3 bits: sixteenths of the base size to subtract
+ return ( dictionary_size >= min_dictionary_size && // accept only sizes inside the closed range [min, max]
+ dictionary_size <= max_dictionary_size );
+ }
-// Returns -1 if child not terminated, 2 in case of error, or
-// exit status of child process 'pid'.
-//
+/* Returns -1 if child not terminated, 2 in case of error, 0 if child was
+ terminated by SIGPIPE, or exit status of child process 'pid'.
+*/
int child_status( const pid_t pid, const char * const name )
{
int status;
@@ -69,15 +66,16 @@ int child_status( const pid_t pid, const char * const name )
if( tmp == pid ) break; // child terminated
}
if( WIFEXITED( status ) ) return WEXITSTATUS( status );
+ if( WIFSIGNALED( status ) && WTERMSIG( status ) == SIGPIPE ) return 0;
return 2;
}
} // end namespace
-// Returns the number of bytes really read.
-// If (returned value < size) and (errno == 0), means EOF was reached.
-//
+/* Returns the number of bytes really read.
+ If (returned value < size) and (errno == 0), means EOF was reached.
+*/
int readblock( const int fd, uint8_t * const buf, const int size )
{
int sz = 0;
@@ -94,9 +92,9 @@ int readblock( const int fd, uint8_t * const buf, const int size )
}
-// Returns the number of bytes really written.
-// If (returned value < size), it is always an error.
-//
+/* Returns the number of bytes really written.
+ If (returned value < size), it is always an error.
+*/
int writeblock( const int fd, const uint8_t * const buf, const int size )
{
int sz = 0;
@@ -144,8 +142,9 @@ bool good_status( const Children & children, const bool finished )
if( pid )
{
const char * const name =
- ( i & 1 ) ? children.compressor_name : "data feeder";
- if( !finished )
+ ( i == 0 ) ? "data feeder" : children.compressor_name;
+ // even if compressor finished, trailing data may remain in data feeder
+ if( i == 0 || !finished )
{
const int tmp = child_status( pid, name );
if( tmp < 0 ) // child not terminated
@@ -163,10 +162,10 @@ bool good_status( const Children & children, const bool finished )
bool set_data_feeder( const std::string & filename, int * const infdp,
Children & children, int format_index )
{
- const uint8_t * magic_data = 0;
+ uint8_t magic_data[magic_buf_size];
int magic_size = 0;
if( format_index < 0 )
- format_index = test_format( *infdp, &magic_data, &magic_size );
+ format_index = test_format( *infdp, magic_data, &magic_size );
children.compressor_name = get_compressor_name( format_index );
if( children.compressor_name ) // compressed
@@ -247,46 +246,38 @@ bool set_data_feeder( const std::string & filename, int * const infdp,
}
-int test_format( const int infd, const uint8_t ** const magic_datap,
+// Returns format index, or -1 if uncompressed or too short to identify
+//
+int test_format( const int infd, uint8_t magic_data[],
int * const magic_sizep )
{
- enum { buf_size = 5 };
- static uint8_t buf[buf_size];
- int i = 0;
- if( readblock( infd, buf, 1 ) == 1 )
+ enum { bzip2_magic_size = 3,
+ gzip_magic_size = 2,
+ lzip_magic_size = 5,
+ xz_magic_size = 5 };
+ const uint8_t bzip2_magic[bzip2_magic_size] =
+ { 0x42, 0x5A, 0x68 }; // "BZh"
+ const uint8_t gzip_magic[gzip_magic_size] =
+ { 0x1F, 0x8B };
+ const uint8_t lzip_magic[lzip_magic_size] =
+ { 0x4C, 0x5A, 0x49, 0x50, 0x01 }; // "LZIP\001"
+ const uint8_t xz_magic[xz_magic_size] =
+ { 0xFD, 0x37, 0x7A, 0x58, 0x5A }; // 0xFD, "7zXZ"
+
+ *magic_sizep = readblock( infd, magic_data, magic_buf_size );
+ if( *magic_sizep == magic_buf_size )
{
- ++i;
- if( buf[0] == bzip2_magic[0] )
- {
- if( readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == bzip2_magic[1] &&
- readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == bzip2_magic[2] )
- { *magic_datap = bzip2_magic; *magic_sizep = bzip2_magic_size;
- return fmt_bz2; }
- }
- else if( buf[0] == gzip_magic[0] )
- {
- if( readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == gzip_magic[1] )
- { *magic_datap = gzip_magic; *magic_sizep = gzip_magic_size;
- return fmt_gz; }
- }
- else if( buf[0] == lzip_magic[0] )
- {
- if( readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == lzip_magic[1] &&
- readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == lzip_magic[2] &&
- readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == lzip_magic[3] )
- { *magic_datap = lzip_magic; *magic_sizep = lzip_magic_size;
- return fmt_lz; }
- }
- else if( buf[0] == xz_magic[0] )
- {
- if( readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == xz_magic[1] &&
- readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == xz_magic[2] &&
- readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == xz_magic[3] &&
- readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == xz_magic[4] )
- { *magic_datap = xz_magic; *magic_sizep = xz_magic_size;
- return fmt_xz; }
- }
+ if( std::memcmp( magic_data, bzip2_magic, bzip2_magic_size ) == 0 &&
+ magic_data[3] >= '1' && magic_data[3] <= '9' &&
+ std::memcmp( magic_data + 4, "1AY&SY", 6 ) == 0 )
+ return fmt_bz2;
+ if( std::memcmp( magic_data, gzip_magic, gzip_magic_size ) == 0 )
+ return fmt_gz;
+ if( std::memcmp( magic_data, lzip_magic, lzip_magic_size ) == 0 &&
+ isvalid_ds( magic_data[lzip_magic_size] ) )
+ return fmt_lz;
+ if( std::memcmp( magic_data, xz_magic, xz_magic_size ) == 0 )
+ return fmt_xz;
}
- *magic_datap = buf; *magic_sizep = i;
return -1;
}