From 80d9d5d2b23b8ac219454d6bd8ad82eab16af6fc Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 3 Jul 2020 19:56:15 +0200 Subject: Merging upstream version 1.9. Signed-off-by: Daniel Baumann --- zutils.cc | 141 +++++++++++++++++++++++++++++--------------------------------- 1 file changed, 66 insertions(+), 75 deletions(-) (limited to 'zutils.cc') diff --git a/zutils.cc b/zutils.cc index 74b7351..6090c0a 100644 --- a/zutils.cc +++ b/zutils.cc @@ -1,18 +1,18 @@ -/* Zutils - Utilities dealing with compressed files - Copyright (C) 2009-2019 Antonio Diaz Diaz. +/* Zutils - Utilities dealing with compressed files + Copyright (C) 2009-2020 Antonio Diaz Diaz. - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ #define _FILE_OFFSET_BITS 64 @@ -34,24 +34,21 @@ namespace { -// first magic byte must be different among formats -enum { bzip2_magic_size = 3, - gzip_magic_size = 2, - lzip_magic_size = 4, - xz_magic_size = 5 }; -const uint8_t bzip2_magic[bzip2_magic_size] = - { 0x42, 0x5A, 0x68 }; // "BZh" -const uint8_t gzip_magic[gzip_magic_size] = - { 0x1F, 0x8B }; -const uint8_t lzip_magic[lzip_magic_size] = - { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP" -const uint8_t xz_magic[xz_magic_size] = - { 0xFD, 0x37, 0x7A, 0x58, 0x5A }; // 0xFD, "7zXZ" +inline bool isvalid_ds( const uint8_t ds ) // lzip valid dictionary_size + { + enum { min_dictionary_size = 1 << 12, + max_dictionary_size = 1 << 29 }; + unsigned dictionary_size = ( 1 << ( ds & 0x1F ) ); + if( dictionary_size > min_dictionary_size ) + dictionary_size -= ( dictionary_size / 16 ) * ( ( ds >> 5 ) & 7 ); + return ( dictionary_size >= min_dictionary_size && + dictionary_size <= max_dictionary_size ); + } -// Returns -1 if child not terminated, 2 in case of error, or -// exit status of child process 'pid'. -// +/* Returns -1 if child not terminated, 2 in case of error, or exit status of + child process 'pid'. +*/ int child_status( const pid_t pid, const char * const name ) { int status; @@ -69,15 +66,16 @@ int child_status( const pid_t pid, const char * const name ) if( tmp == pid ) break; // child terminated } if( WIFEXITED( status ) ) return WEXITSTATUS( status ); + if( WIFSIGNALED( status ) && WTERMSIG( status ) == SIGPIPE ) return 0; return 2; } } // end namespace -// Returns the number of bytes really read. -// If (returned value < size) and (errno == 0), means EOF was reached. -// +/* Returns the number of bytes really read. + If (returned value < size) and (errno == 0), means EOF was reached. +*/ int readblock( const int fd, uint8_t * const buf, const int size ) { int sz = 0; @@ -94,9 +92,9 @@ int readblock( const int fd, uint8_t * const buf, const int size ) } -// Returns the number of bytes really written. 
-// If (returned value < size), it is always an error. -// +/* Returns the number of bytes really written. + If (returned value < size), it is always an error. +*/ int writeblock( const int fd, const uint8_t * const buf, const int size ) { int sz = 0; @@ -144,8 +142,9 @@ bool good_status( const Children & children, const bool finished ) if( pid ) { const char * const name = - ( i & 1 ) ? children.compressor_name : "data feeder"; - if( !finished ) + ( i == 0 ) ? "data feeder" : children.compressor_name; + // even if compressor finished, trailing data may remain in data feeder + if( i == 0 || !finished ) { const int tmp = child_status( pid, name ); if( tmp < 0 ) // child not terminated @@ -163,10 +162,10 @@ bool good_status( const Children & children, const bool finished ) bool set_data_feeder( const std::string & filename, int * const infdp, Children & children, int format_index ) { - const uint8_t * magic_data = 0; + uint8_t magic_data[magic_buf_size]; int magic_size = 0; if( format_index < 0 ) - format_index = test_format( *infdp, &magic_data, &magic_size ); + format_index = test_format( *infdp, magic_data, &magic_size ); children.compressor_name = get_compressor_name( format_index ); if( children.compressor_name ) // compressed @@ -247,46 +246,38 @@ bool set_data_feeder( const std::string & filename, int * const infdp, } -int test_format( const int infd, const uint8_t ** const magic_datap, +// Returns format index or -1 if uncompressed +// +int test_format( const int infd, uint8_t magic_data[], int * const magic_sizep ) { - enum { buf_size = 5 }; - static uint8_t buf[buf_size]; - int i = 0; - if( readblock( infd, buf, 1 ) == 1 ) + enum { bzip2_magic_size = 3, + gzip_magic_size = 2, + lzip_magic_size = 5, + xz_magic_size = 5 }; + const uint8_t bzip2_magic[bzip2_magic_size] = + { 0x42, 0x5A, 0x68 }; // "BZh" + const uint8_t gzip_magic[gzip_magic_size] = + { 0x1F, 0x8B }; + const uint8_t lzip_magic[lzip_magic_size] = + { 0x4C, 0x5A, 0x49, 0x50, 0x01 }; // "LZIP\001" 
+ const uint8_t xz_magic[xz_magic_size] = + { 0xFD, 0x37, 0x7A, 0x58, 0x5A }; // 0xFD, "7zXZ" + + *magic_sizep = readblock( infd, magic_data, magic_buf_size ); + if( *magic_sizep == magic_buf_size ) { - ++i; - if( buf[0] == bzip2_magic[0] ) - { - if( readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == bzip2_magic[1] && - readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == bzip2_magic[2] ) - { *magic_datap = bzip2_magic; *magic_sizep = bzip2_magic_size; - return fmt_bz2; } - } - else if( buf[0] == gzip_magic[0] ) - { - if( readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == gzip_magic[1] ) - { *magic_datap = gzip_magic; *magic_sizep = gzip_magic_size; - return fmt_gz; } - } - else if( buf[0] == lzip_magic[0] ) - { - if( readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == lzip_magic[1] && - readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == lzip_magic[2] && - readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == lzip_magic[3] ) - { *magic_datap = lzip_magic; *magic_sizep = lzip_magic_size; - return fmt_lz; } - } - else if( buf[0] == xz_magic[0] ) - { - if( readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == xz_magic[1] && - readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == xz_magic[2] && - readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == xz_magic[3] && - readblock( infd, &buf[i], 1 ) == 1 && buf[i++] == xz_magic[4] ) - { *magic_datap = xz_magic; *magic_sizep = xz_magic_size; - return fmt_xz; } - } + if( std::memcmp( magic_data, bzip2_magic, bzip2_magic_size ) == 0 && + magic_data[3] >= '1' && magic_data[3] <= '9' && + std::memcmp( magic_data + 4, "1AY&SY", 6 ) == 0 ) + return fmt_bz2; + if( std::memcmp( magic_data, gzip_magic, gzip_magic_size ) == 0 ) + return fmt_gz; + if( std::memcmp( magic_data, lzip_magic, lzip_magic_size ) == 0 && + isvalid_ds( magic_data[lzip_magic_size] ) ) + return fmt_lz; + if( std::memcmp( magic_data, xz_magic, xz_magic_size ) == 0 ) + return fmt_xz; } - *magic_datap = buf; *magic_sizep = i; return -1; } -- cgit v1.2.3