From 7dd21df678aafaba8614ffe90188712f13b3bfc3 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Tue, 13 Feb 2018 07:58:53 +0100 Subject: Merging upstream version 1.10. Signed-off-by: Daniel Baumann --- ChangeLog | 19 ++++- INSTALL | 2 +- NEWS | 36 ++++++-- README | 4 +- carg_parser.c | 2 +- carg_parser.h | 2 +- configure | 6 +- decoder.c | 70 ++++++++-------- decoder.h | 10 +-- doc/lunzip.1 | 7 +- file_index.c | 32 +++---- file_index.h | 4 +- list.c | 8 +- lzip.h | 49 +++++++++-- main.c | 239 ++++++++++++++++++++++++++++++++++------------------- testsuite/check.sh | 43 +++++++++- 16 files changed, 352 insertions(+), 181 deletions(-) diff --git a/ChangeLog b/ChangeLog index 83c4648..54b4b03 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +2018-02-05 Antonio Diaz Diaz + + * Version 1.10 released. + * main.c: Added new option '--loose-trailing'. + * Improved corrupt header detection to HD=3. + * main.c: Show corrupt or truncated header in multimember file. + * Replaced 'bits/byte' with inverse compression ratio in output. + * Show progress of decompression at verbosity level 2 (-vv). + * Show progress of decompression only if stderr is a terminal. + * main.c: Show final diagnostic when testing multiple files. + * decoder.c (LZd_verify_trailer): Show stored sizes also in hex. + Show dictionary size at verbosity level 4 (-vvvv). + 2017-04-13 Antonio Diaz Diaz * Version 1.9 released. @@ -21,8 +34,8 @@ * main.c (main): Don't use stdin more than once. * Error messages synced with lzip-1.18. * configure: Avoid warning on some shells when testing for gcc. - * testsuite/check.sh: A POSIX shell is required to run the tests. - * testsuite/check.sh: Don't check error messages. + * check.sh: A POSIX shell is required to run the tests. + * check.sh: Don't check error messages. 2015-05-27 Antonio Diaz Diaz @@ -80,7 +93,7 @@ * Created from the decompression code of clzip 1.1. -Copyright (C) 2010-2017 Antonio Diaz Diaz. +Copyright (C) 2010-2018 Antonio Diaz Diaz. 
This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and diff --git a/INSTALL b/INSTALL index 59892e4..4543b8d 100644 --- a/INSTALL +++ b/INSTALL @@ -61,7 +61,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2010-2017 Antonio Diaz Diaz. +Copyright (C) 2010-2018 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/NEWS b/NEWS index 57ec920..cd904d7 100644 --- a/NEWS +++ b/NEWS @@ -1,14 +1,32 @@ -Changes in version 1.9: +Changes in version 1.10: -The option '-l, --list' has been ported from lziprecover. +The option '--loose-trailing' has been added. -It is now an error to specify two or more different operations in the -command line (--decompress, --list or --test). +The test used by lunzip to discriminate trailing data from a corrupt +header in multimember or concatenated files has been improved to a +Hamming distance (HD) of 3, and the 3 bit flips must happen in different +magic bytes for the test to fail. As a consequence some kinds of files +no longer can be appended to a lzip file as trailing data unless the +'--loose-trailing' option is used when decompressing. +Lziprecover can be used to remove conflicting trailing data from a file. -Decompression time has been reduced by 7%. +The contents of a corrupt or truncated header found in a multimember +file are now shown, after the error message, in the same format as +trailing data. -In test mode, lunzip now continues checking the rest of the files if any -input file is a terminal. +The 'bits/byte' ratio has been replaced with the inverse compression +ratio in the output. -Trailing data are now shown both in hexadecimal and as a string of -printable ASCII characters. +The progress of decompression is now shown at verbosity level 2 (-vv) or +higher.
+ +Progress of decompression is only shown if stderr is a terminal. + +A final diagnostic is now shown at verbosity level 1 (-v) or higher if +any file fails the test when testing multiple files. + +In case of (de)compressed size mismatch, the stored size is now also +shown in hexadecimal to ease visual comparison. + +The dictionary size is now shown at verbosity level 4 (-vvvv) when +decompressing or testing. diff --git a/README b/README index ccd9b7b..947dc24 100644 --- a/README +++ b/README @@ -73,7 +73,7 @@ input from a terminal. Lunzip will correctly decompress a file which is the concatenation of two or more compressed files. The result is the concatenation of the -corresponding uncompressed files. Integrity testing of concatenated +corresponding decompressed files. Integrity testing of concatenated compressed files is also supported. The ideas embodied in lunzip are due to (at least) the following people: @@ -83,7 +83,7 @@ range encoding), Igor Pavlov (for putting all the above together in LZMA), and Julian Seward (for bzip2's CLI). -Copyright (C) 2010-2017 Antonio Diaz Diaz. +Copyright (C) 2010-2018 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/carg_parser.c b/carg_parser.c index 6850643..10ad4dc 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2017 Antonio Diaz Diaz. + Copyright (C) 2006-2018 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/carg_parser.h b/carg_parser.h index c4ce31d..e1c70dd 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2017 Antonio Diaz Diaz. + Copyright (C) 2006-2018 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/configure b/configure index 53fcaae..20fa969 100755 --- a/configure +++ b/configure @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Lunzip - Decompressor for the lzip format -# Copyright (C) 2010-2017 Antonio Diaz Diaz. +# Copyright (C) 2010-2018 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. pkgname=lunzip -pkgversion=1.9 +pkgversion=1.10 progname=lunzip srctrigger=doc/${progname}.1 @@ -168,7 +168,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lunzip - Decompressor for the lzip format -# Copyright (C) 2010-2017 Antonio Diaz Diaz. +# Copyright (C) 2010-2018 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission diff --git a/decoder.c b/decoder.c index 61777b5..a995da3 100644 --- a/decoder.c +++ b/decoder.c @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2017 Antonio Diaz Diaz. + Copyright (C) 2010-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,24 +32,6 @@ CRC32 crc32; -void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ) - { - if( verbosity >= 0 ) - { - if( pp->first_post ) - { - unsigned i; - pp->first_post = false; - fprintf( stderr, " %s: ", pp->name ); - for( i = strlen( pp->name ); i < pp->longest_name; ++i ) - fputc( ' ', stderr ); - if( !msg ) fflush( stderr ); - } - if( msg ) fprintf( stderr, "%s\n", msg ); - } - } - - /* Returns the number of bytes really read. If (returned value < size) and (errno == 0), means EOF was reached. 
*/ @@ -106,6 +88,7 @@ bool Rd_read_block( struct Range_decoder * const rdec ) rdec->at_stream_end = ( rdec->stream_pos < rd_buffer_size ); rdec->partial_member_pos += rdec->pos; rdec->pos = 0; + show_dprogress( 0, 0, 0, 0 ); } return rdec->pos < rdec->stream_pos; } @@ -136,6 +119,8 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d, int size = Rd_read_data( d->rdec, trailer, Ft_size ); const unsigned long long data_size = LZd_data_position( d ); const unsigned long long member_size = Rd_member_position( d->rdec ); + unsigned td_crc; + unsigned long long td_size, tm_size; bool error = false; if( size < Ft_size ) @@ -150,45 +135,56 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d, while( size < Ft_size ) trailer[size++] = 0; } - if( Ft_get_data_crc( trailer ) != LZd_crc( d ) ) + td_crc = Ft_get_data_crc( trailer ); + if( td_crc != LZd_crc( d ) ) { error = true; if( verbosity >= 0 ) { Pp_show_msg( pp, 0 ); - fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X\n", - Ft_get_data_crc( trailer ), LZd_crc( d ) ); + fprintf( stderr, "CRC mismatch; stored %08X, computed %08X\n", + td_crc, LZd_crc( d ) ); } } - if( Ft_get_data_size( trailer ) != data_size ) + td_size = Ft_get_data_size( trailer ); + if( td_size != data_size ) { error = true; if( verbosity >= 0 ) { Pp_show_msg( pp, 0 ); - fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX)\n", - Ft_get_data_size( trailer ), data_size, data_size ); + fprintf( stderr, "Data size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n", + td_size, td_size, data_size, data_size ); } } - if( Ft_get_member_size( trailer ) != member_size ) + tm_size = Ft_get_member_size( trailer ); + if( tm_size != member_size ) { error = true; if( verbosity >= 0 ) { Pp_show_msg( pp, 0 ); - fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX)\n", - Ft_get_member_size( trailer ), member_size, member_size ); + fprintf( stderr, "Member size 
mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n", + tm_size, tm_size, member_size, member_size ); } } - if( !error && verbosity >= 2 && data_size > 0 && member_size > 0 ) - fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", - (double)data_size / member_size, - ( 8.0 * member_size ) / data_size, - 100.0 * ( 1.0 - ( (double)member_size / data_size ) ) ); - if( !error && verbosity >= 4 ) - fprintf( stderr, "CRC %08X, decompressed %9llu, compressed %8llu. ", - LZd_crc( d ), data_size, member_size ); - return !error; + if( error ) return false; + if( verbosity >= 2 ) + { + if( verbosity >= 4 ) show_header( d->dictionary_size ); + if( data_size == 0 || member_size == 0 ) + fputs( "no data compressed. ", stderr ); + else + fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved. ", + (double)data_size / member_size, + ( 100.0 * member_size ) / data_size, + 100.0 - ( ( 100.0 * member_size ) / data_size ) ); + if( verbosity >= 4 ) fprintf( stderr, "CRC %08X, ", td_crc ); + if( verbosity >= 3 ) + fprintf( stderr, "decompressed %9llu, compressed %8llu. ", + data_size, member_size ); + } + return true; } diff --git a/decoder.h b/decoder.h index e3a4f1c..7ec3133 100644 --- a/decoder.h +++ b/decoder.h @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2017 Antonio Diaz Diaz. + Copyright (C) 2010-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -244,11 +244,7 @@ unsigned seek_read_back( const int fd, uint8_t * const buf, const int size, const int offset ); static inline uint8_t LZd_peek_prev( const struct LZ_decoder * const d ) - { - if( d->pos > 0 ) return d->buffer[d->pos-1]; - if( d->pos_wrapped ) return d->buffer[d->buffer_size-1]; - return 0; /* prev_byte of first byte */ - } + { return d->buffer[((d->pos > 0) ? 
d->pos : d->buffer_size)-1]; } static inline uint8_t LZd_peek( const struct LZ_decoder * const d, const unsigned distance ) @@ -338,6 +334,8 @@ static inline bool LZd_init( struct LZ_decoder * const d, d->outfd = ofd; d->pos_wrapped = false; d->pos_wrapped_dic = false; + /* prev_byte of first byte; also for LZd_peek( 0 ) on corrupt file */ + d->buffer[d->buffer_size-1] = 0; return true; } diff --git a/doc/lunzip.1 b/doc/lunzip.1 index c83f901..ffcd8a3 100644 --- a/doc/lunzip.1 +++ b/doc/lunzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH LUNZIP "1" "April 2017" "lunzip 1.9" "User Commands" +.TH LUNZIP "1" "February 2018" "lunzip 1.10" "User Commands" .SH NAME lunzip \- decompressor for the lzip format .SH SYNOPSIS @@ -61,6 +61,9 @@ set output buffer size in bytes .TP \fB\-v\fR, \fB\-\-verbose\fR be verbose (a 2nd \fB\-v\fR gives more) +.TP +\fB\-\-loose\-trailing\fR +allow trailing data seeming corrupt header .PP If no file names are given, or if a file is '\-', lunzip decompresses from standard input to standard output. @@ -78,7 +81,7 @@ Report bugs to lzip\-bug@nongnu.org .br Lunzip home page: http://www.nongnu.org/lzip/lunzip.html .SH COPYRIGHT -Copyright \(co 2017 Antonio Diaz Diaz. +Copyright \(co 2018 Antonio Diaz Diaz. License GPLv2+: GNU GPL version 2 or later .br This is free software: you are free to change and redistribute it. diff --git a/file_index.c b/file_index.c index e737608..1872d67 100644 --- a/file_index.c +++ b/file_index.c @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2017 Antonio Diaz Diaz. + Copyright (C) 2010-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -108,7 +108,9 @@ static void Fi_set_num_error( struct File_index * const fi, /* If successful, push last member and set pos to member header. 
*/ static bool Fi_skip_trailing_data( struct File_index * const fi, - const int fd, long long * const pos ) + const int fd, long long * const pos, + const bool ignore_trailing, + const bool loose_trailing ) { enum { block_size = 16384, buffer_size = block_size + Ft_size - 1 + Fh_size }; @@ -152,6 +154,11 @@ static bool Fi_skip_trailing_data( struct File_index * const fi, add_error( fi, "Last member in input file is truncated or corrupt." ); fi->retval = 2; return false; } + if( !loose_trailing && bsize - i >= Fh_size && + Fh_verify_corrupt( buffer + i ) ) + { add_error( fi, corrupt_mm_msg ); fi->retval = 2; return false; } + if( !ignore_trailing ) + { add_error( fi, trailing_msg ); fi->retval = 2; return false; } *pos = ipos + i - member_size; return push_back_member( fi, 0, Ft_get_data_size( *trailer ), *pos, member_size, dictionary_size ); @@ -170,7 +177,7 @@ static bool Fi_skip_trailing_data( struct File_index * const fi, bool Fi_init( struct File_index * const fi, const int infd, - const bool ignore_trailing ) + const bool ignore_trailing, const bool loose_trailing ) { File_header header; long long pos; @@ -211,12 +218,10 @@ bool Fi_init( struct File_index * const fi, const int infd, member_size = Ft_get_member_size( trailer ); if( member_size < min_member_size || member_size > (unsigned long long)pos ) { - if( fi->members > 0 ) - Fi_set_num_error( fi, "Member size in trailer is corrupt at pos ", - pos - 8 ); - else if( Fi_skip_trailing_data( fi, infd, &pos ) ) - { if( ignore_trailing ) continue; - add_error( fi, trailing_msg ); fi->retval = 2; return false; } + if( fi->members <= 0 ) + { if( Fi_skip_trailing_data( fi, infd, &pos, ignore_trailing, + loose_trailing ) ) continue; else return false; } + Fi_set_num_error( fi, "Member size in trailer is corrupt at pos ", pos - 8 ); break; } if( seek_read( infd, header, Fh_size, pos - member_size ) != Fh_size ) @@ -225,11 +230,10 @@ bool Fi_init( struct File_index * const fi, const int infd, if( !Fh_verify_magic( 
header ) || !Fh_verify_version( header ) || !isvalid_ds( dictionary_size ) ) { - if( fi->members > 0 ) - Fi_set_num_error( fi, "Bad header at pos ", pos - member_size ); - else if( Fi_skip_trailing_data( fi, infd, &pos ) ) - { if( ignore_trailing ) continue; - add_error( fi, trailing_msg ); fi->retval = 2; return false; } + if( fi->members <= 0 ) + { if( Fi_skip_trailing_data( fi, infd, &pos, ignore_trailing, + loose_trailing ) ) continue; else return false; } + Fi_set_num_error( fi, "Bad header at pos ", pos - member_size ); break; } pos -= member_size; diff --git a/file_index.h b/file_index.h index 38f0246..49b6519 100644 --- a/file_index.h +++ b/file_index.h @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2017 Antonio Diaz Diaz. + Copyright (C) 2010-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -57,7 +57,7 @@ struct File_index }; bool Fi_init( struct File_index * const fi, const int infd, - const bool ignore_trailing ); + const bool ignore_trailing, const bool loose_trailing ); void Fi_free( struct File_index * const fi ); diff --git a/list.c b/list.c index 6533bce..b0dfef7 100644 --- a/list.c +++ b/list.c @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2017 Antonio Diaz Diaz. + Copyright (C) 2010-2018 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -34,7 +34,7 @@ static void list_line( const unsigned long long uncomp_size, { if( uncomp_size > 0 ) printf( "%15llu %15llu %6.2f%% %s\n", uncomp_size, comp_size, - 100.0 * ( 1.0 - ( (double)comp_size / uncomp_size ) ), + 100.0 - ( ( 100.0 * comp_size ) / uncomp_size ), input_filename ); else printf( "%15llu %15llu -INF%% %s\n", uncomp_size, comp_size, @@ -43,7 +43,7 @@ static void list_line( const unsigned long long uncomp_size, int list_files( const char * const filenames[], const int num_filenames, - const bool ignore_trailing ) + const bool ignore_trailing, const bool loose_trailing ) { unsigned long long total_comp = 0, total_uncomp = 0; int files = 0, retval = 0; @@ -63,7 +63,7 @@ int list_files( const char * const filenames[], const int num_filenames, open_instream( input_filename, &in_stats, true, true ); if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } - Fi_init( &file_index, infd, ignore_trailing ); + Fi_init( &file_index, infd, ignore_trailing, loose_trailing ); close( infd ); if( file_index.retval != 0 ) { diff --git a/lzip.h b/lzip.h index 1e6cdd2..01bf942 100644 --- a/lzip.h +++ b/lzip.h @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2017 Antonio Diaz Diaz. + Copyright (C) 2010-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,6 +22,8 @@ #define min(x,y) ((x) <= (y) ? 
(x) : (y)) #endif +void * resize_buffer( void * buf, const unsigned min_size ); + typedef int State; enum { states = 12 }; @@ -114,9 +116,13 @@ static inline void Lm_init( struct Len_model * const lm ) } +/* defined in main.c */ +extern int verbosity; + struct Pretty_print { const char * name; + char * padded_name; const char * stdin_name; unsigned longest_name; bool first_post; @@ -124,11 +130,12 @@ struct Pretty_print static inline void Pp_init( struct Pretty_print * const pp, const char * const filenames[], - const int num_filenames, const int verbosity ) + const int num_filenames ) { unsigned stdin_name_len; int i; pp->name = 0; + pp->padded_name = 0; pp->stdin_name = "(stdin)"; pp->longest_name = 0; pp->first_post = false; @@ -147,9 +154,19 @@ static inline void Pp_init( struct Pretty_print * const pp, static inline void Pp_set_name( struct Pretty_print * const pp, const char * const filename ) { + unsigned name_len, padded_name_len, i = 0; + if( filename && filename[0] && strcmp( filename, "-" ) != 0 ) pp->name = filename; else pp->name = pp->stdin_name; + name_len = strlen( pp->name ); + padded_name_len = max( name_len, pp->longest_name ) + 4; + pp->padded_name = resize_buffer( pp->padded_name, padded_name_len + 1 ); + while( i < 2 ) pp->padded_name[i++] = ' '; + while( i < name_len + 2 ) { pp->padded_name[i] = pp->name[i-2]; ++i; } + pp->padded_name[i++] = ':'; + while( i < padded_name_len ) pp->padded_name[i++] = ' '; + pp->padded_name[i] = 0; pp->first_post = true; } @@ -202,12 +219,21 @@ enum { Fh_size = 6 }; static inline bool Fh_verify_magic( const File_header data ) { return ( memcmp( data, magic_string, 4 ) == 0 ); } -/* detect truncated header */ -static inline bool Fh_verify_prefix( const File_header data, const int size ) +/* detect (truncated) header */ +static inline bool Fh_verify_prefix( const File_header data, const int sz ) { - int i; for( i = 0; i < size && i < 4; ++i ) + int i; for( i = 0; i < sz && i < 4; ++i ) if( data[i] != 
magic_string[i] ) return false; - return ( size > 0 ); + return ( sz > 0 ); + } + +/* detect corrupt header */ +static inline bool Fh_verify_corrupt( const File_header data ) + { + int matches = 0; + int i; for( i = 0; i < 4; ++i ) + if( data[i] == magic_string[i] ) ++matches; + return ( matches > 1 && matches < 4 ); } static inline uint8_t Fh_version( const File_header data ) @@ -256,6 +282,7 @@ static inline unsigned long long Ft_get_member_size( const File_trailer data ) static const char * const bad_magic_msg = "Bad magic number (file not in lzip format)."; static const char * const bad_dict_msg = "Invalid dictionary size in member header."; +static const char * const corrupt_mm_msg = "Corrupt header in multimember file."; static const char * const trailing_msg = "Trailing data not allowed."; /* defined in decoder.c */ @@ -263,17 +290,21 @@ int readblock( const int fd, uint8_t * const buf, const int size ); /* defined in list.c */ int list_files( const char * const filenames[], const int num_filenames, - const bool ignore_trailing ); + const bool ignore_trailing, const bool loose_trailing ); /* defined in main.c */ -extern int verbosity; struct stat; const char * bad_version( const unsigned version ); const char * format_ds( const unsigned dictionary_size ); +void show_header( const unsigned dictionary_size ); int open_instream( const char * const name, struct stat * const in_statsp, const bool no_ofile, const bool reg_only ); -void * resize_buffer( void * buf, const unsigned min_size ); void cleanup_and_fail( const int retval ); void show_error( const char * const msg, const int errcode, const bool help ); void show_file_error( const char * const filename, const char * const msg, const int errcode ); +struct Range_decoder; +void show_dprogress( const unsigned long long cfile_size, + const unsigned long long partial_size, + const struct Range_decoder * const d, + struct Pretty_print * const p ); diff --git a/main.c b/main.c index 47a72d6..9a14137 100644 --- 
a/main.c +++ b/main.c @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2017 Antonio Diaz Diaz. + Copyright (C) 2010-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -67,7 +67,7 @@ int verbosity = 0; const char * const Program_name = "Lunzip"; const char * const program_name = "lunzip"; -const char * const program_year = "2017"; +const char * const program_year = "2018"; const char * invocation_name = 0; const struct { const char * from; const char * to; } known_extensions[] = { @@ -113,6 +113,7 @@ static void show_help( void ) " -t, --test test compressed file integrity\n" " -u, --buffer-size= set output buffer size in bytes\n" " -v, --verbose be verbose (a 2nd -v gives more)\n" + " --loose-trailing allow trailing data seeming corrupt header\n" "If no file names are given, or if a file is '-', lunzip decompresses\n" "from standard input to standard output.\n" "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" @@ -138,6 +139,35 @@ static void show_version( void ) } +/* assure at least a minimum size for buffer 'buf' */ +void * resize_buffer( void * buf, const unsigned min_size ) + { + if( buf ) buf = realloc( buf, min_size ); + else buf = malloc( min_size ); + if( !buf ) + { + show_error( "Not enough memory.", 0, false ); + cleanup_and_fail( 1 ); + } + return buf; + } + + +void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ) + { + if( verbosity >= 0 ) + { + if( pp->first_post ) + { + pp->first_post = false; + fputs( pp->padded_name, stderr ); + if( !msg ) fflush( stderr ); + } + if( msg ) fprintf( stderr, "%s\n", msg ); + } + } + + const char * bad_version( const unsigned version ) { static char buf[80]; @@ -155,10 +185,10 @@ const char * format_ds( const unsigned dictionary_size ) { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; const char * p = ""; const char * np = " "; - 
unsigned num = dictionary_size, i; + unsigned num = dictionary_size; bool exact = ( num % factor == 0 ); - for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + int i; for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; np = ""; } snprintf( buf, bufsize, "%s%4u %sB", np, num, p ); @@ -166,10 +196,9 @@ const char * format_ds( const unsigned dictionary_size ) } -static void show_header( const unsigned dictionary_size ) +void show_header( const unsigned dictionary_size ) { - if( verbosity >= 3 ) - fprintf( stderr, "dictionary %s. ", format_ds( dictionary_size ) ); + fprintf( stderr, "dictionary %s, ", format_ds( dictionary_size ) ); } @@ -263,6 +292,31 @@ static int extension_index( const char * const name ) } +static void set_d_outname( const char * const name, const int eindex ) + { + const unsigned name_len = strlen( name ); + if( eindex >= 0 ) + { + const char * const from = known_extensions[eindex].from; + const unsigned from_len = strlen( from ); + if( name_len > from_len ) + { + output_filename = resize_buffer( output_filename, name_len + + strlen( known_extensions[eindex].to ) + 1 ); + strcpy( output_filename, name ); + strcpy( output_filename + name_len - from_len, known_extensions[eindex].to ); + return; + } + } + output_filename = resize_buffer( output_filename, name_len + 4 + 1 ); + strcpy( output_filename, name ); + strcat( output_filename, ".out" ); + if( verbosity >= 1 ) + fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n", + program_name, name, output_filename ); + } + + int open_instream( const char * const name, struct stat * const in_statsp, const bool no_ofile, const bool reg_only ) { @@ -291,45 +345,6 @@ int open_instream( const char * const name, struct stat * const in_statsp, } -/* assure at least a minimum size for buffer 'buf' */ -void * resize_buffer( void * buf, const unsigned min_size ) - { - if( buf ) buf 
= realloc( buf, min_size ); - else buf = malloc( min_size ); - if( !buf ) - { - show_error( "Not enough memory.", 0, false ); - cleanup_and_fail( 1 ); - } - return buf; - } - - -static void set_d_outname( const char * const name, const int eindex ) - { - const unsigned name_len = strlen( name ); - if( eindex >= 0 ) - { - const char * const from = known_extensions[eindex].from; - const unsigned from_len = strlen( from ); - if( name_len > from_len ) - { - output_filename = resize_buffer( output_filename, name_len + - strlen( known_extensions[eindex].to ) + 1 ); - strcpy( output_filename, name ); - strcpy( output_filename + name_len - from_len, known_extensions[eindex].to ); - return; - } - } - output_filename = resize_buffer( output_filename, name_len + 4 + 1 ); - strcpy( output_filename, name ); - strcat( output_filename, ".out" ); - if( verbosity >= 1 ) - fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n", - program_name, name, output_filename ); - } - - static bool open_outstream( const bool force, const bool from_stdin ) { const mode_t usr_rw = S_IRUSR | S_IWUSR; @@ -413,9 +428,9 @@ static unsigned char xdigit( const unsigned value ) static bool show_trailing_data( const uint8_t * const data, const int size, struct Pretty_print * const pp, const bool all, - const bool ignore_trailing ) + const int ignore_trailing ) /* -1 = show */ { - if( verbosity >= 4 || !ignore_trailing ) + if( verbosity >= 4 || ignore_trailing <= 0 ) { int i; char buf[80]; @@ -433,15 +448,16 @@ static bool show_trailing_data( const uint8_t * const data, const int size, if( len < sizeof buf ) buf[len++] = '\''; if( len < sizeof buf ) buf[len] = 0; else buf[sizeof buf - 1] = 0; Pp_show_msg( pp, buf ); - if( !ignore_trailing ) show_file_error( pp->name, trailing_msg, 0 ); + if( ignore_trailing == 0 ) show_file_error( pp->name, trailing_msg, 0 ); } - return ignore_trailing; + return ( ignore_trailing > 0 ); } -static int decompress( const int infd, struct Pretty_print * 
const pp, - const unsigned buffer_size, - const bool ignore_trailing, const bool testing ) +static int decompress( const unsigned long long cfile_size, const int infd, + struct Pretty_print * const pp, const unsigned buffer_size, + const bool ignore_trailing, const bool loose_trailing, + const bool testing ) { unsigned long long partial_file_pos = 0; struct Range_decoder rdec; @@ -463,8 +479,12 @@ static int decompress( const int infd, struct Pretty_print * const pp, size = Rd_read_data( &rdec, header, Fh_size ); if( Rd_finished( &rdec ) ) /* End Of File */ { - if( first_member || Fh_verify_prefix( header, size ) ) - { Pp_show_msg( pp, "File ends unexpectedly at member header." ); + if( first_member ) + { show_file_error( pp->name, "File ends unexpectedly at member header.", 0 ); + retval = 2; } + else if( Fh_verify_prefix( header, size ) ) + { Pp_show_msg( pp, "Truncated header in multimember file." ); + show_trailing_data( header, size, pp, true, -1 ); retval = 2; } else if( size > 0 && !show_trailing_data( header, size, pp, true, ignore_trailing ) ) @@ -475,27 +495,30 @@ static int decompress( const int infd, struct Pretty_print * const pp, { if( first_member ) { show_file_error( pp->name, bad_magic_msg, 0 ); retval = 2; } + else if( !loose_trailing && Fh_verify_corrupt( header ) ) + { Pp_show_msg( pp, corrupt_mm_msg ); + show_trailing_data( header, size, pp, false, -1 ); + retval = 2; } else if( !show_trailing_data( header, size, pp, false, ignore_trailing ) ) retval = 2; break; } if( !Fh_verify_version( header ) ) - { - Pp_show_msg( pp, bad_version( Fh_version( header ) ) ); - retval = 2; break; - } + { Pp_show_msg( pp, bad_version( Fh_version( header ) ) ); + retval = 2; break; } dictionary_size = Fh_get_dictionary_size( header ); if( !isvalid_ds( dictionary_size ) ) { Pp_show_msg( pp, bad_dict_msg ); retval = 2; break; } if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) - { Pp_show_msg( pp, 0 ); show_header( dictionary_size ); } + Pp_show_msg( pp, 0 
); if( !LZd_init( &decoder, &rdec, buffer_size, dictionary_size, outfd ) ) { Pp_show_msg( pp, "Not enough memory. Try a smaller output buffer size." ); retval = 1; break; } + show_dprogress( cfile_size, partial_file_pos, &rdec, pp ); /* init */ result = LZd_decode_member( &decoder, pp ); partial_file_pos += Rd_member_position( &rdec ); LZd_free( &decoder ); @@ -568,42 +591,76 @@ void internal_error( const char * const msg ) } +void show_dprogress( const unsigned long long cfile_size, + const unsigned long long partial_size, + const struct Range_decoder * const d, + struct Pretty_print * const p ) + { + static unsigned long long csize = 0; /* file_size / 100 */ + static unsigned long long psize = 0; + static const struct Range_decoder * rdec = 0; + static struct Pretty_print * pp = 0; + static int counter = 0; + static bool enabled = true; + + if( !enabled ) return; + if( p ) /* initialize static vars */ + { + if( verbosity < 2 || !isatty( STDERR_FILENO ) ) { enabled = false; return; } + csize = cfile_size; psize = partial_size; rdec = d; pp = p; counter = 0; + } + if( rdec && pp && --counter <= 0 ) + { + const unsigned long long pos = psize + Rd_member_position( rdec ); + counter = 7; /* update display every 114688 bytes */ + if( csize > 0 ) + fprintf( stderr, "%4llu%% %.1f MB\r", pos / csize, pos / 1000000.0 ); + else + fprintf( stderr, " %.1f MB\r", pos / 1000000.0 ); + Pp_reset( pp ); Pp_show_msg( pp, 0 ); /* restore cursor position */ + } + } + + int main( const int argc, const char * const argv[] ) { const char * default_output_filename = ""; const char ** filenames = 0; int num_filenames = 0; unsigned buffer_size = max_dictionary_size; - int infd = -1; enum Mode program_mode = m_compress; int argind = 0; + int failed_tests = 0; int retval = 0; int i; bool filenames_given = false; bool force = false; bool ignore_trailing = true; bool keep_input_files = false; + bool loose_trailing = false; bool stdin_used = false; bool to_stdout = false; struct Pretty_print 
pp; + enum { opt_lt = 256 }; const struct ap_Option options[] = { - { 'a', "trailing-error", ap_no }, - { 'c', "stdout", ap_no }, - { 'd', "decompress", ap_no }, - { 'f', "force", ap_no }, - { 'h', "help", ap_no }, - { 'k', "keep", ap_no }, - { 'l', "list", ap_no }, - { 'n', "threads", ap_yes }, - { 'o', "output", ap_yes }, - { 'q', "quiet", ap_no }, - { 't', "test", ap_no }, - { 'u', "buffer-size", ap_yes }, - { 'v', "verbose", ap_no }, - { 'V', "version", ap_no }, - { 0 , 0, ap_no } }; + { 'a', "trailing-error", ap_no }, + { 'c', "stdout", ap_no }, + { 'd', "decompress", ap_no }, + { 'f', "force", ap_no }, + { 'h', "help", ap_no }, + { 'k', "keep", ap_no }, + { 'l', "list", ap_no }, + { 'n', "threads", ap_yes }, + { 'o', "output", ap_yes }, + { 'q', "quiet", ap_no }, + { 't', "test", ap_no }, + { 'u', "buffer-size", ap_yes }, + { 'v', "verbose", ap_no }, + { 'V', "version", ap_no }, + { opt_lt, "loose-trailing", ap_no }, + { 0 , 0, ap_no } }; struct Arg_parser parser; @@ -636,6 +693,7 @@ int main( const int argc, const char * const argv[] ) case 'u': buffer_size = get_dict_size( arg ); break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; + case opt_lt: loose_trailing = true; break; default : internal_error( "uncaught option." 
); } } /* end process options */ @@ -656,7 +714,7 @@ int main( const int argc, const char * const argv[] ) } if( program_mode == m_list ) - return list_files( filenames, num_filenames, ignore_trailing ); + return list_files( filenames, num_filenames, ignore_trailing, loose_trailing ); if( program_mode == m_test ) outfd = -1; @@ -681,12 +739,14 @@ int main( const int argc, const char * const argv[] ) ( filenames_given || default_output_filename[0] ) ) set_signals(); - Pp_init( &pp, filenames, num_filenames, verbosity ); + Pp_init( &pp, filenames, num_filenames ); output_filename = resize_buffer( output_filename, 1 ); for( i = 0; i < num_filenames; ++i ) { + unsigned long long cfile_size; const char * input_filename = ""; + int infd; int tmp; struct stat in_stats; const struct stat * in_statsp; @@ -708,7 +768,7 @@ int main( const int argc, const char * const argv[] ) if( !open_outstream( force, true ) ) { if( retval < 1 ) retval = 1; - close( infd ); infd = -1; + close( infd ); continue; } } @@ -729,7 +789,7 @@ int main( const int argc, const char * const argv[] ) if( !open_outstream( force, false ) ) { if( retval < 1 ) retval = 1; - close( infd ); infd = -1; + close( infd ); continue; } } @@ -742,7 +802,7 @@ int main( const int argc, const char * const argv[] ) show_file_error( pp.name, "I won't read compressed data from a terminal.", 0 ); if( retval < 1 ) retval = 1; - if( program_mode == m_test ) { close( infd ); infd = -1; continue; } + if( program_mode == m_test ) { close( infd ); continue; } cleanup_and_fail( retval ); } @@ -761,24 +821,33 @@ int main( const int argc, const char * const argv[] ) } in_statsp = input_filename[0] ? &in_stats : 0; - tmp = decompress( infd, &pp, buffer_size, ignore_trailing, program_mode == m_test ); + cfile_size = ( in_statsp && S_ISREG( in_statsp->st_mode ) ) ? 
+ ( in_statsp->st_size + 99 ) / 100 : 0; + tmp = decompress( cfile_size, infd, &pp, buffer_size, ignore_trailing, + loose_trailing, program_mode == m_test ); if( tmp > retval ) retval = tmp; - if( tmp && program_mode != m_test ) cleanup_and_fail( retval ); + if( tmp ) + { if( program_mode != m_test ) cleanup_and_fail( retval ); + else ++failed_tests; } if( delete_output_on_interrupt ) close_and_set_permissions( in_statsp ); if( input_filename[0] ) { - close( infd ); infd = -1; + close( infd ); if( !keep_input_files && !to_stdout && program_mode != m_test ) remove( input_filename ); } } if( outfd >= 0 && close( outfd ) != 0 ) { - show_error( "Can't close stdout", errno, false ); + show_error( "Error closing stdout", errno, false ); if( retval < 1 ) retval = 1; } + if( failed_tests > 0 && verbosity >= 1 && num_filenames > 1 ) + fprintf( stderr, "%s: warning: %d %s failed the test.\n", + program_name, failed_tests, + ( failed_tests == 1 ) ? "file" : "files" ); free( output_filename ); free( filenames ); ap_free( &parser ); diff --git a/testsuite/check.sh b/testsuite/check.sh index 334cb71..728c7c6 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lunzip - Decompressor for the lzip format -# Copyright (C) 2010-2017 Antonio Diaz Diaz. +# Copyright (C) 2010-2018 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -51,6 +51,8 @@ rm -f uin.lz [ $? = 2 ] || test_failed $LINENO "${LZIP}" -cdq < in [ $? = 2 ] || test_failed $LINENO +"${LZIP}" -dq -o in < "${in_lz}" +[ $? = 1 ] || test_failed $LINENO # these are for code coverage "${LZIP}" -lt "${in_lz}" 2> /dev/null [ $? = 1 ] || test_failed $LINENO @@ -169,6 +171,41 @@ done printf "\ntesting bad input..." 
+headers='LZIp LZiP LZip LzIP LzIp LziP lZIP lZIp lZiP lzIP' +body='\001\014\000\203\377\373\377\377\300\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000$\000\000\000\000\000\000\000' +cat "${in_lz}" > in0.lz +printf "LZIP${body}" >> in0.lz +if "${LZIP}" -tq in0.lz ; then + for header in ${headers} ; do + printf "${header}${body}" > in0.lz # first member + "${LZIP}" -lq in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -lq --loose-trailing in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + cat "${in_lz}" > in0.lz + printf "${header}${body}" >> in0.lz # trailing data + "${LZIP}" -lq in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -lq --loose-trailing in0.lz + [ $? = 0 ] || test_failed $LINENO ${header} + "${LZIP}" -t --loose-trailing in0.lz + [ $? = 0 ] || test_failed $LINENO ${header} + "${LZIP}" -lq --loose-trailing --trailing-error in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing --trailing-error in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + done +else + printf "\nwarning: skipping header test: 'printf' does not work on your system." +fi +rm -f in0.lz + cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && [ -e trunc.lz ] && cmp in2.lz trunc.lz > /dev/null 2>&1 ; then @@ -176,7 +213,7 @@ if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && dd if=in3.lz of=trunc.lz bs=$i count=1 2> /dev/null "${LZIP}" -lq trunc.lz [ $? = 2 ] || test_failed $LINENO $i - "${LZIP}" -t trunc.lz 2> /dev/null + "${LZIP}" -tq trunc.lz [ $? = 2 ] || test_failed $LINENO $i "${LZIP}" -tq < trunc.lz [ $? 
= 2 ] || test_failed $LINENO $i @@ -188,6 +225,7 @@ if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && else printf "\nwarning: skipping truncation test: 'dd' does not work on your system." fi +rm -f in3.lz trunc.lz cat "${in_lz}" > ingin.lz || framework_failure printf "g" >> ingin.lz || framework_failure @@ -200,6 +238,7 @@ cmp in copy || test_failed $LINENO "${LZIP}" -t < ingin.lz || test_failed $LINENO "${LZIP}" -d < ingin.lz > copy || test_failed $LINENO cmp in copy || test_failed $LINENO +rm -f ingin.lz echo if [ ${fail} = 0 ] ; then -- cgit v1.2.3