From 5a3663d51fd30fee4a7fca98ad0a9603b298e6dc Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Tue, 13 Feb 2018 08:04:57 +0100 Subject: Adding upstream version 1.9. Signed-off-by: Daniel Baumann --- ChangeLog | 17 ++- INSTALL | 2 +- LzmaEnc.c | 10 +- NEWS | 28 +++- README | 4 +- carg_parser.c | 2 +- carg_parser.h | 2 +- configure | 6 +- doc/pdlzip.1 | 7 +- lzip.h | 39 +++++- main.c | 393 ++++++++++++++++++++++++++++++----------------------- testsuite/check.sh | 35 ++++- 12 files changed, 340 insertions(+), 205 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9d6c5c3..d90a18f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2018-02-04 Antonio Diaz Diaz + + * Version 1.9 released. + * main.c: Added new option '--loose-trailing'. + * main.c (decompress): Improved corrupt header detection to HD=3. + * Replaced 'bits/byte' with inverse compression ratio in output. + * main.c: Show final diagnostic when testing multiple files. + * main.c: Do not add a second .lz extension to the arg of -o. + * main.c (lzip_decode): Show stored sizes also in hex. + Show dictionary size at verbosity level 4 (-vvvv). + 2017-04-12 Antonio Diaz Diaz * Version 1.8 released. @@ -11,8 +22,8 @@ * main.c (main): Delete '--output' file if infd is a terminal. * main.c (main): Don't use stdin more than once. * configure: Avoid warning on some shells when testing for gcc. - * testsuite/check.sh: A POSIX shell is required to run the tests. - * testsuite/check.sh: Don't check error messages. + * check.sh: A POSIX shell is required to run the tests. + * check.sh: Don't check error messages. 2015-05-26 Antonio Diaz Diaz @@ -73,7 +84,7 @@ * Using LZMA SDK 9.10 (public domain) from Igor Pavlov. -Copyright (C) 2010-2017 Antonio Diaz Diaz. +Copyright (C) 2010-2018 Antonio Diaz Diaz. 
This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and diff --git a/INSTALL b/INSTALL index 419e1af..327c502 100644 --- a/INSTALL +++ b/INSTALL @@ -61,7 +61,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2010-2017 Antonio Diaz Diaz. +Copyright (C) 2010-2018 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/LzmaEnc.c b/LzmaEnc.c index e931a3d..1dd8ea1 100644 --- a/LzmaEnc.c +++ b/LzmaEnc.c @@ -1371,14 +1371,14 @@ static void LZe_full_flush(CLzmaEnc *p, uint32_t posState) { unsigned long long in_size = p->nowPos64; unsigned long long out_size = p->rc.processed + Fh_size + Ft_size; - if( in_size <= 0 || out_size <= 0 ) + if( in_size == 0 || out_size == 0 ) fputs( " no data compressed.\n", stderr ); else - fprintf( stderr, "%6.3f:1, %6.3f bits/byte, " - "%5.2f%% saved, %llu in, %llu out.\n", + fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved, " + "%llu in, %llu out.\n", (double)in_size / out_size, - ( 8.0 * out_size ) / in_size, - 100.0 * ( 1.0 - ( (double)out_size / in_size ) ), + ( 100.0 * out_size ) / in_size, + 100.0 - ( ( 100.0 * out_size ) / in_size ), in_size, out_size ); } } diff --git a/NEWS b/NEWS index e1cbb03..d99cfae 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,26 @@ -Changes in version 1.8: +Changes in version 1.9: -In test mode, pdlzip now continues checking the rest of the files if any -input file is a terminal. +The option '--loose-trailing' has been added. -Trailing data shorter than a lzip header are now also reported. +The test used by pdlzip to discriminate trailing data from a corrupt +header in multimember or concatenated files has been improved to a +Hamming distance (HD) of 3, and the 3 bit flips must happen in different +magic bytes for the test to fail. 
As a consequence some kinds of files +no longer can be appended to a lzip file as trailing data unless the +'--loose-trailing' option is used when decompressing. +Lziprecover can be used to remove conflicting trailing data from a file. + +The 'bits/byte' ratio has been replaced with the inverse compression +ratio in the output. + +A final diagnostic is now shown at verbosity level 1 (-v) or higher if +any file fails the test when testing multiple files. + +A second '.lz' extension is no longer added to the argument of '-o' if +it already ends in '.lz' or '.tlz'. + +In case of (de)compressed size mismatch, the stored size is now also +shown in hexadecimal to ease visual comparison. + +The dictionary size is now shown at verbosity level 4 (-vvvv) when +decompressing or testing. diff --git a/README b/README index e392d71..517adcf 100644 --- a/README +++ b/README @@ -14,7 +14,7 @@ archiving, taking into account both data integrity and decoder availability: * The lzip format provides very safe integrity checking and some data - recovery means. The lziprecover program can repair bit-flip errors + recovery means. The lziprecover program can repair bit flip errors (one of the most common forms of data corruption) in lzip files, and provides data recovery capabilities, including error-checked merging of damaged copies of a file. @@ -52,7 +52,7 @@ users of the most non-free platforms can share lzip files with everybody else. -Copyright (C) 2010-2017 Antonio Diaz Diaz. +Copyright (C) 2010-2018 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/carg_parser.c b/carg_parser.c index 6850643..10ad4dc 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2017 Antonio Diaz Diaz. + Copyright (C) 2006-2018 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/carg_parser.h b/carg_parser.h index c4ce31d..e1c70dd 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2017 Antonio Diaz Diaz. + Copyright (C) 2006-2018 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/configure b/configure index 4380dac..75f55bd 100755 --- a/configure +++ b/configure @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Pdlzip - LZMA lossless data compressor -# Copyright (C) 2010-2017 Antonio Diaz Diaz. +# Copyright (C) 2010-2018 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. pkgname=pdlzip -pkgversion=1.8 +pkgversion=1.9 progname=pdlzip srctrigger=doc/${progname}.1 @@ -168,7 +168,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Pdlzip - LZMA lossless data compressor -# Copyright (C) 2010-2017 Antonio Diaz Diaz. +# Copyright (C) 2010-2018 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission diff --git a/doc/pdlzip.1 b/doc/pdlzip.1 index 155e5c5..e971323 100644 --- a/doc/pdlzip.1 +++ b/doc/pdlzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. 
-.TH PDLZIP "1" "April 2017" "pdlzip 1.8" "User Commands" +.TH PDLZIP "1" "February 2018" "pdlzip 1.9" "User Commands" .SH NAME pdlzip \- reduces the size of files .SH SYNOPSIS @@ -65,6 +65,9 @@ alias for \fB\-1\fR .TP \fB\-\-best\fR alias for \fB\-9\fR +.TP +\fB\-\-loose\-trailing\fR +allow trailing data seeming corrupt header .PP If no file names are given, or if a file is '\-', pdlzip compresses or decompresses from standard input to standard output. @@ -88,7 +91,7 @@ Report bugs to lzip\-bug@nongnu.org .br Pdlzip home page: http://www.nongnu.org/lzip/pdlzip.html .SH COPYRIGHT -Copyright \(co 2017 Antonio Diaz Diaz. +Copyright \(co 2018 Antonio Diaz Diaz. Public Domain 2009 Igor Pavlov. License 2\-clause BSD. .br diff --git a/lzip.h b/lzip.h index 631599d..0f65f30 100644 --- a/lzip.h +++ b/lzip.h @@ -1,5 +1,5 @@ /* Pdlzip - LZMA lossless data compressor - Copyright (C) 2010-2017 Antonio Diaz Diaz. + Copyright (C) 2010-2018 Antonio Diaz Diaz. This program is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -24,6 +24,8 @@ #define min(x,y) ((x) <= (y) ? 
(x) : (y)) #endif +void * resize_buffer( void * buf, const unsigned min_size ); + typedef int State; enum { @@ -50,9 +52,13 @@ enum { min_match_len_limit = 5 }; +/* defined in main.c */ +extern int verbosity; + struct Pretty_print { const char * name; + char * padded_name; const char * stdin_name; unsigned longest_name; bool first_post; @@ -60,11 +66,12 @@ struct Pretty_print static inline void Pp_init( struct Pretty_print * const pp, const char * const filenames[], - const int num_filenames, const int verbosity ) + const int num_filenames ) { unsigned stdin_name_len; int i; pp->name = 0; + pp->padded_name = 0; pp->stdin_name = "(stdin)"; pp->longest_name = 0; pp->first_post = false; @@ -83,9 +90,19 @@ static inline void Pp_init( struct Pretty_print * const pp, static inline void Pp_set_name( struct Pretty_print * const pp, const char * const filename ) { + unsigned name_len, padded_name_len, i = 0; + if( filename && filename[0] && strcmp( filename, "-" ) != 0 ) pp->name = filename; else pp->name = pp->stdin_name; + name_len = strlen( pp->name ); + padded_name_len = max( name_len, pp->longest_name ) + 4; + pp->padded_name = resize_buffer( pp->padded_name, padded_name_len + 1 ); + while( i < 2 ) pp->padded_name[i++] = ' '; + while( i < name_len + 2 ) { pp->padded_name[i] = pp->name[i-2]; ++i; } + pp->padded_name[i++] = ':'; + while( i < padded_name_len ) pp->padded_name[i++] = ' '; + pp->padded_name[i] = 0; pp->first_post = true; } @@ -149,12 +166,21 @@ static inline void Fh_set_magic( File_header data ) static inline bool Fh_verify_magic( const File_header data ) { return ( memcmp( data, magic_string, 4 ) == 0 ); } -/* detect truncated header */ -static inline bool Fh_verify_prefix( const File_header data, const int size ) +/* detect (truncated) header */ +static inline bool Fh_verify_prefix( const File_header data, const int sz ) { - int i; for( i = 0; i < size && i < 4; ++i ) + int i; for( i = 0; i < sz && i < 4; ++i ) if( data[i] != magic_string[i] ) return 
false; - return ( size > 0 ); + return ( sz > 0 ); + } + +/* detect corrupt header */ +static inline bool Fh_verify_corrupt( const File_header data ) + { + int matches = 0; + int i; for( i = 0; i < 4; ++i ) + if( data[i] == magic_string[i] ) ++matches; + return ( matches > 1 && matches < 4 ); } static inline uint8_t Fh_version( const File_header data ) @@ -233,7 +259,6 @@ int readblock( const int fd, uint8_t * const buf, const int size ); int writeblock( const int fd, const uint8_t * const buf, const int size ); /* defined in main.c */ -extern int verbosity; void cleanup_and_fail( const int retval ); void show_error( const char * const msg, const int errcode, const bool help ); void show_file_error( const char * const filename, const char * const msg, diff --git a/main.c b/main.c index de8e12e..4803491 100644 --- a/main.c +++ b/main.c @@ -1,6 +1,6 @@ /* Pdlzip - LZMA lossless data compressor 2009-08-14 : Igor Pavlov : Public domain - Copyright (C) 2010-2017 Antonio Diaz Diaz. + Copyright (C) 2010-2018 Antonio Diaz Diaz. This program is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -70,7 +70,7 @@ int verbosity = 0; const char * const Program_name = "Pdlzip"; const char * const program_name = "pdlzip"; -const char * const program_year = "2017"; +const char * const program_year = "2018"; const char * invocation_name = 0; const struct { const char * from; const char * to; } known_extensions[] = { @@ -119,6 +119,7 @@ static void show_help( void ) " -1 .. 
-9 set compression level [default 6]\n" " --fast alias for -1\n" " --best alias for -9\n" + " --loose-trailing allow trailing data seeming corrupt header\n" "If no file names are given, or if a file is '-', pdlzip compresses or\n" "decompresses from standard input to standard output.\n" "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" @@ -150,23 +151,49 @@ static void show_version( void ) } -static void show_header( const unsigned dictionary_size ) +/* assure at least a minimum size for buffer 'buf' */ +void * resize_buffer( void * buf, const unsigned min_size ) { - if( verbosity >= 3 ) + if( buf ) buf = realloc( buf, min_size ); + else buf = malloc( min_size ); + if( !buf ) { - enum { factor = 1024 }; - const char * const prefix[8] = - { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; - const char * p = ""; - const char * np = " "; - unsigned num = dictionary_size, i; - bool exact = ( num % factor == 0 ); - - for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) - { num /= factor; if( num % factor != 0 ) exact = false; - p = prefix[i]; np = ""; } - fprintf( stderr, "dictionary %s%4u %sB. 
", np, num, p ); + show_error( "Not enough memory.", 0, false ); + cleanup_and_fail( 1 ); } + return buf; + } + + +void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ) + { + if( verbosity >= 0 ) + { + if( pp->first_post ) + { + pp->first_post = false; + fputs( pp->padded_name, stderr ); + if( !msg ) fflush( stderr ); + } + if( msg ) fprintf( stderr, "%s\n", msg ); + } + } + + +static void show_header( const unsigned dictionary_size ) + { + enum { factor = 1024 }; + const char * const prefix[8] = + { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + const char * p = ""; + const char * np = " "; + unsigned num = dictionary_size; + bool exact = ( num % factor == 0 ); + + int i; for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; + p = prefix[i]; np = ""; } + fprintf( stderr, "dictionary %s%4u %sB, ", np, num, p ); } @@ -249,6 +276,41 @@ static int extension_index( const char * const name ) } +static void set_c_outname( const char * const name, const bool force_ext ) + { + output_filename = resize_buffer( output_filename, strlen( name ) + + strlen( known_extensions[0].from ) + 1 ); + strcpy( output_filename, name ); + if( force_ext || extension_index( output_filename ) < 0 ) + strcat( output_filename, known_extensions[0].from ); + } + + +static void set_d_outname( const char * const name, const int eindex ) + { + const unsigned name_len = strlen( name ); + if( eindex >= 0 ) + { + const char * const from = known_extensions[eindex].from; + const unsigned from_len = strlen( from ); + if( name_len > from_len ) + { + output_filename = resize_buffer( output_filename, name_len + + strlen( known_extensions[eindex].to ) + 1 ); + strcpy( output_filename, name ); + strcpy( output_filename + name_len - from_len, known_extensions[eindex].to ); + return; + } + } + output_filename = resize_buffer( output_filename, name_len + 4 + 1 ); + strcpy( output_filename, name ); + strcat( 
output_filename, ".out" ); + if( verbosity >= 1 ) + fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n", + program_name, name, output_filename ); + } + + static int open_instream( const char * const name, struct stat * const in_statsp, const enum Mode program_mode, const int eindex, const bool recompress, const bool to_stdout ) @@ -289,54 +351,6 @@ static int open_instream( const char * const name, struct stat * const in_statsp } -/* assure at least a minimum size for buffer 'buf' */ -static void * resize_buffer( void * buf, const unsigned min_size ) - { - if( buf ) buf = realloc( buf, min_size ); - else buf = malloc( min_size ); - if( !buf ) - { - show_error( "Not enough memory.", 0, false ); - cleanup_and_fail( 1 ); - } - return buf; - } - - -static void set_c_outname( const char * const name ) - { - output_filename = resize_buffer( output_filename, strlen( name ) + - strlen( known_extensions[0].from ) + 1 ); - strcpy( output_filename, name ); - strcat( output_filename, known_extensions[0].from ); - } - - -static void set_d_outname( const char * const name, const int eindex ) - { - const unsigned name_len = strlen( name ); - if( eindex >= 0 ) - { - const char * const from = known_extensions[eindex].from; - const unsigned from_len = strlen( from ); - if( name_len > from_len ) - { - output_filename = resize_buffer( output_filename, name_len + - strlen( known_extensions[eindex].to ) + 1 ); - strcpy( output_filename, name ); - strcpy( output_filename + name_len - from_len, known_extensions[eindex].to ); - return; - } - } - output_filename = resize_buffer( output_filename, name_len + 4 + 1 ); - strcpy( output_filename, name ); - strcat( output_filename, ".out" ); - if( verbosity >= 1 ) - fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n", - program_name, name, output_filename ); - } - - static bool open_outstream( const bool force, const bool from_stdin ) { const mode_t usr_rw = S_IRUSR | S_IWUSR; @@ -492,9 +506,10 @@ enum 
{ lzma_header_size = LZMA_PROPS_SIZE + 8 }; static int lzma_decode( uint64_t unpackSize, CLzmaDec *decoder, const int infd, uint8_t inBuf[], int * const inPos, - int * const inSize, const bool testing ) + int * const inSize, const unsigned dictionary_size, + const bool testing ) { - unsigned long long total_in = lzma_header_size, total_out = 0; + unsigned long long member_size = lzma_header_size, data_size = 0; uint8_t outBuf[OUT_BUF_SIZE]; int outPos = 0; const bool thereIsSize = (unpackSize != (uint64_t)-1); @@ -519,14 +534,14 @@ static int lzma_decode( uint64_t unpackSize, CLzmaDec *decoder, const int infd, inBuf + *inPos, &inProcessed, finishMode, &status ) ) { show_error( "Data error.", 0, false ); return 2; } *inPos += inProcessed; - total_in += inProcessed; + member_size += inProcessed; outPos += outProcessed; unpackSize -= outProcessed; if( outfd >= 0 && writeblock( outfd, outBuf, outPos ) != outPos ) { show_error( "Can't write output file", errno, false ); return 1; } - total_out += outPos; + data_size += outPos; outPos = 0; if( ( inProcessed == 0 && outProcessed == 0 ) || @@ -535,16 +550,21 @@ static int lzma_decode( uint64_t unpackSize, CLzmaDec *decoder, const int infd, if( ( thereIsSize && unpackSize != 0 ) || ( !thereIsSize && status != LZMA_STATUS_FINISHED_WITH_MARK ) ) { show_error( "Data error.", 0, false ); return 2; } - if( verbosity >= 2 && total_out > 0 && total_in > 0 ) - fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", - (double)total_out / total_in, - ( 8.0 * total_in ) / total_out, - 100.0 * ( 1.0 - ( (double)total_in / total_out ) ) ); - if( verbosity >= 4 ) - fprintf( stderr, "uncompressed size %9llu, compressed size %8llu. ", - total_out, total_in ); if( verbosity >= 2 ) + { + if( verbosity >= 4 ) show_header( dictionary_size ); + if( data_size == 0 || member_size == 0 ) + fputs( "no data compressed. ", stderr ); + else + fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved. 
", + (double)data_size / member_size, + ( 100.0 * member_size ) / data_size, + 100.0 - ( ( 100.0 * member_size ) / data_size ) ); + if( verbosity >= 3 ) + fprintf( stderr, "decompressed %9llu, compressed %8llu. ", + data_size, member_size ); fputs( "lzma-alone, ", stderr ); + } if( verbosity >= 1 ) fputs( testing ? "(apparently) ok\n" : "(apparently) done\n", stderr ); return 0; @@ -553,10 +573,12 @@ static int lzma_decode( uint64_t unpackSize, CLzmaDec *decoder, const int infd, } -static int lzip_decode( CLzmaDec *decoder, const int infd, uint8_t inBuf[], - int * const inPos, int * const inSize ) +static int lzip_decode( CLzmaDec *decoder, const int infd, + struct Pretty_print * const pp, uint8_t inBuf[], + int * const inPos, int * const inSize, + const unsigned dictionary_size ) { - unsigned long long total_in = Fh_size, total_out = 0; + unsigned long long member_size = Fh_size, data_size = 0; uint8_t outBuf[OUT_BUF_SIZE]; int outPos = 0; uint32_t crc = 0xFFFFFFFFU; @@ -571,76 +593,99 @@ static int lzip_decode( CLzmaDec *decoder, const int infd, uint8_t inBuf[], if( *inPos >= *inSize && !read_inbuf( infd, inBuf, inPos, inSize ) ) return 1; if( *inPos >= *inSize ) - { show_error( "Unexpected EOF.", 0, false ); return 2; } + { Pp_show_msg( pp, "Unexpected EOF." ); return 2; } inProcessed = *inSize - *inPos; if( !LzmaDec_DecodeToBuf( decoder, outBuf + outPos, &outProcessed, inBuf + *inPos, &inProcessed, finishMode, &status ) ) - { show_error( "Data error.", 0, false ); return 2; } + { Pp_show_msg( pp, "Data error." 
); return 2; } *inPos += inProcessed; - total_in += inProcessed; + member_size += inProcessed; outPos += outProcessed; if( outfd >= 0 && writeblock( outfd, outBuf, outPos ) != outPos ) { show_error( "Can't write output file", errno, false ); return 1; } CRC32_update_buf( &crc, outBuf, outPos ); - total_out += outPos; + data_size += outPos; outPos = 0; if (inProcessed == 0 && outProcessed == 0) { File_trailer trailer; int i; + unsigned td_crc; + unsigned long long td_size, tm_size; bool error = false; if( status != LZMA_STATUS_FINISHED_WITH_MARK ) - { show_error( "Data error.", 0, false ); return 2; } + { Pp_show_msg( pp, "Data error." ); return 2; } if( *inSize - *inPos < Ft_size && !read_inbuf( infd, inBuf, inPos, inSize ) ) return 1; if( *inSize - *inPos < Ft_size ) { error = true; if( verbosity >= 0 ) + { + Pp_show_msg( pp, 0 ); fprintf( stderr, "Trailer truncated at trailer position %d;" " some checks may fail.\n", *inSize - *inPos ); - for( i = *inSize - *inPos; i < Ft_size; ++i ) - inBuf[*inPos+i] = 0; + } } - for( i = 0; i < Ft_size; ++i ) + for( i = 0; i < Ft_size && *inPos < *inSize; ++i ) trailer[i] = inBuf[(*inPos)++]; - total_in += Ft_size; + member_size += i; + while( i < Ft_size ) trailer[i++] = 0; crc ^= 0xFFFFFFFFU; - if( Ft_get_data_crc( trailer ) != crc ) + td_crc = Ft_get_data_crc( trailer ); + if( td_crc != crc ) { error = true; if( verbosity >= 0 ) - fprintf( stderr, "CRC mismatch; trailer says %08X, data crc is %08X\n", - Ft_get_data_crc( trailer ), crc ); + { + Pp_show_msg( pp, 0 ); + fprintf( stderr, "CRC mismatch; stored %08X, computed %08X\n", + td_crc, crc ); + } } - if( Ft_get_data_size( trailer ) != total_out ) + td_size = Ft_get_data_size( trailer ); + if( td_size != data_size ) { error = true; if( verbosity >= 0 ) - fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX)\n", - Ft_get_data_size( trailer ), total_out, total_out ); + { + Pp_show_msg( pp, 0 ); + fprintf( stderr, "Data size mismatch; stored %llu 
(0x%llX), computed %llu (0x%llX)\n", + td_size, td_size, data_size, data_size ); + } } - if( Ft_get_member_size( trailer ) != total_in ) + tm_size = Ft_get_member_size( trailer ); + if( tm_size != member_size ) { error = true; if( verbosity >= 0 ) - fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX)\n", - Ft_get_member_size( trailer ), total_in, total_in ); + { + Pp_show_msg( pp, 0 ); + fprintf( stderr, "Member size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n", + tm_size, tm_size, member_size, member_size ); + } } - if( !error && verbosity >= 2 && total_out > 0 && total_in > 0 ) - fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", - (double)total_out / total_in, - ( 8.0 * total_in ) / total_out, - 100.0 * ( 1.0 - ( (double)total_in / total_out ) ) ); - if( !error && verbosity >= 4 ) - fprintf( stderr, "CRC %08X, decompressed %9llu, compressed %8llu. ", - crc, total_out, total_in ); if( error ) return 2; + if( verbosity >= 2 ) + { + if( verbosity >= 4 ) show_header( dictionary_size ); + if( data_size == 0 || member_size == 0 ) + fputs( "no data compressed. ", stderr ); + else + fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved. ", + (double)data_size / member_size, + ( 100.0 * member_size ) / data_size, + 100.0 - ( ( 100.0 * member_size ) / data_size ) ); + if( verbosity >= 4 ) fprintf( stderr, "CRC %08X, ", td_crc ); + if( verbosity >= 3 ) + fprintf( stderr, "decompressed %9llu, compressed %8llu. 
", + data_size, member_size ); + } return 0; } } @@ -648,7 +693,8 @@ static int lzip_decode( CLzmaDec *decoder, const int infd, uint8_t inBuf[], static int decompress( const int infd, struct Pretty_print * const pp, - const bool ignore_trailing, const bool testing ) + const bool ignore_trailing, const bool loose_trailing, + const bool testing ) { uint64_t unpackSize = 0; CLzmaDec decoder; @@ -671,19 +717,25 @@ static int decompress( const int infd, struct Pretty_print * const pp, raw_props[i] = header[i] = inBuf[inPos++]; if( size <= Fh_size ) /* End Of File */ { - if( first_member || Fh_verify_prefix( header, size ) ) - { Pp_show_msg( pp, "File ends unexpectedly at member header." ); + if( first_member ) + { show_file_error( pp->name, "File ends unexpectedly at member header.", 0 ); + retval = 2; } + else if( Fh_verify_prefix( header, size ) ) + { Pp_show_msg( pp, "Truncated header in multimember file." ); retval = 2; } else if( size > 0 && !ignore_trailing ) - { show_file_error( pp->name, trailing_msg, 0 ); retval = 2; } + { Pp_show_msg( pp, trailing_msg ); retval = 2; } break; } if( !Fh_verify_magic( header ) ) { if( !first_member ) { - if( !ignore_trailing ) - { show_file_error( pp->name, trailing_msg, 0 ); retval = 2; } + if( !loose_trailing && Fh_verify_corrupt( header ) ) + { Pp_show_msg( pp, "Corrupt header in multimember file." ); + retval = 2; } + else if( !ignore_trailing ) + { Pp_show_msg( pp, trailing_msg ); retval = 2; } break; } if( inSize - inPos >= lzma_header_size - Fh_size ) /* try lzma-alone */ @@ -730,15 +782,16 @@ static int decompress( const int infd, struct Pretty_print * const pp, } if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) - { Pp_show_msg( pp, 0 ); show_header( dictionary_size ); } + Pp_show_msg( pp, 0 ); if( !LzmaDec_Init( &decoder, raw_props ) ) { Pp_show_msg( pp, "Not enough memory." 
); return 1; } if( lzip_mode ) - retval = lzip_decode( &decoder, infd, inBuf, &inPos, &inSize ); + retval = lzip_decode( &decoder, infd, pp, inBuf, &inPos, &inSize, + dictionary_size ); else - retval = lzma_decode( unpackSize, &decoder, infd, - inBuf, &inPos, &inSize, testing ); + retval = lzma_decode( unpackSize, &decoder, infd, inBuf, &inPos, + &inSize, dictionary_size, testing ); LzmaDec_Free(&decoder); if( retval != 0 || !lzip_mode ) break; if( verbosity >= 2 ) @@ -769,24 +822,6 @@ static void set_signals( void ) CRC32 crc32; -void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ) - { - if( verbosity >= 0 ) - { - if( pp->first_post ) - { - unsigned i; - pp->first_post = false; - fprintf( stderr, " %s: ", pp->name ); - for( i = strlen( pp->name ); i < pp->longest_name; ++i ) - fputc( ' ', stderr ); - if( !msg ) fflush( stderr ); - } - if( msg ) fprintf( stderr, "%s\n", msg ); - } - } - - /* Returns the number of bytes really read. If (returned value < size) and (errno == 0), means EOF was reached. 
*/ @@ -877,50 +912,53 @@ int main( const int argc, const char * const argv[] ) const char * default_output_filename = ""; const char ** filenames = 0; int num_filenames = 0; - int infd = -1; enum Mode program_mode = m_compress; int argind = 0; + int failed_tests = 0; int retval = 0; int i; bool filenames_given = false; bool force = false; bool ignore_trailing = true; bool keep_input_files = false; + bool loose_trailing = false; bool recompress = false; bool stdin_used = false; bool to_stdout = false; struct Pretty_print pp; + enum { opt_lt = 256 }; const struct ap_Option options[] = { - { '0', "fast", ap_no }, - { '1', 0, ap_no }, - { '2', 0, ap_no }, - { '3', 0, ap_no }, - { '4', 0, ap_no }, - { '5', 0, ap_no }, - { '6', 0, ap_no }, - { '7', 0, ap_no }, - { '8', 0, ap_no }, - { '9', "best", ap_no }, - { 'a', "trailing-error", ap_no }, - { 'b', "member-size", ap_yes }, - { 'c', "stdout", ap_no }, - { 'd', "decompress", ap_no }, - { 'f', "force", ap_no }, - { 'F', "recompress", ap_no }, - { 'h', "help", ap_no }, - { 'k', "keep", ap_no }, - { 'm', "match-length", ap_yes }, - { 'n', "threads", ap_yes }, - { 'o', "output", ap_yes }, - { 'q', "quiet", ap_no }, - { 's', "dictionary-size", ap_yes }, - { 'S', "volume-size", ap_yes }, - { 't', "test", ap_no }, - { 'v', "verbose", ap_no }, - { 'V', "version", ap_no }, - { 0 , 0, ap_no } }; + { '0', "fast", ap_no }, + { '1', 0, ap_no }, + { '2', 0, ap_no }, + { '3', 0, ap_no }, + { '4', 0, ap_no }, + { '5', 0, ap_no }, + { '6', 0, ap_no }, + { '7', 0, ap_no }, + { '8', 0, ap_no }, + { '9', "best", ap_no }, + { 'a', "trailing-error", ap_no }, + { 'b', "member-size", ap_yes }, + { 'c', "stdout", ap_no }, + { 'd', "decompress", ap_no }, + { 'f', "force", ap_no }, + { 'F', "recompress", ap_no }, + { 'h', "help", ap_no }, + { 'k', "keep", ap_no }, + { 'm', "match-length", ap_yes }, + { 'n', "threads", ap_yes }, + { 'o', "output", ap_yes }, + { 'q', "quiet", ap_no }, + { 's', "dictionary-size", ap_yes }, + { 'S', "volume-size", 
ap_yes }, + { 't', "test", ap_no }, + { 'v', "verbose", ap_no }, + { 'V', "version", ap_no }, + { opt_lt, "loose-trailing", ap_no }, + { 0 , 0, ap_no } }; struct Arg_parser parser; @@ -961,6 +999,7 @@ int main( const int argc, const char * const argv[] ) case 't': program_mode = m_test; break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; + case opt_lt: loose_trailing = true; break; default : internal_error( "uncaught option." ); } } /* end process options */ @@ -987,12 +1026,13 @@ int main( const int argc, const char * const argv[] ) ( filenames_given || default_output_filename[0] ) ) set_signals(); - Pp_init( &pp, filenames, num_filenames, verbosity ); + Pp_init( &pp, filenames, num_filenames ); output_filename = resize_buffer( output_filename, 1 ); for( i = 0; i < num_filenames; ++i ) { const char * input_filename = ""; + int infd; int tmp; struct stat in_stats; const struct stat * in_statsp; @@ -1009,17 +1049,17 @@ int main( const int argc, const char * const argv[] ) else { if( program_mode == m_compress ) - set_c_outname( default_output_filename ); + set_c_outname( default_output_filename, false ); else { output_filename = resize_buffer( output_filename, - strlen( default_output_filename ) + 1 ); + strlen( default_output_filename ) + 1 ); strcpy( output_filename, default_output_filename ); } if( !open_outstream( force, true ) ) { if( retval < 1 ) retval = 1; - close( infd ); infd = -1; + close( infd ); continue; } } @@ -1037,12 +1077,12 @@ int main( const int argc, const char * const argv[] ) else { if( program_mode == m_compress ) - set_c_outname( input_filename ); + set_c_outname( input_filename, true ); else set_d_outname( input_filename, eindex ); if( !open_outstream( force, false ) ) { if( retval < 1 ) retval = 1; - close( infd ); infd = -1; + close( infd ); continue; } } @@ -1053,7 +1093,7 @@ int main( const int argc, const char * const argv[] ) if( !check_tty( pp.name, infd, program_mode ) ) { if( retval < 1 ) 
retval = 1; - if( program_mode == m_test ) { close( infd ); infd = -1; continue; } + if( program_mode == m_test ) { close( infd ); continue; } cleanup_and_fail( retval ); } @@ -1061,24 +1101,31 @@ int main( const int argc, const char * const argv[] ) if( program_mode == m_compress ) tmp = compress( &encoder_options, &pp, infd ); else - tmp = decompress( infd, &pp, ignore_trailing, program_mode == m_test ); + tmp = decompress( infd, &pp, ignore_trailing, + loose_trailing, program_mode == m_test ); if( tmp > retval ) retval = tmp; - if( tmp && program_mode != m_test ) cleanup_and_fail( retval ); + if( tmp ) + { if( program_mode != m_test ) cleanup_and_fail( retval ); + else ++failed_tests; } if( delete_output_on_interrupt ) close_and_set_permissions( in_statsp ); if( input_filename[0] ) { - close( infd ); infd = -1; + close( infd ); if( !keep_input_files && !to_stdout && program_mode != m_test ) remove( input_filename ); } } if( outfd >= 0 && close( outfd ) != 0 ) { - show_error( "Can't close stdout", errno, false ); + show_error( "Error closing stdout", errno, false ); if( retval < 1 ) retval = 1; } + if( failed_tests > 0 && verbosity >= 1 && num_filenames > 1 ) + fprintf( stderr, "%s: warning: %d %s failed the test.\n", + program_name, failed_tests, + ( failed_tests == 1 ) ? "file" : "files" ); free( output_filename ); free( filenames ); ap_free( &parser ); diff --git a/testsuite/check.sh b/testsuite/check.sh index 82e609a..406c6e0 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Pdlzip - LZMA lossless data compressor -# Copyright (C) 2010-2017 Antonio Diaz Diaz. +# Copyright (C) 2010-2018 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -51,6 +51,8 @@ done [ $? = 2 ] || test_failed $LINENO "${LZIP}" -cdq < in [ $? = 2 ] || test_failed $LINENO +"${LZIP}" -dq -o in < "${in_lz}" +[ $? 
= 1 ] || test_failed $LINENO # these are for code coverage "${LZIP}" -t -- nx_file 2> /dev/null [ $? = 1 ] || test_failed $LINENO @@ -133,7 +135,7 @@ cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure "${LZIP}" -cd in2.lz > copy2 || test_failed $LINENO cmp in2 copy2 || test_failed $LINENO -"${LZIP}" -s16 --output=copy2 < in2 || test_failed $LINENO +"${LZIP}" -s16 --output=copy2.lz < in2 || test_failed $LINENO "${LZIP}" -t copy2.lz || test_failed $LINENO "${LZIP}" -cd copy2.lz > copy2 || test_failed $LINENO cmp in2 copy2 || test_failed $LINENO @@ -190,12 +192,37 @@ done printf "\ntesting bad input..." +headers='LZIp LZiP LZip LzIP LzIp LziP lZIP lZIp lZiP lzIP' +body='\001\014\000\203\377\373\377\377\300\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000$\000\000\000\000\000\000\000' +cat "${in_lz}" > in0.lz +printf "LZIP${body}" >> in0.lz +if "${LZIP}" -tq in0.lz ; then + for header in ${headers} ; do + printf "${header}${body}" > in0.lz # first member + "${LZIP}" -tq in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + cat "${in_lz}" > in0.lz + printf "${header}${body}" >> in0.lz # trailing data + "${LZIP}" -tq in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -t --loose-trailing in0.lz + [ $? = 0 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing --trailing-error in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + done +else + printf "\nwarning: skipping header test: 'printf' does not work on your system." +fi +rm -f in0.lz + cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && [ -e trunc.lz ] && cmp in2.lz trunc.lz > /dev/null 2>&1 ; then for i in 6 20 14734 14753 14754 14755 14756 14757 14758 ; do dd if=in3.lz of=trunc.lz bs=$i count=1 2> /dev/null - "${LZIP}" -t trunc.lz 2> /dev/null + "${LZIP}" -tq trunc.lz [ $? 
= 2 ] || test_failed $LINENO $i "${LZIP}" -tq < trunc.lz [ $? = 2 ] || test_failed $LINENO $i @@ -207,6 +234,7 @@ if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && else printf "\nwarning: skipping truncation test: 'dd' does not work on your system." fi +rm -f in3.lz trunc.lz cat "${in_lz}" > ingin.lz || framework_failure printf "g" >> ingin.lz || framework_failure @@ -217,6 +245,7 @@ cmp in copy || test_failed $LINENO "${LZIP}" -t < ingin.lz || test_failed $LINENO "${LZIP}" -d < ingin.lz > copy || test_failed $LINENO cmp in copy || test_failed $LINENO +rm -f ingin.lz echo if [ ${fail} = 0 ] ; then -- cgit v1.2.3