diff options
-rw-r--r-- | ChangeLog | 23 | ||||
-rw-r--r-- | INSTALL | 4 | ||||
-rw-r--r-- | Makefile.in | 9 | ||||
-rw-r--r-- | NEWS | 25 | ||||
-rw-r--r-- | README | 6 | ||||
-rw-r--r-- | carg_parser.c | 2 | ||||
-rw-r--r-- | carg_parser.h | 2 | ||||
-rwxr-xr-x | configure | 14 | ||||
-rw-r--r-- | decoder.c | 56 | ||||
-rw-r--r-- | decoder.h | 48 | ||||
-rw-r--r-- | doc/clzip.1 | 20 | ||||
-rw-r--r-- | doc/clzip.info | 189 | ||||
-rw-r--r-- | doc/clzip.texi | 165 | ||||
-rw-r--r-- | encoder.c | 24 | ||||
-rw-r--r-- | encoder.h | 4 | ||||
-rw-r--r-- | encoder_base.c | 2 | ||||
-rw-r--r-- | encoder_base.h | 9 | ||||
-rw-r--r-- | fast_encoder.c | 3 | ||||
-rw-r--r-- | fast_encoder.h | 2 | ||||
-rw-r--r-- | lzip.h | 47 | ||||
-rw-r--r-- | main.c | 196 | ||||
-rwxr-xr-x | testsuite/check.sh | 100 |
22 files changed, 614 insertions, 336 deletions
@@ -1,3 +1,18 @@ +2016-05-13 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.8 released. + * main.c: Added new option '-a, --trailing-error'. + * main.c (decompress): Print up to 6 bytes of trailing data + when '-vvvv' is specified. + * decoder.c (LZd_verify_trailer): Removed test of final code. + * main.c (main): Delete '--output' file if infd is a terminal. + * main.c (main): Don't use stdin more than once. + * lzip.texi: Added chapter 'Trailing data'. + * configure: Avoid warning on some shells when testing for gcc. + * Makefile.in: Detect the existence of install-info. + * testsuite/check.sh: A POSIX shell is required to run the tests. + * testsuite/check.sh: Don't check error messages. + 2015-07-07 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.7 released. @@ -16,7 +31,7 @@ * Version 1.5 released. * Show progress of compression at verbosity level 2 (-vv). - * main.c (show_header): Do not show header version. + * main.c (show_header): Don't show header version. * Ignore option '-n, --threads' for compatibility with plzip. * configure: Options now accept a separate argument. @@ -48,7 +63,7 @@ * Version 1.2 released. * main.c: Added new option '-F, --recompress'. * main.c (decompress): Print only one status line for each - multi-member file when only one '-v' is specified. + multimember file when only one '-v' is specified. * encoder.h (Lee_update_prices): Update high length symbol prices independently of the value of 'pos_state'. This gives better compression for large values of '--match-length' without being @@ -68,7 +83,7 @@ compress less but faster. (-1 now takes 43% less time for only 20% larger compressed size). * Compression ratio of option '-9' has been slightly increased. - * main.c (open_instream): Do not show the message + * main.c (open_instream): Don't show the message " and '--stdout' was not specified" for directories, etc. * New examples have been added to the manual. @@ -79,7 +94,7 @@ * Translated to C from the C++ source of lzip 1.10. -Copyright (C) 2010-2015 Antonio Diaz Diaz. +Copyright (C) 2010-2016 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and @@ -1,7 +1,7 @@ Requirements ------------ You will need a C compiler. -I use gcc 4.9.1 and 4.1.2, but the code should compile with any +I use gcc 5.3.0 and 4.1.2, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -62,7 +62,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2010-2015 Antonio Diaz Diaz. +Copyright (C) 2010-2016 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/Makefile.in b/Makefile.in index f40352e..d028148 100644 --- a/Makefile.in +++ b/Makefile.in @@ -5,6 +5,7 @@ INSTALL_PROGRAM = $(INSTALL) -m 755 INSTALL_DATA = $(INSTALL) -m 644 INSTALL_DIR = $(INSTALL) -d -m 755 SHELL = /bin/sh +CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 objs = carg_parser.o encoder_base.o encoder.o fast_encoder.o decoder.o main.o @@ -69,7 +70,9 @@ install-info : if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info" - -install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" + -if $(CAN_RUN_INSTALLINFO) ; then \ + install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ + fi install-info-compress : install-info lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info" @@ -92,7 +95,9 @@ uninstall-bin : -rm -f "$(DESTDIR)$(bindir)/$(progname)" uninstall-info : - -install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" + -if $(CAN_RUN_INSTALLINFO) ; then \ + install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ + fi -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* uninstall-man : @@ -1,8 +1,21 @@ -Changes in version 1.7: +Changes in version 1.8: -The option "-0", which produces a compression speed and ratio comparable -to those of gzip, has been ported from lzip. +The option "-a, --trailing-error", which makes clzip exit with error +status 2 if any remaining input is detected after decompressing the last +member, has been added. -The targets "install-compress", "install-strip-compress", -"install-info-compress" and "install-man-compress" have been added to -the Makefile. +When decompressing or testing, up to 6 bytes of trailing data are +printed if "-vvvv" is specified. + +The test of the value remaining in the range decoder has been removed. +(After extensive testing it has been found useless to detect corruption +in the decompressed data. Eliminating it reduces the number of false +positives for corruption and makes error detection more accurate). + +When decompressing, the file specified with the '--output' option is now +deleted if the input is a terminal. + +The new chapter "Trailing data" has been added to the manual. + +A harmless check failure on Windows, caused by the failed comparison of +a message in text mode, has been fixed. @@ -80,14 +80,14 @@ or more compressed files. The result is the concatenation of the corresponding uncompressed files. Integrity testing of concatenated compressed files is also supported. -Clzip can produce multi-member files and safely recover, with +Clzip can produce multimember files and safely recover, with lziprecover, the undamaged members in case of file damage. Clzip can also split the compressed output in volumes of a given size, even when reading from standard input. This allows the direct creation of multivolume compressed tar archives. Clzip is able to compress and decompress streams of unlimited size by -automatically creating multi-member output. The members so created are +automatically creating multimember output. The members so created are large, about 2 PiB each. In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a @@ -115,7 +115,7 @@ range encoding), Igor Pavlov (for putting all the above together in LZMA), and Julian Seward (for bzip2's CLI). -Copyright (C) 2010-2015 Antonio Diaz Diaz. +Copyright (C) 2010-2016 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/carg_parser.c b/carg_parser.c index 8d74ea6..3d4e89f 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2015 Antonio Diaz Diaz. + Copyright (C) 2006-2016 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/carg_parser.h b/carg_parser.h index ed4d9c5..e918942 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2015 Antonio Diaz Diaz. + Copyright (C) 2006-2016 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Clzip - LZMA lossless data compressor -# Copyright (C) 2010-2015 Antonio Diaz Diaz. +# Copyright (C) 2010-2016 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. pkgname=clzip -pkgversion=1.7 +pkgversion=1.8 progname=clzip srctrigger=doc/${pkgname}.texi @@ -26,8 +26,8 @@ CFLAGS='-Wall -W -O2' LDFLAGS= # checking whether we are using GNU C. -${CC} --version > /dev/null 2>&1 -if [ $? != 0 ] ; then +if /bin/sh -c "${CC} --version" > /dev/null 2>&1 ; then true +else CC=cc CFLAGS='-W -O2' fi @@ -139,7 +139,7 @@ if [ -z "${no_create}" ] ; then rm -f config.status cat > config.status << EOF #! /bin/sh -# This file was generated automatically by configure. Do not edit. +# This file was generated automatically by configure. Don't edit. # Run this file to recreate the current configuration. # # This script is free software: you have unlimited permission @@ -165,8 +165,8 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Clzip - LZMA lossless data compressor -# Copyright (C) 2010-2015 Antonio Diaz Diaz. -# This file was generated automatically by configure. Do not edit. +# Copyright (C) 2010-2016 Antonio Diaz Diaz. +# This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission # to copy, distribute and modify it. @@ -1,5 +1,5 @@ /* Clzip - LZMA lossless data compressor - Copyright (C) 2010-2015 Antonio Diaz Diaz. + Copyright (C) 2010-2016 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,19 +29,17 @@ #include "decoder.h" -CRC32 crc32; - - void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ) { if( verbosity >= 0 ) { if( pp->first_post ) { - int i, len = pp->longest_name - strlen( pp->name ); + unsigned i; pp->first_post = false; fprintf( stderr, " %s: ", pp->name ); - for( i = 0; i < len; ++i ) fputc( ' ', stderr ); + for( i = strlen( pp->name ); i < pp->longest_name; ++i ) + fputc( ' ', stderr ); if( !msg ) fflush( stderr ); } if( msg ) fprintf( stderr, "%s\n", msg ); @@ -110,8 +108,8 @@ void LZd_flush_data( struct LZ_decoder * const d ) if( d->outfd >= 0 && writeblock( d->outfd, d->buffer + d->stream_pos, size ) != size ) { show_error( "Write error", errno, false ); cleanup_and_fail( 1 ); } - if( d->pos >= d->buffer_size ) - { d->partial_data_pos += d->pos; d->pos = 0; } + if( d->pos >= d->dictionary_size ) + { d->partial_data_pos += d->pos; d->pos = 0; d->pos_wrapped = true; } d->stream_pos = d->pos; } } @@ -121,13 +119,11 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d, struct Pretty_print * const pp ) { File_trailer trailer; - const unsigned long long member_size = Rd_member_position( d->rdec ) + Ft_size; - unsigned long long trailer_data_size; - unsigned long long trailer_member_size; - unsigned trailer_crc; + int size = Rd_read_data( d->rdec, trailer, Ft_size ); + const unsigned long long data_size = LZd_data_position( d ); + const unsigned long long member_size = Rd_member_position( d->rdec ); bool error = false; - int size = Rd_read_data( d->rdec, trailer, Ft_size ); if( size < Ft_size ) { error = true; @@ -140,52 +136,44 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d, while( size < Ft_size ) trailer[size++] = 0; } - if( d->rdec->code != 0 ) - { - error = true; - Pp_show_msg( pp, "Range decoder final code is not zero." ); - } - trailer_crc = Ft_get_data_crc( trailer ); - if( trailer_crc != LZd_crc( d ) ) + if( Ft_get_data_crc( trailer ) != LZd_crc( d ) ) { error = true; if( verbosity >= 0 ) { Pp_show_msg( pp, 0 ); fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X\n", - trailer_crc, LZd_crc( d ) ); + Ft_get_data_crc( trailer ), LZd_crc( d ) ); } } - trailer_data_size = Ft_get_data_size( trailer ); - if( trailer_data_size != LZd_data_position( d ) ) + if( Ft_get_data_size( trailer ) != data_size ) { error = true; if( verbosity >= 0 ) { Pp_show_msg( pp, 0 ); fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX)\n", - trailer_data_size, LZd_data_position( d ), LZd_data_position( d ) ); + Ft_get_data_size( trailer ), data_size, data_size ); } } - trailer_member_size = Ft_get_member_size( trailer ); - if( trailer_member_size != member_size ) + if( Ft_get_member_size( trailer ) != member_size ) { error = true; if( verbosity >= 0 ) { Pp_show_msg( pp, 0 ); fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX)\n", - trailer_member_size, member_size, member_size ); + Ft_get_member_size( trailer ), member_size, member_size ); } } - if( !error && verbosity >= 2 && LZd_data_position( d ) > 0 && member_size > 0 ) + if( !error && verbosity >= 2 && data_size > 0 && member_size > 0 ) fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", - (double)LZd_data_position( d ) / member_size, - ( 8.0 * member_size ) / LZd_data_position( d ), - 100.0 * ( 1.0 - ( (double)member_size / LZd_data_position( d ) ) ) ); + (double)data_size / member_size, + ( 8.0 * member_size ) / data_size, + 100.0 * ( 1.0 - ( (double)member_size / data_size ) ) ); if( !error && verbosity >= 4 ) fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ", - trailer_crc, trailer_data_size, trailer_member_size ); + LZd_crc( d ), data_size, member_size ); return !error; } @@ -255,8 +243,8 @@ int LZd_decode_member( struct LZ_decoder * const d, } else /* match */ { - int dis_slot; const unsigned rep0_saved = rep0; + int dis_slot; len = min_match_len + Rd_decode_len( rdec, &d->match_len_model, pos_state ); dis_slot = Rd_decode_tree6( rdec, d->bm_dis_slot[get_len_state(len)] ); if( dis_slot < start_dis_model ) rep0 = dis_slot; @@ -295,7 +283,7 @@ int LZd_decode_member( struct LZ_decoder * const d, } rep3 = rep2; rep2 = rep1; rep1 = rep0_saved; state = St_set_match( state ); - if( rep0 >= d->dictionary_size || rep0 >= LZd_data_position( d ) ) + if( rep0 >= d->dictionary_size || ( rep0 >= d->pos && !d->pos_wrapped ) ) { LZd_flush_data( d ); return 1; } } LZd_copy_block( d, rep0, len ); @@ -1,5 +1,5 @@ /* Clzip - LZMA lossless data compressor - Copyright (C) 2010-2015 Antonio Diaz Diaz. + Copyright (C) 2010-2016 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -60,7 +60,8 @@ static inline void Rd_reset_member_position( struct Range_decoder * const rdec ) static inline uint8_t Rd_get_byte( struct Range_decoder * const rdec ) { - if( Rd_finished( rdec ) ) return 0xAA; /* make code != 0 */ + /* 0xFF avoids decoder error if member is truncated at EOS marker */ + if( Rd_finished( rdec ) ) return 0xFF; return rdec->buffer[rdec->pos++]; } @@ -232,12 +233,12 @@ struct LZ_decoder unsigned long long partial_data_pos; struct Range_decoder * rdec; unsigned dictionary_size; - int buffer_size; uint8_t * buffer; /* output buffer */ - int pos; /* current pos in buffer */ - int stream_pos; /* first byte not yet written to file */ + unsigned pos; /* current pos in buffer */ + unsigned stream_pos; /* first byte not yet written to file */ uint32_t crc; int outfd; /* output file descriptor */ + bool pos_wrapped; Bit_model bm_literal[1<<literal_context_bits][0x300]; Bit_model bm_match[states][pos_states]; @@ -258,56 +259,61 @@ void LZd_flush_data( struct LZ_decoder * const d ); static inline uint8_t LZd_peek_prev( const struct LZ_decoder * const d ) { - const int i = ( ( d->pos > 0 ) ? d->pos : d->buffer_size ) - 1; + const unsigned i = ( ( d->pos > 0 ) ? d->pos : d->dictionary_size ) - 1; return d->buffer[i]; } static inline uint8_t LZd_peek( const struct LZ_decoder * const d, - const int distance ) + const unsigned distance ) { - int i = d->pos - distance - 1; - if( i < 0 ) i += d->buffer_size; + unsigned i = d->pos - distance - 1; + if( d->pos <= distance ) i += d->dictionary_size; return d->buffer[i]; } static inline void LZd_put_byte( struct LZ_decoder * const d, const uint8_t b ) { d->buffer[d->pos] = b; - if( ++d->pos >= d->buffer_size ) LZd_flush_data( d ); + if( ++d->pos >= d->dictionary_size ) LZd_flush_data( d ); } static inline void LZd_copy_block( struct LZ_decoder * const d, - const int distance, int len ) + const unsigned distance, unsigned len ) { - int i = d->pos - distance - 1; - if( i < 0 ) i += d->buffer_size; - if( len < d->buffer_size - max( d->pos, i ) && len <= abs( d->pos - i ) ) + unsigned i = d->pos - distance - 1; + bool fast; + if( d->pos <= distance ) + { i += d->dictionary_size; + fast = ( len <= d->dictionary_size - i && len <= i - d->pos ); } + else + fast = ( len < d->dictionary_size - d->pos && len <= d->pos - i ); + if( fast ) /* no wrap, no overlap */ { - memcpy( d->buffer + d->pos, d->buffer + i, len ); /* no wrap, no overlap */ + memcpy( d->buffer + d->pos, d->buffer + i, len ); d->pos += len; } else for( ; len > 0; --len ) { d->buffer[d->pos] = d->buffer[i]; - if( ++d->pos >= d->buffer_size ) LZd_flush_data( d ); - if( ++i >= d->buffer_size ) i = 0; + if( ++d->pos >= d->dictionary_size ) LZd_flush_data( d ); + if( ++i >= d->dictionary_size ) i = 0; } } static inline bool LZd_init( struct LZ_decoder * const d, struct Range_decoder * const rde, - const int dict_size, const int ofd ) + const unsigned dict_size, const int ofd ) { d->partial_data_pos = 0; d->rdec = rde; d->dictionary_size = dict_size; - d->buffer_size = max( 65536U, d->dictionary_size ); - d->buffer = (uint8_t *)malloc( d->buffer_size ); + d->buffer = (uint8_t *)malloc( d->dictionary_size ); if( !d->buffer ) return false; d->pos = 0; d->stream_pos = 0; d->crc = 0xFFFFFFFFU; d->outfd = ofd; + d->pos_wrapped = false; Bm_array_init( d->bm_literal[0], (1 << literal_context_bits) * 0x300 ); Bm_array_init( d->bm_match[0], states * pos_states ); @@ -321,7 +327,7 @@ static inline bool LZd_init( struct LZ_decoder * const d, Bm_array_init( d->bm_align, dis_align_size ); Lm_init( &d->match_len_model ); Lm_init( &d->rep_len_model ); - d->buffer[d->buffer_size-1] = 0; /* prev_byte of first byte */ + d->buffer[d->dictionary_size-1] = 0; /* prev_byte of first byte */ return true; } diff --git a/doc/clzip.1 b/doc/clzip.1 index 32b3bde..5dbb695 100644 --- a/doc/clzip.1 +++ b/doc/clzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH CLZIP "1" "July 2015" "clzip 1.7" "User Commands" +.TH CLZIP "1" "May 2016" "clzip 1.8" "User Commands" .SH NAME clzip \- reduces the size of files .SH SYNOPSIS @@ -15,11 +15,14 @@ display this help and exit \fB\-V\fR, \fB\-\-version\fR output version information and exit .TP +\fB\-a\fR, \fB\-\-trailing\-error\fR +exit with error status if trailing data +.TP \fB\-b\fR, \fB\-\-member\-size=\fR<bytes> set member size limit in bytes .TP \fB\-c\fR, \fB\-\-stdout\fR -send output to standard output +write to standard output, keep input files .TP \fB\-d\fR, \fB\-\-decompress\fR decompress @@ -37,7 +40,7 @@ keep (don't delete) input files set match length limit in bytes [36] .TP \fB\-o\fR, \fB\-\-output=\fR<file> -if reading stdin, place the output into <file> +if reading standard input, write to <file> .TP \fB\-q\fR, \fB\-\-quiet\fR suppress all messages @@ -63,13 +66,16 @@ alias for \fB\-0\fR \fB\-\-best\fR alias for \fB\-9\fR .PP -If no file names are given, clzip compresses or decompresses -from standard input to standard output. +If no file names are given, or if a file is '\-', clzip compresses or +decompresses from standard input to standard output. Numbers may be followed by a multiplier: k = kB = 10^3 = 1000, Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... +Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12 +to 2^29 bytes. +.PP The bidimensional parameter space of LZMA can't be mapped to a linear scale optimal for all files. If your files are large, very repetitive, -etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR +etc, you may need to use the \fB\-\-dictionary\-size\fR and \fB\-\-match\-length\fR options directly to achieve optimal performance. .PP Exit status: 0 for a normal exit, 1 for environmental problems (file @@ -81,7 +87,7 @@ Report bugs to lzip\-bug@nongnu.org .br Clzip home page: http://www.nongnu.org/lzip/clzip.html .SH COPYRIGHT -Copyright \(co 2015 Antonio Diaz Diaz. +Copyright \(co 2016 Antonio Diaz Diaz. License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. diff --git a/doc/clzip.info b/doc/clzip.info index 786d8c1..c590473 100644 --- a/doc/clzip.info +++ b/doc/clzip.info @@ -11,7 +11,7 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir) Clzip Manual ************ -This manual is for Clzip (version 1.7, 7 July 2015). +This manual is for Clzip (version 1.8, 13 May 2016). * Menu: @@ -19,12 +19,13 @@ This manual is for Clzip (version 1.7, 7 July 2015). * Invoking clzip:: Command line interface * File format:: Detailed format of the compressed file * Algorithm:: How clzip compresses the data +* Trailing data:: Extra data appended to the file * Examples:: A small tutorial with examples * Problems:: Reporting bugs * Concept index:: Index of concepts - Copyright (C) 2010-2015 Antonio Diaz Diaz. + Copyright (C) 2010-2016 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -53,7 +54,7 @@ availability: recovery means. The lziprecover program can repair bit-flip errors (one of the most common forms of data corruption) in lzip files, and provides data recovery capabilities, including error-checked - merging of damaged copies of a file. *note Data safety: + merging of damaged copies of a file. *Note Data safety: (lziprecover)Data safety. * The lzip format is as simple as possible (but not simpler). The @@ -73,15 +74,14 @@ corrupt byte near the beginning is a thing of the past. The member trailer stores the 32-bit CRC of the original data, the size of the original data and the size of the member. These values, -together with the value remaining in the range decoder and the -end-of-stream marker, provide a 4 factor integrity checking which -guarantees that the decompressed version of the data is identical to -the original. This guards against corruption of the compressed data, -and against undetected bugs in clzip (hopefully very unlikely). The -chances of data corruption going undetected are microscopic. Be aware, -though, that the check occurs upon decompression, so it can only tell -you that something is wrong. It can't help you recover the original -uncompressed data. +together with the end-of-stream marker, provide a 3 factor integrity +checking which guarantees that the decompressed version of the data is +identical to the original. This guards against corruption of the +compressed data, and against undetected bugs in clzip (hopefully very +unlikely). The chances of data corruption going undetected are +microscopic. Be aware, though, that the check occurs upon +decompression, so it can only tell you that something is wrong. It +can't help you recover the original uncompressed data. Clzip uses the same well-defined exit status values used by lzip and bzip2, which makes it safer than compressors returning ambiguous warning @@ -128,14 +128,14 @@ two or more compressed files. The result is the concatenation of the corresponding uncompressed files. Integrity testing of concatenated compressed files is also supported. - Clzip can produce multi-member files and safely recover, with + Clzip can produce multimember files and safely recover, with lziprecover, the undamaged members in case of file damage. Clzip can also split the compressed output in volumes of a given size, even when reading from standard input. This allows the direct creation of multivolume compressed tar archives. Clzip is able to compress and decompress streams of unlimited size by -automatically creating multi-member output. The members so created are +automatically creating multimember output. The members so created are large, about 2 PiB each. @@ -148,6 +148,10 @@ The format for running clzip is: clzip [OPTIONS] [FILES] +'-' used as a FILE argument means standard input. It can be mixed with +other FILES and is read just once, the first time it appears in the +command line. + Clzip supports the following options: '-h' @@ -158,6 +162,13 @@ The format for running clzip is: '--version' Print the version number of clzip on the standard output and exit. +'-a' +'--trailing-error' + Exit with error status 2 if any remaining input is detected after + decompressing the last member. Such remaining input is usually + trailing garbage that can be safely ignored. *Note + concat-example::. + '-b BYTES' '--member-size=BYTES' Set the member size limit to BYTES. A small member size may @@ -166,14 +177,19 @@ The format for running clzip is: '-c' '--stdout' - Compress or decompress to standard output. Needed when reading - from a named pipe (fifo) or from a device. Use it to recover as - much of the uncompressed data as possible when decompressing a - corrupt file. + Compress or decompress to standard output; keep input files + unchanged. If compressing several files, each file is compressed + independently. This option is needed when reading from a named + pipe (fifo) or from a device. Use it also to recover as much of + the uncompressed data as possible when decompressing a corrupt + file. '-d' '--decompress' - Decompress. + Decompress the specified file(s). If a file does not exist or + can't be opened, clzip continues decompressing the rest of the + files. If a file fails to decompress, clzip exits immediately + without decompressing the rest of the files. '-f' '--force' @@ -211,12 +227,13 @@ The format for running clzip is: '-s BYTES' '--dictionary-size=BYTES' - Set the dictionary size limit in bytes. Valid values range from 4 - KiB to 512 MiB. Clzip will use the smallest possible dictionary - size for each file without exceeding this limit. Note that - dictionary sizes are quantized. If the specified size does not - match one of the valid sizes, it will be rounded upwards by adding - up to (BYTES / 16) to it. + Set the dictionary size limit in bytes. Clzip will use the smallest + possible dictionary size for each file without exceeding this + limit. Valid values range from 4 KiB to 512 MiB. Values 12 to 29 + are interpreted as powers of two, meaning 2^12 to 2^29 bytes. Note + that dictionary sizes are quantized. If the specified size does + not match one of the valid sizes, it will be rounded upwards by + adding up to (BYTES / 8) to it. For maximum compression you should use a dictionary size limit as large as possible, but keep in mind that the decompression memory @@ -228,16 +245,17 @@ The format for running clzip is: Split the compressed output into several volume files with names 'original_name00001.lz', 'original_name00002.lz', etc, and set the volume size limit to BYTES. Each volume is a complete, maybe - multi-member, lzip file. A small volume size may degrade - compression ratio, so use it only when needed. Valid values range - from 100 kB to 4 EiB. + multimember, lzip file. A small volume size may degrade compression + ratio, so use it only when needed. Valid values range from 100 kB + to 4 EiB. '-t' '--test' Check integrity of the specified file(s), but don't decompress them. This really performs a trial decompression and throws away the result. Use it together with '-v' to see information about - the file. + the file(s). If a file fails the test, clzip continues checking + the rest of the files. '-v' '--verbose' @@ -246,18 +264,19 @@ The format for running clzip is: processed. A second '-v' shows the progress of compression. When decompressing or testing, further -v's (up to 4) increase the verbosity level, showing status, compression ratio, dictionary - size, and trailer contents (CRC, data size, member size). + size, trailer contents (CRC, data size, member size), and up to 6 + bytes of trailing data (if any). '-0 .. -9' Set the compression parameters (dictionary size and match length - limit) as shown in the table below. Note that '-9' can be much - slower than '-0'. These options have no effect when decompressing. + limit) as shown in the table below. The default compression level + is '-6'. Note that '-9' can be much slower than '-0'. These + options have no effect when decompressing. The bidimensional parameter space of LZMA can't be mapped to a linear scale optimal for all files. If your files are large, very - repetitive, etc, you may need to use the '--match-length' and - '--dictionary-size' options directly to achieve optimal - performance. + repetitive, etc, you may need to use the '--dictionary-size' and + '--match-length' options directly to achieve optimal performance. Level Dictionary size Match length limit -0 64 KiB 16 bytes @@ -327,12 +346,12 @@ additional information before, between, or after them. Each member has the following structure: +--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -| ID string | VN | DS | Lzma stream | CRC32 | Data size | Member size | +| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size | +--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ All multibyte values are stored in little endian order. -'ID string' +'ID string (the "magic" bytes)' A four byte string, identifying the lzip format, with the value "LZIP" (0x4C, 0x5A, 0x49, 0x50). @@ -350,8 +369,8 @@ additional information before, between, or after them. Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB Valid values for dictionary size range from 4 KiB to 512 MiB. -'Lzma stream' - The lzma stream, finished by an end of stream marker. Uses default +'LZMA stream' + The LZMA stream, finished by an end of stream marker. Uses default values for encoder properties. *Note Stream format: (lzip)Stream format, for a complete description. @@ -365,11 +384,11 @@ additional information before, between, or after them. Total size of the member, including header and trailer. This field acts as a distributed index, allows the verification of stream integrity, and facilitates safe recovery of undamaged members from - multi-member files. + multimember files. -File: clzip.info, Node: Algorithm, Next: Examples, Prev: File format, Up: Top +File: clzip.info, Node: Algorithm, Next: Trailing data, Prev: File format, Up: Top 4 Algorithm *********** @@ -435,15 +454,48 @@ range encoding), Igor Pavlov (for putting all the above together in LZMA), and Julian Seward (for bzip2's CLI). -File: clzip.info, Node: Examples, Next: Problems, Prev: Algorithm, Up: Top +File: clzip.info, Node: Trailing data, Next: Examples, Prev: Algorithm, Up: Top + +5 Extra data appended to the file +********************************* + +Sometimes extra data is found appended to a lzip file after the last +member. Such trailing data may be: + + * Padding added to make the file size a multiple of some block size, + for example when writing to a tape. + + * Garbage added by some not totally successful copy operation. + + * Useful data added by the user; a cryptographically secure hash, a + description of file contents, etc. + + * Malicious data added to the file in order to make its total size + and hash value (for a chosen hash) coincide with those of another + file. -5 A small tutorial with examples + * In very rare cases, trailing data could be the corrupt header of + another member. In multimember or concatenated files the + probability of corruption happening in the magic bytes is 5 times + smaller than the probability of getting a false positive caused by + the corruption of the integrity information itself. Therefore it + can be considered to be below the noise level. + + Trailing data can be safely ignored in most cases. In some cases, +like that of user-added data, it is expected to be ignored. In those +cases where a file containing trailing data must be rejected, the option +'--trailing-error' can be used. *Note --trailing-error::. + + +File: clzip.info, Node: Examples, Next: Problems, Prev: Trailing data, Up: Top + +6 A small tutorial with examples ******************************** WARNING! Even if clzip is bug-free, other causes may result in a corrupt compressed file (bugs in the system libraries, memory errors, etc). Therefore, if the data you are going to compress are important, give the -'--keep' option to clzip and do not remove the original file until you +'--keep' option to clzip and don't remove the original file until you verify the compressed file with a command like 'clzip -cd file.lz | cmp file -'. @@ -454,8 +506,8 @@ and show the compression ratio. clzip -v file -Example 2: Like example 1 but the created 'file.lz' is multi-member -with a member size of 1 MiB. The compression ratio is not shown. +Example 2: Like example 1 but the created 'file.lz' is multimember with +a member size of 1 MiB. The compression ratio is not shown. clzip -b 1MiB file @@ -472,37 +524,46 @@ show status. clzip -tv file.lz -Example 5: Compress a whole floppy in /dev/fd0 and send the output to +Example 5: Compress a whole device in /dev/sdc and send the output to 'file.lz'. - clzip -c /dev/fd0 > file.lz + clzip -c /dev/sdc > file.lz + + +Example 6: The right way of concatenating compressed files. *Note +Trailing data::. + + Don't do this + cat file1.lz file2.lz file3.lz | clzip -d + Do this instead + clzip -cd file1.lz file2.lz file3.lz -Example 6: Decompress 'file.lz' partially until 10 KiB of decompressed +Example 7: Decompress 'file.lz' partially until 10 KiB of decompressed data are produced. clzip -cd file.lz | dd bs=1024 count=10 -Example 7: Decompress 'file.lz' partially from decompressed byte 10000 +Example 8: Decompress 'file.lz' partially from decompressed byte 10000 to decompressed byte 15000 (5000 bytes are produced). clzip -cd file.lz | dd bs=1000 skip=10 count=5 -Example 8: Create a multivolume compressed tar archive with a volume +Example 9: Create a multivolume compressed tar archive with a volume size of 1440 KiB. tar -c some_directory | clzip -S 1440KiB -o volume_name -Example 9: Extract a multivolume compressed tar archive. +Example 10: Extract a multivolume compressed tar archive. clzip -cd volume_name*.lz | tar -xf - -Example 10: Create a multivolume compressed backup of a large database -file with a volume size of 650 MB, where each volume is a multi-member +Example 11: Create a multivolume compressed backup of a large database +file with a volume size of 650 MB, where each volume is a multimember file with a member size of 32 MiB. clzip -b 32MiB -S 650MB big_db @@ -510,7 +571,7 @@ file with a member size of 32 MiB. File: clzip.info, Node: Problems, Next: Concept index, Prev: Examples, Up: Top -6 Reporting bugs +7 Reporting bugs **************** There are probably bugs in clzip. There are certainly errors and @@ -539,6 +600,7 @@ Concept index * introduction: Introduction. (line 6) * invoking: Invoking clzip. (line 6) * options: Invoking clzip. (line 6) +* trailing data: Trailing data. (line 6) * usage: Invoking clzip. (line 6) * version: Invoking clzip. (line 6) @@ -546,13 +608,16 @@ Concept index Tag Table: Node: Top210 -Node: Introduction893 -Node: Invoking clzip6152 -Node: File format11705 -Node: Algorithm14108 -Node: Examples16933 -Node: Problems18900 -Node: Concept index19426 +Node: Introduction952 +Node: Invoking clzip6164 +Ref: --trailing-error6730 +Node: File format12728 +Node: Algorithm15150 +Node: Trailing data17980 +Node: Examples19355 +Ref: concat-example20537 +Node: Problems21544 +Node: Concept index22070 End Tag Table diff --git a/doc/clzip.texi b/doc/clzip.texi index e2ca889..331d4eb 100644 --- a/doc/clzip.texi +++ b/doc/clzip.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 7 July 2015 -@set VERSION 1.7 +@set UPDATED 13 May 2016 +@set VERSION 1.8 @dircategory Data Compression @direntry @@ -39,13 +39,14 @@ This manual is for Clzip (version @value{VERSION}, @value{UPDATED}). * Invoking clzip:: Command line interface * File format:: Detailed format of the compressed file * Algorithm:: How clzip compresses the data +* Trailing data:: Extra data appended to the file * Examples:: A small tutorial with examples * Problems:: Reporting bugs * Concept index:: Index of concepts @end menu @sp 1 -Copyright @copyright{} 2010-2015 Antonio Diaz Diaz. +Copyright @copyright{} 2010-2016 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -78,7 +79,7 @@ program can repair bit-flip errors (one of the most common forms of data corruption) in lzip files, and provides data recovery capabilities, including error-checked merging of damaged copies of a file. @ifnothtml -@ref{Data safety,,,lziprecover}. +@xref{Data safety,,,lziprecover}. @end ifnothtml @item @@ -101,14 +102,14 @@ corrupt byte near the beginning is a thing of the past. The member trailer stores the 32-bit CRC of the original data, the size of the original data and the size of the member. These values, together -with the value remaining in the range decoder and the end-of-stream -marker, provide a 4 factor integrity checking which guarantees that the -decompressed version of the data is identical to the original. This -guards against corruption of the compressed data, and against undetected -bugs in clzip (hopefully very unlikely). The chances of data corruption -going undetected are microscopic. Be aware, though, that the check -occurs upon decompression, so it can only tell you that something is -wrong. It can't help you recover the original uncompressed data. +with the end-of-stream marker, provide a 3 factor integrity checking +which guarantees that the decompressed version of the data is identical +to the original. This guards against corruption of the compressed data, +and against undetected bugs in clzip (hopefully very unlikely). The +chances of data corruption going undetected are microscopic. Be aware, +though, that the check occurs upon decompression, so it can only tell +you that something is wrong. It can't help you recover the original +uncompressed data. Clzip uses the same well-defined exit status values used by lzip and bzip2, which makes it safer than compressors returning ambiguous warning @@ -157,14 +158,14 @@ or more compressed files. The result is the concatenation of the corresponding uncompressed files. Integrity testing of concatenated compressed files is also supported. -Clzip can produce multi-member files and safely recover, with +Clzip can produce multimember files and safely recover, with lziprecover, the undamaged members in case of file damage. Clzip can also split the compressed output in volumes of a given size, even when reading from standard input. This allows the direct creation of multivolume compressed tar archives. Clzip is able to compress and decompress streams of unlimited size by -automatically creating multi-member output. The members so created are +automatically creating multimember output. The members so created are large, about 2 PiB each. @@ -181,6 +182,11 @@ The format for running clzip is: clzip [@var{options}] [@var{files}] @end example +@noindent +@samp{-} used as a @var{file} argument means standard input. It can be +mixed with other @var{files} and is read just once, the first time it +appears in the command line. + Clzip supports the following options: @table @code @@ -192,6 +198,13 @@ Print an informative help message describing the options and exit. @itemx --version Print the version number of clzip on the standard output and exit. +@anchor{--trailing-error} +@item -a +@itemx --trailing-error +Exit with error status 2 if any remaining input is detected after +decompressing the last member. Such remaining input is usually trailing +garbage that can be safely ignored. @xref{concat-example}. + @item -b @var{bytes} @itemx --member-size=@var{bytes} Set the member size limit to @var{bytes}. A small member size may @@ -200,13 +213,18 @@ range from 100 kB to 2 PiB. Defaults to 2 PiB. @item -c @itemx --stdout -Compress or decompress to standard output. Needed when reading from a -named pipe (fifo) or from a device. Use it to recover as much of the -uncompressed data as possible when decompressing a corrupt file. +Compress or decompress to standard output; keep input files unchanged. +If compressing several files, each file is compressed independently. +This option is needed when reading from a named pipe (fifo) or from a +device. Use it also to recover as much of the uncompressed data as +possible when decompressing a corrupt file. @item -d @itemx --decompress -Decompress. +Decompress the specified file(s). If a file does not exist or can't be +opened, clzip continues decompressing the rest of the files. If a file +fails to decompress, clzip exits immediately without decompressing the +rest of the files. @item -f @itemx --force @@ -242,11 +260,13 @@ Quiet operation. Suppress all messages. @item -s @var{bytes} @itemx --dictionary-size=@var{bytes} -Set the dictionary size limit in bytes. Valid values range from 4 KiB to -512 MiB. Clzip will use the smallest possible dictionary size for each -file without exceeding this limit. Note that dictionary sizes are -quantized. If the specified size does not match one of the valid sizes, -it will be rounded upwards by adding up to (@var{bytes} / 16) to it. +Set the dictionary size limit in bytes. Clzip will use the smallest +possible dictionary size for each file without exceeding this limit. +Valid values range from 4 KiB to 512 MiB. Values 12 to 29 are +interpreted as powers of two, meaning 2^12 to 2^29 bytes. Note that +dictionary sizes are quantized. If the specified size does not match one +of the valid sizes, it will be rounded upwards by adding up to +@w{(@var{bytes} / 8)} to it. For maximum compression you should use a dictionary size limit as large as possible, but keep in mind that the decompression memory requirement @@ -257,7 +277,7 @@ is affected at compression time by the choice of dictionary size limit. Split the compressed output into several volume files with names @samp{original_name00001.lz}, @samp{original_name00002.lz}, etc, and set the volume size limit to @var{bytes}. Each volume is a complete, maybe -multi-member, lzip file. A small volume size may degrade compression +multimember, lzip file. A small volume size may degrade compression ratio, so use it only when needed. Valid values range from 100 kB to 4 EiB. @@ -265,7 +285,8 @@ EiB. @itemx --test Check integrity of the specified file(s), but don't decompress them. This really performs a trial decompression and throws away the result. -Use it together with @samp{-v} to see information about the file. +Use it together with @samp{-v} to see information about the file(s). If +a file fails the test, clzip continues checking the rest of the files. @item -v @itemx --verbose @@ -274,18 +295,19 @@ When compressing, show the compression ratio for each file processed. A second @samp{-v} shows the progress of compression.@* When decompressing or testing, further -v's (up to 4) increase the verbosity level, showing status, compression ratio, dictionary size, -and trailer contents (CRC, data size, member size). +trailer contents (CRC, data size, member size), and up to 6 bytes of +trailing data (if any). @item -0 .. -9 Set the compression parameters (dictionary size and match length limit) -as shown in the table below. Note that @samp{-9} can be much slower than -@samp{-0}. These options have no effect when decompressing. +as shown in the table below. The default compression level is @samp{-6}. +Note that @samp{-9} can be much slower than @samp{-0}. These options +have no effect when decompressing. The bidimensional parameter space of LZMA can't be mapped to a linear scale optimal for all files. If your files are large, very repetitive, -etc, you may need to use the @samp{--match-length} and -@samp{--dictionary-size} options directly to achieve optimal -performance. +etc, you may need to use the @samp{--dictionary-size} and +@samp{--match-length} options directly to achieve optimal performance. @multitable {Level} {Dictionary size} {Match length limit} @item Level @tab Dictionary size @tab Match length limit @@ -364,14 +386,14 @@ additional information before, between, or after them. Each member has the following structure: @verbatim +--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -| ID string | VN | DS | Lzma stream | CRC32 | Data size | Member size | +| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size | +--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @end verbatim All multibyte values are stored in little endian order. @table @samp -@item ID string +@item ID string (the "magic" bytes) A four byte string, identifying the lzip format, with the value "LZIP" (0x4C, 0x5A, 0x49, 0x50). @@ -388,8 +410,8 @@ from the base size to obtain the dictionary size.@* Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@* Valid values for dictionary size range from 4 KiB to 512 MiB. -@item Lzma stream -The lzma stream, finished by an end of stream marker. Uses default +@item LZMA stream +The LZMA stream, finished by an end of stream marker. Uses default values for encoder properties. @ifnothtml @xref{Stream format,,,lzip}, @@ -409,7 +431,7 @@ Size of the uncompressed original data. @item Member size (8 bytes) Total size of the member, including header and trailer. This field acts as a distributed index, allows the verification of stream integrity, and -facilitates safe recovery of undamaged members from multi-member files. +facilitates safe recovery of undamaged members from multimember files. @end table @@ -480,6 +502,44 @@ range encoding), Igor Pavlov (for putting all the above together in LZMA), and Julian Seward (for bzip2's CLI). +@node Trailing data +@chapter Extra data appended to the file +@cindex trailing data + +Sometimes extra data is found appended to a lzip file after the last +member. Such trailing data may be: + +@itemize @bullet +@item +Padding added to make the file size a multiple of some block size, for +example when writing to a tape. + +@item +Garbage added by some not totally successful copy operation. + +@item +Useful data added by the user; a cryptographically secure hash, a +description of file contents, etc. + +@item +Malicious data added to the file in order to make its total size and +hash value (for a chosen hash) coincide with those of another file. + +@item +In very rare cases, trailing data could be the corrupt header of another +member. In multimember or concatenated files the probability of +corruption happening in the magic bytes is 5 times smaller than the +probability of getting a false positive caused by the corruption of the +integrity information itself. Therefore it can be considered to be below +the noise level. +@end itemize + +Trailing data can be safely ignored in most cases. In some cases, like +that of user-added data, it is expected to be ignored. In those cases +where a file containing trailing data must be rejected, the option +@samp{--trailing-error} can be used. @xref{--trailing-error}. + + @node Examples @chapter A small tutorial with examples @cindex examples @@ -487,7 +547,7 @@ LZMA), and Julian Seward (for bzip2's CLI). WARNING! Even if clzip is bug-free, other causes may result in a corrupt compressed file (bugs in the system libraries, memory errors, etc). Therefore, if the data you are going to compress are important, give the -@samp{--keep} option to clzip and do not remove the original file until +@samp{--keep} option to clzip and don't remove the original file until you verify the compressed file with a command like @w{@samp{clzip -cd file.lz | cmp file -}}. @@ -502,7 +562,7 @@ clzip -v file @sp 1 @noindent -Example 2: Like example 1 but the created @samp{file.lz} is multi-member +Example 2: Like example 1 but the created @samp{file.lz} is multimember with a member size of 1 MiB. The compression ratio is not shown. @example @@ -530,16 +590,29 @@ clzip -tv file.lz @sp 1 @noindent -Example 5: Compress a whole floppy in /dev/fd0 and send the output to +Example 5: Compress a whole device in /dev/sdc and send the output to @samp{file.lz}. @example -clzip -c /dev/fd0 > file.lz +clzip -c /dev/sdc > file.lz +@end example + +@sp 1 +@anchor{concat-example} +@noindent +Example 6: The right way of concatenating compressed files. +@xref{Trailing data}. + +@example +Don't do this + cat file1.lz file2.lz file3.lz | clzip -d +Do this instead + clzip -cd file1.lz file2.lz file3.lz @end example @sp 1 @noindent -Example 6: Decompress @samp{file.lz} partially until 10 KiB of +Example 7: Decompress @samp{file.lz} partially until 10 KiB of decompressed data are produced. @example @@ -548,7 +621,7 @@ clzip -cd file.lz | dd bs=1024 count=10 @sp 1 @noindent -Example 7: Decompress @samp{file.lz} partially from decompressed byte +Example 8: Decompress @samp{file.lz} partially from decompressed byte 10000 to decompressed byte 15000 (5000 bytes are produced). @example @@ -557,7 +630,7 @@ clzip -cd file.lz | dd bs=1000 skip=10 count=5 @sp 1 @noindent -Example 8: Create a multivolume compressed tar archive with a volume +Example 9: Create a multivolume compressed tar archive with a volume size of 1440 KiB. @example @@ -566,7 +639,7 @@ tar -c some_directory | clzip -S 1440KiB -o volume_name @sp 1 @noindent -Example 9: Extract a multivolume compressed tar archive. +Example 10: Extract a multivolume compressed tar archive. @example clzip -cd volume_name*.lz | tar -xf - @@ -574,8 +647,8 @@ clzip -cd volume_name*.lz | tar -xf - @sp 1 @noindent -Example 10: Create a multivolume compressed backup of a large database -file with a volume size of 650 MB, where each volume is a multi-member +Example 11: Create a multivolume compressed backup of a large database +file with a volume size of 650 MB, where each volume is a multimember file with a member size of 32 MiB. @example @@ -1,5 +1,5 @@ /* Clzip - LZMA lossless data compressor - Copyright (C) 2010-2015 Antonio Diaz Diaz. + Copyright (C) 2010-2016 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,6 +28,9 @@ #include "encoder.h" +CRC32 crc32; + + int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs ) { int32_t * ptr0 = e->eb.mb.pos_array + ( e->eb.mb.cyclic_pos << 1 ); @@ -40,7 +43,7 @@ int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs ) const int min_pos = ( e->eb.mb.pos > e->eb.mb.dictionary_size ) ? e->eb.mb.pos - e->eb.mb.dictionary_size : 0; const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb ); - int count, delta, key2, key3, key4, newpos; + int count, key2, key3, key4, newpos; unsigned tmp; int len_limit = e->match_len_limit; @@ -76,7 +79,7 @@ int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs ) } if( num_pairs > 0 ) { - delta = pos1 - np2; + const int delta = pos1 - np2; while( maxlen < len_limit && data[maxlen-delta] == data[maxlen] ) ++maxlen; pairs[num_pairs-1].len = maxlen; @@ -92,6 +95,7 @@ int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs ) for( count = e->cycles; ; ) { + int delta; if( newpos <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; } delta = pos1 - newpos; @@ -196,16 +200,16 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, } if( replens[rep_index] >= e->match_len_limit ) { - e->trials[0].dis = rep_index; e->trials[0].price = replens[rep_index]; + e->trials[0].dis = rep_index; LZe_move_and_update( e, replens[rep_index] ); return replens[rep_index]; } if( main_len >= e->match_len_limit ) { - e->trials[0].dis = e->pairs[num_pairs-1].dis + num_rep_distances; e->trials[0].price = main_len; + e->trials[0].dis = e->pairs[num_pairs-1].dis + num_rep_distances; LZe_move_and_update( e, main_len ); return main_len; } @@ -218,13 +222,12 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, const uint8_t cur_byte = Mb_peek( &e->eb.mb, 0 ); const uint8_t match_byte = Mb_peek( &e->eb.mb, reps[0] + 1 ); - e->trials[0].state = state; - e->trials[1].dis = -1; /* literal */ e->trials[1].price = price0( e->eb.bm_match[state][pos_state] ); if( St_is_char( state ) ) e->trials[1].price += LZeb_price_literal( &e->eb, prev_byte, cur_byte ); else e->trials[1].price += LZeb_price_matched( &e->eb, prev_byte, cur_byte, match_byte ); + e->trials[1].dis = -1; /* literal */ if( match_byte == cur_byte ) Tr_update( &e->trials[1], rep_match_price + @@ -234,16 +237,15 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, if( num_trials < min_match_len ) { - e->trials[0].dis = e->trials[1].dis; e->trials[0].price = 1; + e->trials[0].dis = e->trials[1].dis; Mb_move_pos( &e->eb.mb ); return 1; } + e->trials[0].state = state; for( i = 0; i < num_rep_distances; ++i ) e->trials[0].reps[i] = reps[i]; - e->trials[1].prev_index = 0; - e->trials[1].prev_index2 = single_step_trial; for( len = min_match_len; len <= num_trials; ++len ) e->trials[len].price = infinite_price; @@ -556,8 +558,8 @@ bool LZe_encode_member( struct LZ_encoder * const e, { const int pos_state = ( Mb_data_position( &e->eb.mb ) - ahead ) & pos_state_mask; - const int dis = e->trials[i].dis; const int len = e->trials[i].price; + const int dis = e->trials[i].dis; bool bit = ( dis < 0 ); Re_encode_bit( &e->eb.renc, &e->eb.bm_match[state][pos_state], !bit ); @@ -1,5 +1,5 @@ /* Clzip - LZMA lossless data compressor - Copyright (C) 2010-2015 Antonio Diaz Diaz. + Copyright (C) 2010-2016 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -306,6 +306,8 @@ static inline bool LZe_init( struct LZ_encoder * const e, Lp_init( &e->rep_len_prices, &e->eb.rep_len_model, e->match_len_limit ); e->pending_num_pairs = 0; e->num_dis_slots = 2 * real_bits( e->eb.mb.dictionary_size - 1 ); + e->trials[1].prev_index = 0; + e->trials[1].prev_index2 = single_step_trial; return true; } diff --git a/encoder_base.c b/encoder_base.c index 9ce4563..31cad3f 100644 --- a/encoder_base.c +++ b/encoder_base.c @@ -1,5 +1,5 @@ /* Clzip - LZMA lossless data compressor - Copyright (C) 2010-2015 Antonio Diaz Diaz. + Copyright (C) 2010-2016 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/encoder_base.h b/encoder_base.h index a72442f..54fecd1 100644 --- a/encoder_base.h +++ b/encoder_base.h @@ -1,5 +1,5 @@ /* Clzip - LZMA lossless data compressor - Copyright (C) 2010-2015 Antonio Diaz Diaz. + Copyright (C) 2010-2016 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -113,8 +113,7 @@ static inline int price_symbol_reversed( const Bit_model bm[], int symbol, } -static inline int price_matched( const Bit_model bm[], int symbol, - int match_byte ) +static inline int price_matched( const Bit_model bm[], int symbol, int match_byte ) { int price = 0; int mask = 0x100; @@ -409,8 +408,8 @@ static inline bool LZeb_init( struct LZ_encoder_base * const eb, const int before, const int dict_size, const int after_size, const int dict_factor, const int num_prev_positions23, - const int pos_array_factor, const int ifd, - const int outfd ) + const int pos_array_factor, + const int ifd, const int outfd ) { if( !Mb_init( &eb->mb, before, dict_size, after_size, dict_factor, num_prev_positions23, pos_array_factor, ifd ) ) return false; diff --git a/fast_encoder.c b/fast_encoder.c index 211f74d..941c0e2 100644 --- a/fast_encoder.c +++ b/fast_encoder.c @@ -1,5 +1,5 @@ /* Clzip - LZMA lossless data compressor - Copyright (C) 2010-2015 Antonio Diaz Diaz. + Copyright (C) 2010-2016 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -43,7 +43,6 @@ int FLZe_longest_match_len( struct FLZ_encoder * const fe, int * const distance newpos = fe->eb.mb.prev_positions[fe->key4]; fe->eb.mb.prev_positions[fe->key4] = pos1; - for( count = 4; ; ) { if( --count < 0 || newpos <= 0 ) { *ptr0 = 0; break; } diff --git a/fast_encoder.h b/fast_encoder.h index 797649b..df1741d 100644 --- a/fast_encoder.h +++ b/fast_encoder.h @@ -1,5 +1,5 @@ /* Clzip - LZMA lossless data compressor - Copyright (C) 2010-2015 Antonio Diaz Diaz. + Copyright (C) 2010-2016 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* Clzip - LZMA lossless data compressor - Copyright (C) 2010-2015 Antonio Diaz Diaz. + Copyright (C) 2010-2016 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -50,6 +50,7 @@ enum { max_dictionary_bits = 29, max_dictionary_size = 1 << max_dictionary_bits, literal_context_bits = 3, + literal_pos_state_bits = 0, /* not used */ pos_state_bits = 2, pos_states = 1 << pos_state_bits, pos_state_mask = pos_states - 1, @@ -90,8 +91,8 @@ typedef int Bit_model; static inline void Bm_init( Bit_model * const probability ) { *probability = bit_model_total / 2; } -static inline void Bm_array_init( Bit_model * const p, const int size ) - { int i = 0; while( i < size ) p[i++] = bit_model_total / 2; } +static inline void Bm_array_init( Bit_model bm[], const int size ) + { int i; for( i = 0; i < size; ++i ) Bm_init( &bm[i] ); } struct Len_model { @@ -121,7 +122,8 @@ struct Pretty_print }; static inline void Pp_init( struct Pretty_print * const pp, - const char * const filenames[], const int num_filenames ) + const char * const filenames[], + const int num_filenames, const int verbosity ) { unsigned stdin_name_len; int i; @@ -131,6 +133,7 @@ static inline void Pp_init( struct Pretty_print * const pp, pp->first_post = false; stdin_name_len = strlen( pp->stdin_name ); + if( verbosity <= 0 ) return; for( i = 0; i < num_filenames; ++i ) { const char * const s = filenames[i]; @@ -184,6 +187,11 @@ static inline void CRC32_update_buf( uint32_t * const crc, } +static inline bool isvalid_ds( const unsigned dictionary_size ) + { return ( dictionary_size >= min_dictionary_size && + dictionary_size <= max_dictionary_size ); } + + static inline int real_bits( unsigned value ) { int bits = 0; @@ -205,6 +213,14 @@ static inline void Fh_set_magic( File_header data ) static inline bool Fh_verify_magic( const File_header data ) { return ( memcmp( data, magic_string, 4 ) == 0 ); } +/* detect truncated header */ +static inline bool Fh_verify_prefix( const File_header data, const int size ) + { + int i; for( i = 0; i < size && i < 4; ++i ) + if( data[i] != magic_string[i] ) return false; + return ( size > 0 ); + } + static inline uint8_t Fh_version( const File_header data ) { return data[4]; } @@ -221,21 +237,18 @@ static inline unsigned Fh_get_dictionary_size( const File_header data ) static inline bool Fh_set_dictionary_size( File_header data, const unsigned sz ) { - if( sz >= min_dictionary_size && sz <= max_dictionary_size ) + if( !isvalid_ds( sz ) ) return false; + data[5] = real_bits( sz - 1 ); + if( sz > min_dictionary_size ) { - data[5] = real_bits( sz - 1 ); - if( sz > min_dictionary_size ) - { - const unsigned base_size = 1 << data[5]; - const unsigned fraction = base_size / 16; - int i; - for( i = 7; i >= 1; --i ) - if( base_size - ( i * fraction ) >= sz ) - { data[5] |= ( i << 5 ); break; } - } - return true; + const unsigned base_size = 1 << data[5]; + const unsigned fraction = base_size / 16; + int i; + for( i = 7; i >= 1; --i ) + if( base_size - ( i * fraction ) >= sz ) + { data[5] |= ( i << 5 ); break; } } - return false; + return true; } @@ -1,5 +1,5 @@ /* Clzip - LZMA lossless data compressor - Copyright (C) 2010-2015 Antonio Diaz Diaz. + Copyright (C) 2010-2016 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -23,6 +23,7 @@ #define _FILE_OFFSET_BITS 64 +#include <ctype.h> #include <errno.h> #include <fcntl.h> #include <limits.h> @@ -66,10 +67,11 @@ #error "Environments where CHAR_BIT != 8 are not supported." #endif +int verbosity = 0; const char * const Program_name = "Clzip"; const char * const program_name = "clzip"; -const char * const program_year = "2015"; +const char * const program_year = "2016"; const char * invocation_name = 0; struct { const char * from; const char * to; } const known_extensions[] = { @@ -87,10 +89,6 @@ enum Mode { m_compress, m_decompress, m_test }; char * output_filename = 0; int outfd = -1; -int verbosity = 0; -const mode_t usr_rw = S_IRUSR | S_IWUSR; -const mode_t all_rw = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; -mode_t outfd_mode = S_IRUSR | S_IWUSR; bool delete_output_on_interrupt = false; @@ -101,14 +99,15 @@ static void show_help( void ) printf( "\nOptions:\n" " -h, --help display this help and exit\n" " -V, --version output version information and exit\n" + " -a, --trailing-error exit with error status if trailing data\n" " -b, --member-size=<bytes> set member size limit in bytes\n" - " -c, --stdout send output to standard output\n" + " -c, --stdout write to standard output, keep input files\n" " -d, --decompress decompress\n" " -f, --force overwrite existing output files\n" " -F, --recompress force re-compression of compressed files\n" " -k, --keep keep (don't delete) input files\n" " -m, --match-length=<bytes> set match length limit in bytes [36]\n" - " -o, --output=<file> if reading stdin, place the output into <file>\n" + " -o, --output=<file> if reading standard input, write to <file>\n" " -q, --quiet suppress all messages\n" " -s, --dictionary-size=<bytes> set dictionary size limit in bytes [8 MiB]\n" " -S, --volume-size=<bytes> set volume size limit in bytes\n" @@ -117,13 +116,15 @@ static void show_help( void ) " -0 .. -9 set compression level [default 6]\n" " --fast alias for -0\n" " --best alias for -9\n" - "If no file names are given, clzip compresses or decompresses\n" - "from standard input to standard output.\n" + "If no file names are given, or if a file is '-', clzip compresses or\n" + "decompresses from standard input to standard output.\n" "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" - "The bidimensional parameter space of LZMA can't be mapped to a linear\n" + "Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12\n" + "to 2^29 bytes.\n" + "\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n" "scale optimal for all files. If your files are large, very repetitive,\n" - "etc, you may need to use the --match-length and --dictionary-size\n" + "etc, you may need to use the --dictionary-size and --match-length\n" "options directly to achieve optimal performance.\n" "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" @@ -181,11 +182,10 @@ static unsigned long long getnum( const char * const ptr, if( !errno && tail[0] ) { const int factor = ( tail[1] == 'i' ) ? 1024 : 1000; - int exponent = 0, i; - bool bad_multiplier = false; + int exponent = 0; /* 0 = bad multiplier */ + int i; switch( tail[0] ) { - case ' ': break; case 'Y': exponent = 8; break; case 'Z': exponent = 7; break; case 'E': exponent = 6; break; @@ -193,13 +193,10 @@ static unsigned long long getnum( const char * const ptr, case 'T': exponent = 4; break; case 'G': exponent = 3; break; case 'M': exponent = 2; break; - case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true; - break; - case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true; - break; - default : bad_multiplier = true; + case 'K': if( factor == 1024 ) exponent = 1; break; + case 'k': if( factor == 1000 ) exponent = 1; break; } - if( bad_multiplier ) + if( exponent <= 0 ) { show_error( "Bad multiplier in numerical argument.", 0, true ); exit( 1 ); @@ -274,7 +271,7 @@ static int open_instream( const char * const name, struct stat * const in_statsp const bool can_read = ( i == 0 && ( S_ISBLK( mode ) || S_ISCHR( mode ) || S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); - const bool no_ofile = to_stdout || program_mode == m_test; + const bool no_ofile = ( to_stdout || program_mode == m_test ); if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || !no_ofile ) ) ) { if( verbosity >= 0 ) @@ -340,13 +337,17 @@ static void set_d_outname( const char * const name, const int i ) } -static bool open_outstream( const bool force ) +static bool open_outstream( const bool force, const bool from_stdin ) { + const mode_t usr_rw = S_IRUSR | S_IWUSR; + const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; + const mode_t outfd_mode = from_stdin ? all_rw : usr_rw; int flags = O_CREAT | O_WRONLY | O_BINARY; if( force ) flags |= O_TRUNC; else flags |= O_EXCL; outfd = open( output_filename, flags, outfd_mode ); - if( outfd < 0 && verbosity >= 0 ) + if( outfd >= 0 ) delete_output_on_interrupt = true; + else if( verbosity >= 0 ) { if( errno == EEXIST ) fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n", @@ -407,7 +408,11 @@ static void close_and_set_permissions( const struct stat * const in_statsp ) fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 ) warning = true; } - if( close( outfd ) != 0 ) cleanup_and_fail( 1 ); + if( close( outfd ) != 0 ) + { + show_error( "Error closing output file", errno, false ); + cleanup_and_fail( 1 ); + } outfd = -1; delete_output_on_interrupt = false; if( in_statsp ) @@ -481,8 +486,8 @@ static int compress( const unsigned long long member_size, } if( error ) { - show_error( "Not enough memory. Try a smaller dictionary size.", 0, false ); - cleanup_and_fail( 1 ); + Pp_show_msg( pp, "Not enough memory. Try a smaller dictionary size." ); + return 1; } } @@ -508,8 +513,7 @@ static int compress( const unsigned long long member_size, close_and_set_permissions( in_statsp ); if( !next_filename() ) { Pp_show_msg( pp, "Too many volume files." ); retval = 1; break; } - if( !open_outstream( true ) ) { retval = 1; break; } - delete_output_on_interrupt = true; + if( !open_outstream( true, !in_statsp ) ) { retval = 1; break; } } } } @@ -534,8 +538,51 @@ static int compress( const unsigned long long member_size, } +static unsigned char xdigit( const int value ) + { + if( value >= 0 && value <= 9 ) return '0' + value; + if( value >= 10 && value <= 15 ) return 'A' + value - 10; + return 0; + } + + +static bool show_trailing_data( const uint8_t * const data, const int size, + struct Pretty_print * const pp, const bool all, + const bool ignore_trailing ) + { + if( verbosity >= 4 || !ignore_trailing ) + { + int i; + char buf[80]; + int len = snprintf( buf, sizeof buf, "%strailing data = ", + all ? "" : "first bytes of " ); + bool text = true; + for( i = 0; i < size; ++i ) + if( !isprint( data[i] ) ) { text = false; break; } + if( text ) + { + if( len > 0 && len < (int)sizeof buf ) + snprintf( buf + len, sizeof buf - len, "'%.*s'", size, (const char *)data ); + } + else + { + for( i = 0; i < size && len > 0 && len + 3 < (int)sizeof buf; ++i ) + { + if( i > 0 ) buf[len++] = ' '; + buf[len++] = xdigit( data[i] >> 4 ); + buf[len++] = xdigit( data[i] & 0x0F ); + buf[len] = 0; + } + } + Pp_show_msg( pp, buf ); + if( !ignore_trailing ) show_error( "Trailing data not allowed.", 0, false ); + } + return ignore_trailing; + } + + static int decompress( const int infd, struct Pretty_print * const pp, - const bool testing ) + const bool ignore_trailing, const bool testing ) { unsigned long long partial_file_pos = 0; struct Range_decoder rdec; @@ -549,24 +596,30 @@ static int decompress( const int infd, struct Pretty_print * const pp, for( first_member = true; ; first_member = false ) { - int result; + int result, size; unsigned dictionary_size; File_header header; struct LZ_decoder decoder; Rd_reset_member_position( &rdec ); - Rd_read_data( &rdec, header, Fh_size ); + size = Rd_read_data( &rdec, header, Fh_size ); if( Rd_finished( &rdec ) ) /* End Of File */ { - if( first_member ) + if( first_member || Fh_verify_prefix( header, size ) ) { Pp_show_msg( pp, "File ends unexpectedly at member header." ); retval = 2; } + else if( size > 0 && !show_trailing_data( header, size, pp, + true, ignore_trailing ) ) + retval = 2; break; } if( !Fh_verify_magic( header ) ) { - if( !first_member ) break; /* trailing garbage */ - Pp_show_msg( pp, "Bad magic number (file not in lzip format)." ); - retval = 2; break; + if( first_member ) + { Pp_show_msg( pp, "Bad magic number (file not in lzip format)." ); + retval = 2; } + else if( !show_trailing_data( header, size, pp, false, ignore_trailing ) ) + retval = 2; + break; } if( !Fh_verify_version( header ) ) { @@ -577,8 +630,7 @@ static int decompress( const int infd, struct Pretty_print * const pp, retval = 2; break; } dictionary_size = Fh_get_dictionary_size( header ); - if( dictionary_size < min_dictionary_size || - dictionary_size > max_dictionary_size ) + if( !isvalid_ds( dictionary_size ) ) { Pp_show_msg( pp, "Invalid dictionary size in member header." ); retval = 2; break; } @@ -586,10 +638,7 @@ static int decompress( const int infd, struct Pretty_print * const pp, { Pp_show_msg( pp, 0 ); show_header( dictionary_size ); } if( !LZd_init( &decoder, &rdec, dictionary_size, outfd ) ) - { - show_error( "Not enough memory.", 0, false ); - cleanup_and_fail( 1 ); - } + { Pp_show_msg( pp, "Not enough memory." ); retval = 1; break; } result = LZd_decode_member( &decoder, pp ); partial_file_pos += Rd_member_position( &rdec ); LZd_free( &decoder ); @@ -631,18 +680,16 @@ static void set_signals( void ) void show_error( const char * const msg, const int errcode, const bool help ) { - if( verbosity >= 0 ) + if( verbosity < 0 ) return; + if( msg && msg[0] ) { - if( msg && msg[0] ) - { - fprintf( stderr, "%s: %s", program_name, msg ); - if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) ); - fputc( '\n', stderr ); - } - if( help ) - fprintf( stderr, "Try '%s --help' for more information.\n", - invocation_name ); + fprintf( stderr, "%s: %s", program_name, msg ); + if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) ); + fputc( '\n', stderr ); } + if( help ) + fprintf( stderr, "Try '%s --help' for more information.\n", + invocation_name ); } @@ -664,18 +711,16 @@ void show_progress( const unsigned long long partial_size, static const struct Matchfinder_base * mb = 0; static struct Pretty_print * pp = 0; - if( verbosity >= 2 ) + if( verbosity < 2 ) return; + if( m ) /* initialize static vars */ + { csize = cfile_size; psize = partial_size; mb = m; pp = p; } + if( mb && pp ) { - if( m ) /* initialize static vars */ - { csize = cfile_size; psize = partial_size; mb = m; pp = p; } - if( mb && pp ) - { - const unsigned long long pos = psize + Mb_data_position( mb ); - if( csize > 0 ) - fprintf( stderr, "%4llu%%", pos / csize ); - fprintf( stderr, " %.1f MB\r", pos / 1000000.0 ); - Pp_reset( pp ); Pp_show_msg( pp, 0 ); /* restore cursor position */ - } + const unsigned long long pos = psize + Mb_data_position( mb ); + if( csize > 0 ) + fprintf( stderr, "%4llu%%", pos / csize ); + fprintf( stderr, " %.1f MB\r", pos / 1000000.0 ); + Pp_reset( pp ); Pp_show_msg( pp, 0 ); /* restore cursor position */ } } @@ -712,7 +757,9 @@ int main( const int argc, const char * const argv[] ) int i; bool filenames_given = false; bool force = false; + bool ignore_trailing = true; bool keep_input_files = false; + bool stdin_used = false; bool recompress = false; bool to_stdout = false; bool zero = false; @@ -730,6 +777,7 @@ int main( const int argc, const char * const argv[] ) { '7', 0, ap_no }, { '8', 0, ap_no }, { '9', "best", ap_no }, + { 'a', "trailing-error", ap_no }, { 'b', "member-size", ap_yes }, { 'c', "stdout", ap_no }, { 'd', "decompress", ap_no }, @@ -769,6 +817,7 @@ int main( const int argc, const char * const argv[] ) case '5': case '6': case '7': case '8': case '9': zero = ( code == '0' ); encoder_options = option_mapping[code-'0']; break; + case 'a': ignore_trailing = false; break; case 'b': member_size = getnum( arg, 100000, max_member_size ); break; case 'c': to_stdout = true; break; case 'd': program_mode = m_decompress; break; @@ -819,7 +868,7 @@ int main( const int argc, const char * const argv[] ) ( filenames_given || default_output_filename[0] ) ) set_signals(); - Pp_init( &pp, filenames, num_filenames ); + Pp_init( &pp, filenames, num_filenames, verbosity ); output_filename = resize_buffer( output_filename, 1 ); for( i = 0; i < num_filenames; ++i ) @@ -831,6 +880,7 @@ int main( const int argc, const char * const argv[] ) if( !filenames[i][0] || strcmp( filenames[i], "-" ) == 0 ) { + if( stdin_used ) continue; else stdin_used = true; input_filename = ""; infd = STDIN_FILENO; if( program_mode != m_test ) @@ -844,11 +894,10 @@ int main( const int argc, const char * const argv[] ) else { output_filename = resize_buffer( output_filename, - strlen( default_output_filename ) + 1 ); + strlen( default_output_filename ) + 1 ); strcpy( output_filename, default_output_filename ); } - outfd_mode = all_rw; - if( !open_outstream( force ) ) + if( !open_outstream( force, true ) ) { if( retval < 1 ) retval = 1; close( infd ); infd = -1; @@ -872,8 +921,7 @@ int main( const int argc, const char * const argv[] ) if( program_mode == m_compress ) set_c_outname( input_filename, volume_size > 0 ); else set_d_outname( input_filename, eindex ); - outfd_mode = usr_rw; - if( !open_outstream( force ) ) + if( !open_outstream( force, false ) ) { if( retval < 1 ) retval = 1; close( infd ); infd = -1; @@ -883,17 +931,19 @@ int main( const int argc, const char * const argv[] ) } } - if( !check_tty( infd, program_mode ) ) return 1; + if( !check_tty( infd, program_mode ) ) + { + if( retval < 1 ) retval = 1; + cleanup_and_fail( retval ); + } - if( output_filename[0] && !to_stdout && program_mode != m_test ) - delete_output_on_interrupt = true; in_statsp = input_filename[0] ? &in_stats : 0; Pp_set_name( &pp, input_filename ); if( program_mode == m_compress ) tmp = compress( member_size, volume_size, infd, &encoder_options, &pp, in_statsp, zero ); else - tmp = decompress( infd, &pp, program_mode == m_test ); + tmp = decompress( infd, &pp, ignore_trailing, program_mode == m_test ); if( tmp > retval ) retval = tmp; if( tmp && program_mode != m_test ) cleanup_and_fail( retval ); diff --git a/testsuite/check.sh b/testsuite/check.sh index f64a090..52347b4 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Clzip - LZMA lossless data compressor -# Copyright (C) 2010-2015 Antonio Diaz Diaz. +# Copyright (C) 2010-2016 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -17,9 +17,16 @@ if [ ! -f "${LZIP}" ] || [ ! -x "${LZIP}" ] ; then exit 1 fi +if [ -e "${LZIP}" ] 2> /dev/null ; then true +else + echo "$0: a POSIX shell is required to run the tests" + echo "Try bash -c \"$0 $1 $2\"" + exit 1 +fi + if [ -d tmp ] ; then rm -rf tmp ; fi mkdir tmp -cd "${objdir}"/tmp +cd "${objdir}"/tmp || framework_failure cat "${testdir}"/test.txt > in || framework_failure in_lz="${testdir}"/test.txt.lz @@ -27,25 +34,22 @@ fail=0 printf "testing clzip-%s..." "$2" -"${LZIP}" -cqm4 in > /dev/null -if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -cqm274 in > /dev/null -if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -cqs-1 in > /dev/null -if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -cqs0 in > /dev/null -if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -cqs4095 in > /dev/null -if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -cqs513MiB in > /dev/null -if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi -printf " in: Bad magic number (file not in lzip format).\n" > msg -"${LZIP}" -t in 2> out -if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi -printf " (stdin): Bad magic number (file not in lzip format).\n" > msg -"${LZIP}" -t < in 2> out -if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi -rm -f out msg +"${LZIP}" -fkqm4 in +if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -fkqm274 in +if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -fkqs-1 in +if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -fkqs0 in +if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -fkqs4095 in +if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -fkqs513MiB in +if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -tq in +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -tq < in +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIP}" -cdq in if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIP}" -cdq < in @@ -55,26 +59,53 @@ if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" -tq if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -t "${in_lz}" || fail=1 +printf "\ntesting decompression..." + +"${LZIP}" -t "${in_lz}" +if [ $? = 0 ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIP}" -cd "${in_lz}" > copy || fail=1 cmp in copy || fail=1 printf . +rm -f copy cat "${in_lz}" > copy.lz || framework_failure -printf "to be overwritten" > copy || framework_failure -"${LZIP}" -df copy.lz || fail=1 +"${LZIP}" -dk copy.lz || fail=1 cmp in copy || fail=1 -printf . +printf "to be overwritten" > copy || framework_failure +"${LZIP}" -dq copy.lz +if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -df copy.lz +if [ $? = 0 ] && [ ! -e copy.lz ] && cmp in copy ; then + printf . ; else printf - ; fail=1 ; fi printf "to be overwritten" > copy || framework_failure "${LZIP}" -df -o copy < "${in_lz}" || fail=1 cmp in copy || fail=1 printf . +rm -f copy "${LZIP}" < in > anyothername || fail=1 -"${LZIP}" -d anyothername || fail=1 -cmp in anyothername.out || fail=1 -printf . +"${LZIP}" -d -o copy - anyothername - < "${in_lz}" +if [ $? = 0 ] && cmp in copy && cmp in anyothername.out ; then + printf . ; else printf - ; fail=1 ; fi +rm -f copy anyothername.out + +"${LZIP}" -tq in "${in_lz}" +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -tq foo.lz "${in_lz}" +if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -cdq in "${in_lz}" > copy +if [ $? = 2 ] && cat copy in | cmp in - ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -cdq foo.lz "${in_lz}" > copy +if [ $? = 1 ] && cmp in copy ; then printf . ; else printf - ; fail=1 ; fi +rm -f copy +cat "${in_lz}" > copy.lz || framework_failure +"${LZIP}" -dq in copy.lz +if [ $? = 2 ] && [ -e copy.lz ] && [ ! -e copy ] && [ ! -e in.out ] ; then + printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -dq foo.lz copy.lz +if [ $? = 1 ] && [ ! -e copy.lz ] && [ ! -e foo ] && cmp in copy ; then + printf . ; else printf - ; fail=1 ; fi cat in in > in2 || framework_failure "${LZIP}" -o copy2 < in2 || fail=1 @@ -84,12 +115,23 @@ cmp in2 copy2 || fail=1 printf . printf "garbage" >> copy2.lz || framework_failure +rm -f copy2 +"${LZIP}" -atq copy2.lz +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -atq < copy2.lz +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -adkq copy2.lz +if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -adkq -o copy2 < copy2.lz +if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi printf "to be overwritten" > copy2 || framework_failure "${LZIP}" -df copy2.lz || fail=1 cmp in2 copy2 || fail=1 printf . -"${LZIP}" -cfq "${in_lz}" > out +printf "\ntesting compression..." + +"${LZIP}" -cfq "${in_lz}" > out # /dev/null is a tty on OS/2 if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIP}" -cF "${in_lz}" > out || fail=1 "${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1 |