summary refs log tree commit diff stats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- ChangeLog 23
-rw-r--r-- INSTALL 4
-rw-r--r-- Makefile.in 9
-rw-r--r-- NEWS 25
-rw-r--r-- README 6
-rw-r--r-- carg_parser.c 2
-rw-r--r-- carg_parser.h 2
-rwxr-xr-x configure 14
-rw-r--r-- decoder.c 56
-rw-r--r-- decoder.h 48
-rw-r--r-- doc/clzip.1 20
-rw-r--r-- doc/clzip.info 189
-rw-r--r-- doc/clzip.texi 165
-rw-r--r-- encoder.c 24
-rw-r--r-- encoder.h 4
-rw-r--r-- encoder_base.c 2
-rw-r--r-- encoder_base.h 9
-rw-r--r-- fast_encoder.c 3
-rw-r--r-- fast_encoder.h 2
-rw-r--r-- lzip.h 47
-rw-r--r-- main.c 196
-rwxr-xr-x testsuite/check.sh 100
22 files changed, 614 insertions, 336 deletions
diff --git a/ChangeLog b/ChangeLog
index af69db0..4377266 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2016-05-13 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.8 released.
+ * main.c: Added new option '-a, --trailing-error'.
+ * main.c (decompress): Print up to 6 bytes of trailing data
+ when '-vvvv' is specified.
+ * decoder.c (LZd_verify_trailer): Removed test of final code.
+ * main.c (main): Delete '--output' file if infd is a terminal.
+ * main.c (main): Don't use stdin more than once.
+ * lzip.texi: Added chapter 'Trailing data'.
+ * configure: Avoid warning on some shells when testing for gcc.
+ * Makefile.in: Detect the existence of install-info.
+ * testsuite/check.sh: A POSIX shell is required to run the tests.
+ * testsuite/check.sh: Don't check error messages.
+
2015-07-07 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.7 released.
@@ -16,7 +31,7 @@
* Version 1.5 released.
* Show progress of compression at verbosity level 2 (-vv).
- * main.c (show_header): Do not show header version.
+ * main.c (show_header): Don't show header version.
* Ignore option '-n, --threads' for compatibility with plzip.
* configure: Options now accept a separate argument.
@@ -48,7 +63,7 @@
* Version 1.2 released.
* main.c: Added new option '-F, --recompress'.
* main.c (decompress): Print only one status line for each
- multi-member file when only one '-v' is specified.
+ multimember file when only one '-v' is specified.
* encoder.h (Lee_update_prices): Update high length symbol prices
independently of the value of 'pos_state'. This gives better
compression for large values of '--match-length' without being
@@ -68,7 +83,7 @@
compress less but faster. (-1 now takes 43% less time for only
20% larger compressed size).
* Compression ratio of option '-9' has been slightly increased.
- * main.c (open_instream): Do not show the message
+ * main.c (open_instream): Don't show the message
" and '--stdout' was not specified" for directories, etc.
* New examples have been added to the manual.
@@ -79,7 +94,7 @@
* Translated to C from the C++ source of lzip 1.10.
-Copyright (C) 2010-2015 Antonio Diaz Diaz.
+Copyright (C) 2010-2016 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute and
diff --git a/INSTALL b/INSTALL
index e510baf..ed6f68a 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,7 +1,7 @@
Requirements
------------
You will need a C compiler.
-I use gcc 4.9.1 and 4.1.2, but the code should compile with any
+I use gcc 5.3.0 and 4.1.2, but the code should compile with any
standards compliant compiler.
Gcc is available at http://gcc.gnu.org.
@@ -62,7 +62,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above.
-Copyright (C) 2010-2015 Antonio Diaz Diaz.
+Copyright (C) 2010-2016 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.
diff --git a/Makefile.in b/Makefile.in
index f40352e..d028148 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -5,6 +5,7 @@ INSTALL_PROGRAM = $(INSTALL) -m 755
INSTALL_DATA = $(INSTALL) -m 644
INSTALL_DIR = $(INSTALL) -d -m 755
SHELL = /bin/sh
+CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
objs = carg_parser.o encoder_base.o encoder.o fast_encoder.o decoder.o main.o
@@ -69,7 +70,9 @@ install-info :
if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi
-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
$(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info"
- -install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info"
+ -if $(CAN_RUN_INSTALLINFO) ; then \
+ install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" ; \
+ fi
install-info-compress : install-info
lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info"
@@ -92,7 +95,9 @@ uninstall-bin :
-rm -f "$(DESTDIR)$(bindir)/$(progname)"
uninstall-info :
- -install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info"
+ -if $(CAN_RUN_INSTALLINFO) ; then \
+ install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" ; \
+ fi
-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
uninstall-man :
diff --git a/NEWS b/NEWS
index d05808b..7f49444 100644
--- a/NEWS
+++ b/NEWS
@@ -1,8 +1,21 @@
-Changes in version 1.7:
+Changes in version 1.8:
-The option "-0", which produces a compression speed and ratio comparable
-to those of gzip, has been ported from lzip.
+The option "-a, --trailing-error", which makes clzip exit with error
+status 2 if any remaining input is detected after decompressing the last
+member, has been added.
-The targets "install-compress", "install-strip-compress",
-"install-info-compress" and "install-man-compress" have been added to
-the Makefile.
+When decompressing or testing, up to 6 bytes of trailing data are
+printed if "-vvvv" is specified.
+
+The test of the value remaining in the range decoder has been removed.
+(After extensive testing it has been found useless to detect corruption
+in the decompressed data. Eliminating it reduces the number of false
+positives for corruption and makes error detection more accurate).
+
+When decompressing, the file specified with the '--output' option is now
+deleted if the input is a terminal.
+
+The new chapter "Trailing data" has been added to the manual.
+
+A harmless check failure on Windows, caused by the failed comparison of
+a message in text mode, has been fixed.
diff --git a/README b/README
index e6464da..9316c4e 100644
--- a/README
+++ b/README
@@ -80,14 +80,14 @@ or more compressed files. The result is the concatenation of the
corresponding uncompressed files. Integrity testing of concatenated
compressed files is also supported.
-Clzip can produce multi-member files and safely recover, with
+Clzip can produce multimember files and safely recover, with
lziprecover, the undamaged members in case of file damage. Clzip can
also split the compressed output in volumes of a given size, even when
reading from standard input. This allows the direct creation of
multivolume compressed tar archives.
Clzip is able to compress and decompress streams of unlimited size by
-automatically creating multi-member output. The members so created are
+automatically creating multimember output. The members so created are
large, about 2 PiB each.
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
@@ -115,7 +115,7 @@ range encoding), Igor Pavlov (for putting all the above together in
LZMA), and Julian Seward (for bzip2's CLI).
-Copyright (C) 2010-2015 Antonio Diaz Diaz.
+Copyright (C) 2010-2016 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.
diff --git a/carg_parser.c b/carg_parser.c
index 8d74ea6..3d4e89f 100644
--- a/carg_parser.c
+++ b/carg_parser.c
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
- Copyright (C) 2006-2015 Antonio Diaz Diaz.
+ Copyright (C) 2006-2016 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
diff --git a/carg_parser.h b/carg_parser.h
index ed4d9c5..e918942 100644
--- a/carg_parser.h
+++ b/carg_parser.h
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
- Copyright (C) 2006-2015 Antonio Diaz Diaz.
+ Copyright (C) 2006-2016 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
diff --git a/configure b/configure
index d848018..bdfc775 100755
--- a/configure
+++ b/configure
@@ -1,12 +1,12 @@
#! /bin/sh
# configure script for Clzip - LZMA lossless data compressor
-# Copyright (C) 2010-2015 Antonio Diaz Diaz.
+# Copyright (C) 2010-2016 Antonio Diaz Diaz.
#
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
pkgname=clzip
-pkgversion=1.7
+pkgversion=1.8
progname=clzip
srctrigger=doc/${pkgname}.texi
@@ -26,8 +26,8 @@ CFLAGS='-Wall -W -O2'
LDFLAGS=
# checking whether we are using GNU C.
-${CC} --version > /dev/null 2>&1
-if [ $? != 0 ] ; then
+if /bin/sh -c "${CC} --version" > /dev/null 2>&1 ; then true
+else
CC=cc
CFLAGS='-W -O2'
fi
@@ -139,7 +139,7 @@ if [ -z "${no_create}" ] ; then
rm -f config.status
cat > config.status << EOF
#! /bin/sh
-# This file was generated automatically by configure. Do not edit.
+# This file was generated automatically by configure. Don't edit.
# Run this file to recreate the current configuration.
#
# This script is free software: you have unlimited permission
@@ -165,8 +165,8 @@ echo "LDFLAGS = ${LDFLAGS}"
rm -f Makefile
cat > Makefile << EOF
# Makefile for Clzip - LZMA lossless data compressor
-# Copyright (C) 2010-2015 Antonio Diaz Diaz.
-# This file was generated automatically by configure. Do not edit.
+# Copyright (C) 2010-2016 Antonio Diaz Diaz.
+# This file was generated automatically by configure. Don't edit.
#
# This Makefile is free software: you have unlimited permission
# to copy, distribute and modify it.
diff --git a/decoder.c b/decoder.c
index dc12b72..942ec60 100644
--- a/decoder.c
+++ b/decoder.c
@@ -1,5 +1,5 @@
/* Clzip - LZMA lossless data compressor
- Copyright (C) 2010-2015 Antonio Diaz Diaz.
+ Copyright (C) 2010-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -29,19 +29,17 @@
#include "decoder.h"
-CRC32 crc32;
-
-
void Pp_show_msg( struct Pretty_print * const pp, const char * const msg )
{
if( verbosity >= 0 )
{
if( pp->first_post )
{
- int i, len = pp->longest_name - strlen( pp->name );
+ unsigned i;
pp->first_post = false;
fprintf( stderr, " %s: ", pp->name );
- for( i = 0; i < len; ++i ) fputc( ' ', stderr );
+ for( i = strlen( pp->name ); i < pp->longest_name; ++i )
+ fputc( ' ', stderr );
if( !msg ) fflush( stderr );
}
if( msg ) fprintf( stderr, "%s\n", msg );
@@ -110,8 +108,8 @@ void LZd_flush_data( struct LZ_decoder * const d )
if( d->outfd >= 0 &&
writeblock( d->outfd, d->buffer + d->stream_pos, size ) != size )
{ show_error( "Write error", errno, false ); cleanup_and_fail( 1 ); }
- if( d->pos >= d->buffer_size )
- { d->partial_data_pos += d->pos; d->pos = 0; }
+ if( d->pos >= d->dictionary_size )
+ { d->partial_data_pos += d->pos; d->pos = 0; d->pos_wrapped = true; }
d->stream_pos = d->pos;
}
}
@@ -121,13 +119,11 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
struct Pretty_print * const pp )
{
File_trailer trailer;
- const unsigned long long member_size = Rd_member_position( d->rdec ) + Ft_size;
- unsigned long long trailer_data_size;
- unsigned long long trailer_member_size;
- unsigned trailer_crc;
+ int size = Rd_read_data( d->rdec, trailer, Ft_size );
+ const unsigned long long data_size = LZd_data_position( d );
+ const unsigned long long member_size = Rd_member_position( d->rdec );
bool error = false;
- int size = Rd_read_data( d->rdec, trailer, Ft_size );
if( size < Ft_size )
{
error = true;
@@ -140,52 +136,44 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
while( size < Ft_size ) trailer[size++] = 0;
}
- if( d->rdec->code != 0 )
- {
- error = true;
- Pp_show_msg( pp, "Range decoder final code is not zero." );
- }
- trailer_crc = Ft_get_data_crc( trailer );
- if( trailer_crc != LZd_crc( d ) )
+ if( Ft_get_data_crc( trailer ) != LZd_crc( d ) )
{
error = true;
if( verbosity >= 0 )
{
Pp_show_msg( pp, 0 );
fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X\n",
- trailer_crc, LZd_crc( d ) );
+ Ft_get_data_crc( trailer ), LZd_crc( d ) );
}
}
- trailer_data_size = Ft_get_data_size( trailer );
- if( trailer_data_size != LZd_data_position( d ) )
+ if( Ft_get_data_size( trailer ) != data_size )
{
error = true;
if( verbosity >= 0 )
{
Pp_show_msg( pp, 0 );
fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX)\n",
- trailer_data_size, LZd_data_position( d ), LZd_data_position( d ) );
+ Ft_get_data_size( trailer ), data_size, data_size );
}
}
- trailer_member_size = Ft_get_member_size( trailer );
- if( trailer_member_size != member_size )
+ if( Ft_get_member_size( trailer ) != member_size )
{
error = true;
if( verbosity >= 0 )
{
Pp_show_msg( pp, 0 );
fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX)\n",
- trailer_member_size, member_size, member_size );
+ Ft_get_member_size( trailer ), member_size, member_size );
}
}
- if( !error && verbosity >= 2 && LZd_data_position( d ) > 0 && member_size > 0 )
+ if( !error && verbosity >= 2 && data_size > 0 && member_size > 0 )
fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
- (double)LZd_data_position( d ) / member_size,
- ( 8.0 * member_size ) / LZd_data_position( d ),
- 100.0 * ( 1.0 - ( (double)member_size / LZd_data_position( d ) ) ) );
+ (double)data_size / member_size,
+ ( 8.0 * member_size ) / data_size,
+ 100.0 * ( 1.0 - ( (double)member_size / data_size ) ) );
if( !error && verbosity >= 4 )
fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ",
- trailer_crc, trailer_data_size, trailer_member_size );
+ LZd_crc( d ), data_size, member_size );
return !error;
}
@@ -255,8 +243,8 @@ int LZd_decode_member( struct LZ_decoder * const d,
}
else /* match */
{
- int dis_slot;
const unsigned rep0_saved = rep0;
+ int dis_slot;
len = min_match_len + Rd_decode_len( rdec, &d->match_len_model, pos_state );
dis_slot = Rd_decode_tree6( rdec, d->bm_dis_slot[get_len_state(len)] );
if( dis_slot < start_dis_model ) rep0 = dis_slot;
@@ -295,7 +283,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
}
rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
state = St_set_match( state );
- if( rep0 >= d->dictionary_size || rep0 >= LZd_data_position( d ) )
+ if( rep0 >= d->dictionary_size || ( rep0 >= d->pos && !d->pos_wrapped ) )
{ LZd_flush_data( d ); return 1; }
}
LZd_copy_block( d, rep0, len );
diff --git a/decoder.h b/decoder.h
index 833701b..662aaf9 100644
--- a/decoder.h
+++ b/decoder.h
@@ -1,5 +1,5 @@
/* Clzip - LZMA lossless data compressor
- Copyright (C) 2010-2015 Antonio Diaz Diaz.
+ Copyright (C) 2010-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -60,7 +60,8 @@ static inline void Rd_reset_member_position( struct Range_decoder * const rdec )
static inline uint8_t Rd_get_byte( struct Range_decoder * const rdec )
{
- if( Rd_finished( rdec ) ) return 0xAA; /* make code != 0 */
+ /* 0xFF avoids decoder error if member is truncated at EOS marker */
+ if( Rd_finished( rdec ) ) return 0xFF;
return rdec->buffer[rdec->pos++];
}
@@ -232,12 +233,12 @@ struct LZ_decoder
unsigned long long partial_data_pos;
struct Range_decoder * rdec;
unsigned dictionary_size;
- int buffer_size;
uint8_t * buffer; /* output buffer */
- int pos; /* current pos in buffer */
- int stream_pos; /* first byte not yet written to file */
+ unsigned pos; /* current pos in buffer */
+ unsigned stream_pos; /* first byte not yet written to file */
uint32_t crc;
int outfd; /* output file descriptor */
+ bool pos_wrapped;
Bit_model bm_literal[1<<literal_context_bits][0x300];
Bit_model bm_match[states][pos_states];
@@ -258,56 +259,61 @@ void LZd_flush_data( struct LZ_decoder * const d );
static inline uint8_t LZd_peek_prev( const struct LZ_decoder * const d )
{
- const int i = ( ( d->pos > 0 ) ? d->pos : d->buffer_size ) - 1;
+ const unsigned i = ( ( d->pos > 0 ) ? d->pos : d->dictionary_size ) - 1;
return d->buffer[i];
}
static inline uint8_t LZd_peek( const struct LZ_decoder * const d,
- const int distance )
+ const unsigned distance )
{
- int i = d->pos - distance - 1;
- if( i < 0 ) i += d->buffer_size;
+ unsigned i = d->pos - distance - 1;
+ if( d->pos <= distance ) i += d->dictionary_size;
return d->buffer[i];
}
static inline void LZd_put_byte( struct LZ_decoder * const d, const uint8_t b )
{
d->buffer[d->pos] = b;
- if( ++d->pos >= d->buffer_size ) LZd_flush_data( d );
+ if( ++d->pos >= d->dictionary_size ) LZd_flush_data( d );
}
static inline void LZd_copy_block( struct LZ_decoder * const d,
- const int distance, int len )
+ const unsigned distance, unsigned len )
{
- int i = d->pos - distance - 1;
- if( i < 0 ) i += d->buffer_size;
- if( len < d->buffer_size - max( d->pos, i ) && len <= abs( d->pos - i ) )
+ unsigned i = d->pos - distance - 1;
+ bool fast;
+ if( d->pos <= distance )
+ { i += d->dictionary_size;
+ fast = ( len <= d->dictionary_size - i && len <= i - d->pos ); }
+ else
+ fast = ( len < d->dictionary_size - d->pos && len <= d->pos - i );
+ if( fast ) /* no wrap, no overlap */
{
- memcpy( d->buffer + d->pos, d->buffer + i, len ); /* no wrap, no overlap */
+ memcpy( d->buffer + d->pos, d->buffer + i, len );
d->pos += len;
}
else for( ; len > 0; --len )
{
d->buffer[d->pos] = d->buffer[i];
- if( ++d->pos >= d->buffer_size ) LZd_flush_data( d );
- if( ++i >= d->buffer_size ) i = 0;
+ if( ++d->pos >= d->dictionary_size ) LZd_flush_data( d );
+ if( ++i >= d->dictionary_size ) i = 0;
}
}
static inline bool LZd_init( struct LZ_decoder * const d,
struct Range_decoder * const rde,
- const int dict_size, const int ofd )
+ const unsigned dict_size, const int ofd )
{
d->partial_data_pos = 0;
d->rdec = rde;
d->dictionary_size = dict_size;
- d->buffer_size = max( 65536U, d->dictionary_size );
- d->buffer = (uint8_t *)malloc( d->buffer_size );
+ d->buffer = (uint8_t *)malloc( d->dictionary_size );
if( !d->buffer ) return false;
d->pos = 0;
d->stream_pos = 0;
d->crc = 0xFFFFFFFFU;
d->outfd = ofd;
+ d->pos_wrapped = false;
Bm_array_init( d->bm_literal[0], (1 << literal_context_bits) * 0x300 );
Bm_array_init( d->bm_match[0], states * pos_states );
@@ -321,7 +327,7 @@ static inline bool LZd_init( struct LZ_decoder * const d,
Bm_array_init( d->bm_align, dis_align_size );
Lm_init( &d->match_len_model );
Lm_init( &d->rep_len_model );
- d->buffer[d->buffer_size-1] = 0; /* prev_byte of first byte */
+ d->buffer[d->dictionary_size-1] = 0; /* prev_byte of first byte */
return true;
}
diff --git a/doc/clzip.1 b/doc/clzip.1
index 32b3bde..5dbb695 100644
--- a/doc/clzip.1
+++ b/doc/clzip.1
@@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
-.TH CLZIP "1" "July 2015" "clzip 1.7" "User Commands"
+.TH CLZIP "1" "May 2016" "clzip 1.8" "User Commands"
.SH NAME
clzip \- reduces the size of files
.SH SYNOPSIS
@@ -15,11 +15,14 @@ display this help and exit
\fB\-V\fR, \fB\-\-version\fR
output version information and exit
.TP
+\fB\-a\fR, \fB\-\-trailing\-error\fR
+exit with error status if trailing data
+.TP
\fB\-b\fR, \fB\-\-member\-size=\fR<bytes>
set member size limit in bytes
.TP
\fB\-c\fR, \fB\-\-stdout\fR
-send output to standard output
+write to standard output, keep input files
.TP
\fB\-d\fR, \fB\-\-decompress\fR
decompress
@@ -37,7 +40,7 @@ keep (don't delete) input files
set match length limit in bytes [36]
.TP
\fB\-o\fR, \fB\-\-output=\fR<file>
-if reading stdin, place the output into <file>
+if reading standard input, write to <file>
.TP
\fB\-q\fR, \fB\-\-quiet\fR
suppress all messages
@@ -63,13 +66,16 @@ alias for \fB\-0\fR
\fB\-\-best\fR
alias for \fB\-9\fR
.PP
-If no file names are given, clzip compresses or decompresses
-from standard input to standard output.
+If no file names are given, or if a file is '\-', clzip compresses or
+decompresses from standard input to standard output.
Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,
Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...
+Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12
+to 2^29 bytes.
+.PP
The bidimensional parameter space of LZMA can't be mapped to a linear
scale optimal for all files. If your files are large, very repetitive,
-etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR
+etc, you may need to use the \fB\-\-dictionary\-size\fR and \fB\-\-match\-length\fR
options directly to achieve optimal performance.
.PP
Exit status: 0 for a normal exit, 1 for environmental problems (file
@@ -81,7 +87,7 @@ Report bugs to lzip\-bug@nongnu.org
.br
Clzip home page: http://www.nongnu.org/lzip/clzip.html
.SH COPYRIGHT
-Copyright \(co 2015 Antonio Diaz Diaz.
+Copyright \(co 2016 Antonio Diaz Diaz.
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
.br
This is free software: you are free to change and redistribute it.
diff --git a/doc/clzip.info b/doc/clzip.info
index 786d8c1..c590473 100644
--- a/doc/clzip.info
+++ b/doc/clzip.info
@@ -11,7 +11,7 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir)
Clzip Manual
************
-This manual is for Clzip (version 1.7, 7 July 2015).
+This manual is for Clzip (version 1.8, 13 May 2016).
* Menu:
@@ -19,12 +19,13 @@ This manual is for Clzip (version 1.7, 7 July 2015).
* Invoking clzip:: Command line interface
* File format:: Detailed format of the compressed file
* Algorithm:: How clzip compresses the data
+* Trailing data:: Extra data appended to the file
* Examples:: A small tutorial with examples
* Problems:: Reporting bugs
* Concept index:: Index of concepts
- Copyright (C) 2010-2015 Antonio Diaz Diaz.
+ Copyright (C) 2010-2016 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to
copy, distribute and modify it.
@@ -53,7 +54,7 @@ availability:
recovery means. The lziprecover program can repair bit-flip errors
(one of the most common forms of data corruption) in lzip files,
and provides data recovery capabilities, including error-checked
- merging of damaged copies of a file. *note Data safety:
+ merging of damaged copies of a file. *Note Data safety:
(lziprecover)Data safety.
* The lzip format is as simple as possible (but not simpler). The
@@ -73,15 +74,14 @@ corrupt byte near the beginning is a thing of the past.
The member trailer stores the 32-bit CRC of the original data, the
size of the original data and the size of the member. These values,
-together with the value remaining in the range decoder and the
-end-of-stream marker, provide a 4 factor integrity checking which
-guarantees that the decompressed version of the data is identical to
-the original. This guards against corruption of the compressed data,
-and against undetected bugs in clzip (hopefully very unlikely). The
-chances of data corruption going undetected are microscopic. Be aware,
-though, that the check occurs upon decompression, so it can only tell
-you that something is wrong. It can't help you recover the original
-uncompressed data.
+together with the end-of-stream marker, provide a 3 factor integrity
+checking which guarantees that the decompressed version of the data is
+identical to the original. This guards against corruption of the
+compressed data, and against undetected bugs in clzip (hopefully very
+unlikely). The chances of data corruption going undetected are
+microscopic. Be aware, though, that the check occurs upon
+decompression, so it can only tell you that something is wrong. It
+can't help you recover the original uncompressed data.
Clzip uses the same well-defined exit status values used by lzip and
bzip2, which makes it safer than compressors returning ambiguous warning
@@ -128,14 +128,14 @@ two or more compressed files. The result is the concatenation of the
corresponding uncompressed files. Integrity testing of concatenated
compressed files is also supported.
- Clzip can produce multi-member files and safely recover, with
+ Clzip can produce multimember files and safely recover, with
lziprecover, the undamaged members in case of file damage. Clzip can
also split the compressed output in volumes of a given size, even when
reading from standard input. This allows the direct creation of
multivolume compressed tar archives.
Clzip is able to compress and decompress streams of unlimited size by
-automatically creating multi-member output. The members so created are
+automatically creating multimember output. The members so created are
large, about 2 PiB each.

@@ -148,6 +148,10 @@ The format for running clzip is:
clzip [OPTIONS] [FILES]
+'-' used as a FILE argument means standard input. It can be mixed with
+other FILES and is read just once, the first time it appears in the
+command line.
+
Clzip supports the following options:
'-h'
@@ -158,6 +162,13 @@ The format for running clzip is:
'--version'
Print the version number of clzip on the standard output and exit.
+'-a'
+'--trailing-error'
+ Exit with error status 2 if any remaining input is detected after
+ decompressing the last member. Such remaining input is usually
+ trailing garbage that can be safely ignored. *Note
+ concat-example::.
+
'-b BYTES'
'--member-size=BYTES'
Set the member size limit to BYTES. A small member size may
@@ -166,14 +177,19 @@ The format for running clzip is:
'-c'
'--stdout'
- Compress or decompress to standard output. Needed when reading
- from a named pipe (fifo) or from a device. Use it to recover as
- much of the uncompressed data as possible when decompressing a
- corrupt file.
+ Compress or decompress to standard output; keep input files
+ unchanged. If compressing several files, each file is compressed
+ independently. This option is needed when reading from a named
+ pipe (fifo) or from a device. Use it also to recover as much of
+ the uncompressed data as possible when decompressing a corrupt
+ file.
'-d'
'--decompress'
- Decompress.
+ Decompress the specified file(s). If a file does not exist or
+ can't be opened, clzip continues decompressing the rest of the
+ files. If a file fails to decompress, clzip exits immediately
+ without decompressing the rest of the files.
'-f'
'--force'
@@ -211,12 +227,13 @@ The format for running clzip is:
'-s BYTES'
'--dictionary-size=BYTES'
- Set the dictionary size limit in bytes. Valid values range from 4
- KiB to 512 MiB. Clzip will use the smallest possible dictionary
- size for each file without exceeding this limit. Note that
- dictionary sizes are quantized. If the specified size does not
- match one of the valid sizes, it will be rounded upwards by adding
- up to (BYTES / 16) to it.
+ Set the dictionary size limit in bytes. Clzip will use the smallest
+ possible dictionary size for each file without exceeding this
+ limit. Valid values range from 4 KiB to 512 MiB. Values 12 to 29
+ are interpreted as powers of two, meaning 2^12 to 2^29 bytes. Note
+ that dictionary sizes are quantized. If the specified size does
+ not match one of the valid sizes, it will be rounded upwards by
+ adding up to (BYTES / 8) to it.
For maximum compression you should use a dictionary size limit as
large as possible, but keep in mind that the decompression memory
@@ -228,16 +245,17 @@ The format for running clzip is:
Split the compressed output into several volume files with names
'original_name00001.lz', 'original_name00002.lz', etc, and set the
volume size limit to BYTES. Each volume is a complete, maybe
- multi-member, lzip file. A small volume size may degrade
- compression ratio, so use it only when needed. Valid values range
- from 100 kB to 4 EiB.
+ multimember, lzip file. A small volume size may degrade compression
+ ratio, so use it only when needed. Valid values range from 100 kB
+ to 4 EiB.
'-t'
'--test'
Check integrity of the specified file(s), but don't decompress
them. This really performs a trial decompression and throws away
the result. Use it together with '-v' to see information about
- the file.
+ the file(s). If a file fails the test, clzip continues checking
+ the rest of the files.
'-v'
'--verbose'
@@ -246,18 +264,19 @@ The format for running clzip is:
processed. A second '-v' shows the progress of compression.
When decompressing or testing, further -v's (up to 4) increase the
verbosity level, showing status, compression ratio, dictionary
- size, and trailer contents (CRC, data size, member size).
+ size, trailer contents (CRC, data size, member size), and up to 6
+ bytes of trailing data (if any).
'-0 .. -9'
Set the compression parameters (dictionary size and match length
- limit) as shown in the table below. Note that '-9' can be much
- slower than '-0'. These options have no effect when decompressing.
+ limit) as shown in the table below. The default compression level
+ is '-6'. Note that '-9' can be much slower than '-0'. These
+ options have no effect when decompressing.
The bidimensional parameter space of LZMA can't be mapped to a
linear scale optimal for all files. If your files are large, very
- repetitive, etc, you may need to use the '--match-length' and
- '--dictionary-size' options directly to achieve optimal
- performance.
+ repetitive, etc, you may need to use the '--dictionary-size' and
+ '--match-length' options directly to achieve optimal performance.
Level Dictionary size Match length limit
-0 64 KiB 16 bytes
@@ -327,12 +346,12 @@ additional information before, between, or after them.
Each member has the following structure:
+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-| ID string | VN | DS | Lzma stream | CRC32 | Data size | Member size |
+| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size |
+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
All multibyte values are stored in little endian order.
-'ID string'
+'ID string (the "magic" bytes)'
A four byte string, identifying the lzip format, with the value
"LZIP" (0x4C, 0x5A, 0x49, 0x50).
@@ -350,8 +369,8 @@ additional information before, between, or after them.
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB
Valid values for dictionary size range from 4 KiB to 512 MiB.
-'Lzma stream'
- The lzma stream, finished by an end of stream marker. Uses default
+'LZMA stream'
+ The LZMA stream, finished by an end of stream marker. Uses default
values for encoder properties. *Note Stream format: (lzip)Stream
format, for a complete description.
@@ -365,11 +384,11 @@ additional information before, between, or after them.
Total size of the member, including header and trailer. This field
acts as a distributed index, allows the verification of stream
integrity, and facilitates safe recovery of undamaged members from
- multi-member files.
+ multimember files.

-File: clzip.info, Node: Algorithm, Next: Examples, Prev: File format, Up: Top
+File: clzip.info, Node: Algorithm, Next: Trailing data, Prev: File format, Up: Top
4 Algorithm
***********
@@ -435,15 +454,48 @@ range encoding), Igor Pavlov (for putting all the above together in
LZMA), and Julian Seward (for bzip2's CLI).

-File: clzip.info, Node: Examples, Next: Problems, Prev: Algorithm, Up: Top
+File: clzip.info, Node: Trailing data, Next: Examples, Prev: Algorithm, Up: Top
+
+5 Extra data appended to the file
+*********************************
+
+Sometimes extra data is found appended to a lzip file after the last
+member. Such trailing data may be:
+
+ * Padding added to make the file size a multiple of some block size,
+ for example when writing to a tape.
+
+ * Garbage added by some not totally successful copy operation.
+
+ * Useful data added by the user; a cryptographically secure hash, a
+ description of file contents, etc.
+
+ * Malicious data added to the file in order to make its total size
+ and hash value (for a chosen hash) coincide with those of another
+ file.
-5 A small tutorial with examples
+ * In very rare cases, trailing data could be the corrupt header of
+ another member. In multimember or concatenated files the
+ probability of corruption happening in the magic bytes is 5 times
+ smaller than the probability of getting a false positive caused by
+ the corruption of the integrity information itself. Therefore it
+ can be considered to be below the noise level.
+
+ Trailing data can be safely ignored in most cases. In some cases,
+like that of user-added data, it is expected to be ignored. In those
+cases where a file containing trailing data must be rejected, the option
+'--trailing-error' can be used. *Note --trailing-error::.
+
+
+File: clzip.info, Node: Examples, Next: Problems, Prev: Trailing data, Up: Top
+
+6 A small tutorial with examples
********************************
WARNING! Even if clzip is bug-free, other causes may result in a corrupt
compressed file (bugs in the system libraries, memory errors, etc).
Therefore, if the data you are going to compress are important, give the
-'--keep' option to clzip and do not remove the original file until you
+'--keep' option to clzip and don't remove the original file until you
verify the compressed file with a command like
'clzip -cd file.lz | cmp file -'.
@@ -454,8 +506,8 @@ and show the compression ratio.
clzip -v file
-Example 2: Like example 1 but the created 'file.lz' is multi-member
-with a member size of 1 MiB. The compression ratio is not shown.
+Example 2: Like example 1 but the created 'file.lz' is multimember with
+a member size of 1 MiB. The compression ratio is not shown.
clzip -b 1MiB file
@@ -472,37 +524,46 @@ show status.
clzip -tv file.lz
-Example 5: Compress a whole floppy in /dev/fd0 and send the output to
+Example 5: Compress a whole device in /dev/sdc and send the output to
'file.lz'.
- clzip -c /dev/fd0 > file.lz
+ clzip -c /dev/sdc > file.lz
+
+
+Example 6: The right way of concatenating compressed files. *Note
+Trailing data::.
+
+ Don't do this
+ cat file1.lz file2.lz file3.lz | clzip -d
+ Do this instead
+ clzip -cd file1.lz file2.lz file3.lz
-Example 6: Decompress 'file.lz' partially until 10 KiB of decompressed
+Example 7: Decompress 'file.lz' partially until 10 KiB of decompressed
data are produced.
clzip -cd file.lz | dd bs=1024 count=10
-Example 7: Decompress 'file.lz' partially from decompressed byte 10000
+Example 8: Decompress 'file.lz' partially from decompressed byte 10000
to decompressed byte 15000 (5000 bytes are produced).
clzip -cd file.lz | dd bs=1000 skip=10 count=5
-Example 8: Create a multivolume compressed tar archive with a volume
+Example 9: Create a multivolume compressed tar archive with a volume
size of 1440 KiB.
tar -c some_directory | clzip -S 1440KiB -o volume_name
-Example 9: Extract a multivolume compressed tar archive.
+Example 10: Extract a multivolume compressed tar archive.
clzip -cd volume_name*.lz | tar -xf -
-Example 10: Create a multivolume compressed backup of a large database
-file with a volume size of 650 MB, where each volume is a multi-member
+Example 11: Create a multivolume compressed backup of a large database
+file with a volume size of 650 MB, where each volume is a multimember
file with a member size of 32 MiB.
clzip -b 32MiB -S 650MB big_db
@@ -510,7 +571,7 @@ file with a member size of 32 MiB.

File: clzip.info, Node: Problems, Next: Concept index, Prev: Examples, Up: Top
-6 Reporting bugs
+7 Reporting bugs
****************
There are probably bugs in clzip. There are certainly errors and
@@ -539,6 +600,7 @@ Concept index
* introduction: Introduction. (line 6)
* invoking: Invoking clzip. (line 6)
* options: Invoking clzip. (line 6)
+* trailing data: Trailing data. (line 6)
* usage: Invoking clzip. (line 6)
* version: Invoking clzip. (line 6)
@@ -546,13 +608,16 @@ Concept index

Tag Table:
Node: Top210
-Node: Introduction893
-Node: Invoking clzip6152
-Node: File format11705
-Node: Algorithm14108
-Node: Examples16933
-Node: Problems18900
-Node: Concept index19426
+Node: Introduction952
+Node: Invoking clzip6164
+Ref: --trailing-error6730
+Node: File format12728
+Node: Algorithm15150
+Node: Trailing data17980
+Node: Examples19355
+Ref: concat-example20537
+Node: Problems21544
+Node: Concept index22070

End Tag Table
diff --git a/doc/clzip.texi b/doc/clzip.texi
index e2ca889..331d4eb 100644
--- a/doc/clzip.texi
+++ b/doc/clzip.texi
@@ -6,8 +6,8 @@
@finalout
@c %**end of header
-@set UPDATED 7 July 2015
-@set VERSION 1.7
+@set UPDATED 13 May 2016
+@set VERSION 1.8
@dircategory Data Compression
@direntry
@@ -39,13 +39,14 @@ This manual is for Clzip (version @value{VERSION}, @value{UPDATED}).
* Invoking clzip:: Command line interface
* File format:: Detailed format of the compressed file
* Algorithm:: How clzip compresses the data
+* Trailing data:: Extra data appended to the file
* Examples:: A small tutorial with examples
* Problems:: Reporting bugs
* Concept index:: Index of concepts
@end menu
@sp 1
-Copyright @copyright{} 2010-2015 Antonio Diaz Diaz.
+Copyright @copyright{} 2010-2016 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission
to copy, distribute and modify it.
@@ -78,7 +79,7 @@ program can repair bit-flip errors (one of the most common forms of data
corruption) in lzip files, and provides data recovery capabilities,
including error-checked merging of damaged copies of a file.
@ifnothtml
-@ref{Data safety,,,lziprecover}.
+@xref{Data safety,,,lziprecover}.
@end ifnothtml
@item
@@ -101,14 +102,14 @@ corrupt byte near the beginning is a thing of the past.
The member trailer stores the 32-bit CRC of the original data, the size
of the original data and the size of the member. These values, together
-with the value remaining in the range decoder and the end-of-stream
-marker, provide a 4 factor integrity checking which guarantees that the
-decompressed version of the data is identical to the original. This
-guards against corruption of the compressed data, and against undetected
-bugs in clzip (hopefully very unlikely). The chances of data corruption
-going undetected are microscopic. Be aware, though, that the check
-occurs upon decompression, so it can only tell you that something is
-wrong. It can't help you recover the original uncompressed data.
+with the end-of-stream marker, provide 3-factor integrity checking
+which guarantees that the decompressed version of the data is identical
+to the original. This guards against corruption of the compressed data,
+and against undetected bugs in clzip (hopefully very unlikely). The
+chances of data corruption going undetected are microscopic. Be aware,
+though, that the check occurs upon decompression, so it can only tell
+you that something is wrong. It can't help you recover the original
+uncompressed data.
Clzip uses the same well-defined exit status values used by lzip and
bzip2, which makes it safer than compressors returning ambiguous warning
@@ -157,14 +158,14 @@ or more compressed files. The result is the concatenation of the
corresponding uncompressed files. Integrity testing of concatenated
compressed files is also supported.
-Clzip can produce multi-member files and safely recover, with
+Clzip can produce multimember files and safely recover, with
lziprecover, the undamaged members in case of file damage. Clzip can
also split the compressed output in volumes of a given size, even when
reading from standard input. This allows the direct creation of
multivolume compressed tar archives.
Clzip is able to compress and decompress streams of unlimited size by
-automatically creating multi-member output. The members so created are
+automatically creating multimember output. The members so created are
large, about 2 PiB each.
@@ -181,6 +182,11 @@ The format for running clzip is:
clzip [@var{options}] [@var{files}]
@end example
+@noindent
+@samp{-} used as a @var{file} argument means standard input. It can be
+mixed with other @var{files} and is read just once, the first time it
+appears in the command line.
+
Clzip supports the following options:
@table @code
@@ -192,6 +198,13 @@ Print an informative help message describing the options and exit.
@itemx --version
Print the version number of clzip on the standard output and exit.
+@anchor{--trailing-error}
+@item -a
+@itemx --trailing-error
+Exit with error status 2 if any remaining input is detected after
+decompressing the last member. Such remaining input is usually trailing
+garbage that can be safely ignored. @xref{concat-example}.
+
@item -b @var{bytes}
@itemx --member-size=@var{bytes}
Set the member size limit to @var{bytes}. A small member size may
@@ -200,13 +213,18 @@ range from 100 kB to 2 PiB. Defaults to 2 PiB.
@item -c
@itemx --stdout
-Compress or decompress to standard output. Needed when reading from a
-named pipe (fifo) or from a device. Use it to recover as much of the
-uncompressed data as possible when decompressing a corrupt file.
+Compress or decompress to standard output; keep input files unchanged.
+If compressing several files, each file is compressed independently.
+This option is needed when reading from a named pipe (fifo) or from a
+device. Use it also to recover as much of the uncompressed data as
+possible when decompressing a corrupt file.
@item -d
@itemx --decompress
-Decompress.
+Decompress the specified file(s). If a file does not exist or can't be
+opened, clzip continues decompressing the rest of the files. If a file
+fails to decompress, clzip exits immediately without decompressing the
+rest of the files.
@item -f
@itemx --force
@@ -242,11 +260,13 @@ Quiet operation. Suppress all messages.
@item -s @var{bytes}
@itemx --dictionary-size=@var{bytes}
-Set the dictionary size limit in bytes. Valid values range from 4 KiB to
-512 MiB. Clzip will use the smallest possible dictionary size for each
-file without exceeding this limit. Note that dictionary sizes are
-quantized. If the specified size does not match one of the valid sizes,
-it will be rounded upwards by adding up to (@var{bytes} / 16) to it.
+Set the dictionary size limit in bytes. Clzip will use the smallest
+possible dictionary size for each file without exceeding this limit.
+Valid values range from 4 KiB to 512 MiB. Values 12 to 29 are
+interpreted as powers of two, meaning 2^12 to 2^29 bytes. Note that
+dictionary sizes are quantized. If the specified size does not match one
+of the valid sizes, it will be rounded upwards by adding up to
+@w{(@var{bytes} / 8)} to it.
For maximum compression you should use a dictionary size limit as large
as possible, but keep in mind that the decompression memory requirement
@@ -257,7 +277,7 @@ is affected at compression time by the choice of dictionary size limit.
Split the compressed output into several volume files with names
@samp{original_name00001.lz}, @samp{original_name00002.lz}, etc, and set
the volume size limit to @var{bytes}. Each volume is a complete, maybe
-multi-member, lzip file. A small volume size may degrade compression
+multimember, lzip file. A small volume size may degrade compression
ratio, so use it only when needed. Valid values range from 100 kB to 4
EiB.
@@ -265,7 +285,8 @@ EiB.
@itemx --test
Check integrity of the specified file(s), but don't decompress them.
This really performs a trial decompression and throws away the result.
-Use it together with @samp{-v} to see information about the file.
+Use it together with @samp{-v} to see information about the file(s). If
+a file fails the test, clzip continues checking the rest of the files.
@item -v
@itemx --verbose
@@ -274,18 +295,19 @@ When compressing, show the compression ratio for each file processed. A
second @samp{-v} shows the progress of compression.@*
When decompressing or testing, further -v's (up to 4) increase the
verbosity level, showing status, compression ratio, dictionary size,
-and trailer contents (CRC, data size, member size).
+trailer contents (CRC, data size, member size), and up to 6 bytes of
+trailing data (if any).
@item -0 .. -9
Set the compression parameters (dictionary size and match length limit)
-as shown in the table below. Note that @samp{-9} can be much slower than
-@samp{-0}. These options have no effect when decompressing.
+as shown in the table below. The default compression level is @samp{-6}.
+Note that @samp{-9} can be much slower than @samp{-0}. These options
+have no effect when decompressing.
The bidimensional parameter space of LZMA can't be mapped to a linear
scale optimal for all files. If your files are large, very repetitive,
-etc, you may need to use the @samp{--match-length} and
-@samp{--dictionary-size} options directly to achieve optimal
-performance.
+etc, you may need to use the @samp{--dictionary-size} and
+@samp{--match-length} options directly to achieve optimal performance.
@multitable {Level} {Dictionary size} {Match length limit}
@item Level @tab Dictionary size @tab Match length limit
@@ -364,14 +386,14 @@ additional information before, between, or after them.
Each member has the following structure:
@verbatim
+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-| ID string | VN | DS | Lzma stream | CRC32 | Data size | Member size |
+| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size |
+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@end verbatim
All multibyte values are stored in little endian order.
@table @samp
-@item ID string
+@item ID string (the "magic" bytes)
A four byte string, identifying the lzip format, with the value "LZIP"
(0x4C, 0x5A, 0x49, 0x50).
@@ -388,8 +410,8 @@ from the base size to obtain the dictionary size.@*
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@*
Valid values for dictionary size range from 4 KiB to 512 MiB.
-@item Lzma stream
-The lzma stream, finished by an end of stream marker. Uses default
+@item LZMA stream
+The LZMA stream, finished by an end of stream marker. Uses default
values for encoder properties.
@ifnothtml
@xref{Stream format,,,lzip},
@@ -409,7 +431,7 @@ Size of the uncompressed original data.
@item Member size (8 bytes)
Total size of the member, including header and trailer. This field acts
as a distributed index, allows the verification of stream integrity, and
-facilitates safe recovery of undamaged members from multi-member files.
+facilitates safe recovery of undamaged members from multimember files.
@end table
@@ -480,6 +502,44 @@ range encoding), Igor Pavlov (for putting all the above together in
LZMA), and Julian Seward (for bzip2's CLI).
+@node Trailing data
+@chapter Extra data appended to the file
+@cindex trailing data
+
+Sometimes extra data is found appended to a lzip file after the last
+member. Such trailing data may be:
+
+@itemize @bullet
+@item
+Padding added to make the file size a multiple of some block size, for
+example when writing to a tape.
+
+@item
+Garbage added by some not totally successful copy operation.
+
+@item
+Useful data added by the user; a cryptographically secure hash, a
+description of file contents, etc.
+
+@item
+Malicious data added to the file in order to make its total size and
+hash value (for a chosen hash) coincide with those of another file.
+
+@item
+In very rare cases, trailing data could be the corrupt header of another
+member. In multimember or concatenated files the probability of
+corruption happening in the magic bytes is 5 times smaller than the
+probability of getting a false positive caused by the corruption of the
+integrity information itself. Therefore it can be considered to be below
+the noise level.
+@end itemize
+
+Trailing data can be safely ignored in most cases. In some cases, like
+that of user-added data, it is expected to be ignored. In those cases
+where a file containing trailing data must be rejected, the option
+@samp{--trailing-error} can be used. @xref{--trailing-error}.
+
+
@node Examples
@chapter A small tutorial with examples
@cindex examples
@@ -487,7 +547,7 @@ LZMA), and Julian Seward (for bzip2's CLI).
WARNING! Even if clzip is bug-free, other causes may result in a corrupt
compressed file (bugs in the system libraries, memory errors, etc).
Therefore, if the data you are going to compress are important, give the
-@samp{--keep} option to clzip and do not remove the original file until
+@samp{--keep} option to clzip and don't remove the original file until
you verify the compressed file with a command like
@w{@samp{clzip -cd file.lz | cmp file -}}.
@@ -502,7 +562,7 @@ clzip -v file
@sp 1
@noindent
-Example 2: Like example 1 but the created @samp{file.lz} is multi-member
+Example 2: Like example 1 but the created @samp{file.lz} is multimember
with a member size of 1 MiB. The compression ratio is not shown.
@example
@@ -530,16 +590,29 @@ clzip -tv file.lz
@sp 1
@noindent
-Example 5: Compress a whole floppy in /dev/fd0 and send the output to
+Example 5: Compress a whole device in /dev/sdc and send the output to
@samp{file.lz}.
@example
-clzip -c /dev/fd0 > file.lz
+clzip -c /dev/sdc > file.lz
+@end example
+
+@sp 1
+@anchor{concat-example}
+@noindent
+Example 6: The right way of concatenating the decompressed output of two
+or more compressed files. @xref{Trailing data}.
+
+@example
+Don't do this
+ cat file1.lz file2.lz file3.lz | clzip -d
+Do this instead
+ clzip -cd file1.lz file2.lz file3.lz
@end example
@sp 1
@noindent
-Example 6: Decompress @samp{file.lz} partially until 10 KiB of
+Example 7: Decompress @samp{file.lz} partially until 10 KiB of
decompressed data are produced.
@example
@@ -548,7 +621,7 @@ clzip -cd file.lz | dd bs=1024 count=10
@sp 1
@noindent
-Example 7: Decompress @samp{file.lz} partially from decompressed byte
+Example 8: Decompress @samp{file.lz} partially from decompressed byte
10000 to decompressed byte 15000 (5000 bytes are produced).
@example
@@ -557,7 +630,7 @@ clzip -cd file.lz | dd bs=1000 skip=10 count=5
@sp 1
@noindent
-Example 8: Create a multivolume compressed tar archive with a volume
+Example 9: Create a multivolume compressed tar archive with a volume
size of 1440 KiB.
@example
@@ -566,7 +639,7 @@ tar -c some_directory | clzip -S 1440KiB -o volume_name
@sp 1
@noindent
-Example 9: Extract a multivolume compressed tar archive.
+Example 10: Extract a multivolume compressed tar archive.
@example
clzip -cd volume_name*.lz | tar -xf -
@@ -574,8 +647,8 @@ clzip -cd volume_name*.lz | tar -xf -
@sp 1
@noindent
-Example 10: Create a multivolume compressed backup of a large database
-file with a volume size of 650 MB, where each volume is a multi-member
+Example 11: Create a multivolume compressed backup of a large database
+file with a volume size of 650 MB, where each volume is a multimember
file with a member size of 32 MiB.
@example
diff --git a/encoder.c b/encoder.c
index 1a027f5..ce0ddf2 100644
--- a/encoder.c
+++ b/encoder.c
@@ -1,5 +1,5 @@
/* Clzip - LZMA lossless data compressor
- Copyright (C) 2010-2015 Antonio Diaz Diaz.
+ Copyright (C) 2010-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -28,6 +28,9 @@
#include "encoder.h"
+CRC32 crc32;
+
+
int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs )
{
int32_t * ptr0 = e->eb.mb.pos_array + ( e->eb.mb.cyclic_pos << 1 );
@@ -40,7 +43,7 @@ int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs )
const int min_pos = ( e->eb.mb.pos > e->eb.mb.dictionary_size ) ?
e->eb.mb.pos - e->eb.mb.dictionary_size : 0;
const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb );
- int count, delta, key2, key3, key4, newpos;
+ int count, key2, key3, key4, newpos;
unsigned tmp;
int len_limit = e->match_len_limit;
@@ -76,7 +79,7 @@ int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs )
}
if( num_pairs > 0 )
{
- delta = pos1 - np2;
+ const int delta = pos1 - np2;
while( maxlen < len_limit && data[maxlen-delta] == data[maxlen] )
++maxlen;
pairs[num_pairs-1].len = maxlen;
@@ -92,6 +95,7 @@ int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs )
for( count = e->cycles; ; )
{
+ int delta;
if( newpos <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; }
delta = pos1 - newpos;
@@ -196,16 +200,16 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
}
if( replens[rep_index] >= e->match_len_limit )
{
- e->trials[0].dis = rep_index;
e->trials[0].price = replens[rep_index];
+ e->trials[0].dis = rep_index;
LZe_move_and_update( e, replens[rep_index] );
return replens[rep_index];
}
if( main_len >= e->match_len_limit )
{
- e->trials[0].dis = e->pairs[num_pairs-1].dis + num_rep_distances;
e->trials[0].price = main_len;
+ e->trials[0].dis = e->pairs[num_pairs-1].dis + num_rep_distances;
LZe_move_and_update( e, main_len );
return main_len;
}
@@ -218,13 +222,12 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
const uint8_t cur_byte = Mb_peek( &e->eb.mb, 0 );
const uint8_t match_byte = Mb_peek( &e->eb.mb, reps[0] + 1 );
- e->trials[0].state = state;
- e->trials[1].dis = -1; /* literal */
e->trials[1].price = price0( e->eb.bm_match[state][pos_state] );
if( St_is_char( state ) )
e->trials[1].price += LZeb_price_literal( &e->eb, prev_byte, cur_byte );
else
e->trials[1].price += LZeb_price_matched( &e->eb, prev_byte, cur_byte, match_byte );
+ e->trials[1].dis = -1; /* literal */
if( match_byte == cur_byte )
Tr_update( &e->trials[1], rep_match_price +
@@ -234,16 +237,15 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
if( num_trials < min_match_len )
{
- e->trials[0].dis = e->trials[1].dis;
e->trials[0].price = 1;
+ e->trials[0].dis = e->trials[1].dis;
Mb_move_pos( &e->eb.mb );
return 1;
}
+ e->trials[0].state = state;
for( i = 0; i < num_rep_distances; ++i )
e->trials[0].reps[i] = reps[i];
- e->trials[1].prev_index = 0;
- e->trials[1].prev_index2 = single_step_trial;
for( len = min_match_len; len <= num_trials; ++len )
e->trials[len].price = infinite_price;
@@ -556,8 +558,8 @@ bool LZe_encode_member( struct LZ_encoder * const e,
{
const int pos_state =
( Mb_data_position( &e->eb.mb ) - ahead ) & pos_state_mask;
- const int dis = e->trials[i].dis;
const int len = e->trials[i].price;
+ const int dis = e->trials[i].dis;
bool bit = ( dis < 0 );
Re_encode_bit( &e->eb.renc, &e->eb.bm_match[state][pos_state], !bit );
diff --git a/encoder.h b/encoder.h
index a8793e2..99670b1 100644
--- a/encoder.h
+++ b/encoder.h
@@ -1,5 +1,5 @@
/* Clzip - LZMA lossless data compressor
- Copyright (C) 2010-2015 Antonio Diaz Diaz.
+ Copyright (C) 2010-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -306,6 +306,8 @@ static inline bool LZe_init( struct LZ_encoder * const e,
Lp_init( &e->rep_len_prices, &e->eb.rep_len_model, e->match_len_limit );
e->pending_num_pairs = 0;
e->num_dis_slots = 2 * real_bits( e->eb.mb.dictionary_size - 1 );
+ e->trials[1].prev_index = 0;
+ e->trials[1].prev_index2 = single_step_trial;
return true;
}
diff --git a/encoder_base.c b/encoder_base.c
index 9ce4563..31cad3f 100644
--- a/encoder_base.c
+++ b/encoder_base.c
@@ -1,5 +1,5 @@
/* Clzip - LZMA lossless data compressor
- Copyright (C) 2010-2015 Antonio Diaz Diaz.
+ Copyright (C) 2010-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/encoder_base.h b/encoder_base.h
index a72442f..54fecd1 100644
--- a/encoder_base.h
+++ b/encoder_base.h
@@ -1,5 +1,5 @@
/* Clzip - LZMA lossless data compressor
- Copyright (C) 2010-2015 Antonio Diaz Diaz.
+ Copyright (C) 2010-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -113,8 +113,7 @@ static inline int price_symbol_reversed( const Bit_model bm[], int symbol,
}
-static inline int price_matched( const Bit_model bm[], int symbol,
- int match_byte )
+static inline int price_matched( const Bit_model bm[], int symbol, int match_byte )
{
int price = 0;
int mask = 0x100;
@@ -409,8 +408,8 @@ static inline bool LZeb_init( struct LZ_encoder_base * const eb,
const int before, const int dict_size,
const int after_size, const int dict_factor,
const int num_prev_positions23,
- const int pos_array_factor, const int ifd,
- const int outfd )
+ const int pos_array_factor,
+ const int ifd, const int outfd )
{
if( !Mb_init( &eb->mb, before, dict_size, after_size, dict_factor,
num_prev_positions23, pos_array_factor, ifd ) ) return false;
diff --git a/fast_encoder.c b/fast_encoder.c
index 211f74d..941c0e2 100644
--- a/fast_encoder.c
+++ b/fast_encoder.c
@@ -1,5 +1,5 @@
/* Clzip - LZMA lossless data compressor
- Copyright (C) 2010-2015 Antonio Diaz Diaz.
+ Copyright (C) 2010-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -43,7 +43,6 @@ int FLZe_longest_match_len( struct FLZ_encoder * const fe, int * const distance
newpos = fe->eb.mb.prev_positions[fe->key4];
fe->eb.mb.prev_positions[fe->key4] = pos1;
-
for( count = 4; ; )
{
if( --count < 0 || newpos <= 0 ) { *ptr0 = 0; break; }
diff --git a/fast_encoder.h b/fast_encoder.h
index 797649b..df1741d 100644
--- a/fast_encoder.h
+++ b/fast_encoder.h
@@ -1,5 +1,5 @@
/* Clzip - LZMA lossless data compressor
- Copyright (C) 2010-2015 Antonio Diaz Diaz.
+ Copyright (C) 2010-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/lzip.h b/lzip.h
index 40e69d7..5274500 100644
--- a/lzip.h
+++ b/lzip.h
@@ -1,5 +1,5 @@
/* Clzip - LZMA lossless data compressor
- Copyright (C) 2010-2015 Antonio Diaz Diaz.
+ Copyright (C) 2010-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -50,6 +50,7 @@ enum {
max_dictionary_bits = 29,
max_dictionary_size = 1 << max_dictionary_bits,
literal_context_bits = 3,
+ literal_pos_state_bits = 0, /* not used */
pos_state_bits = 2,
pos_states = 1 << pos_state_bits,
pos_state_mask = pos_states - 1,
@@ -90,8 +91,8 @@ typedef int Bit_model;
static inline void Bm_init( Bit_model * const probability )
{ *probability = bit_model_total / 2; }
-static inline void Bm_array_init( Bit_model * const p, const int size )
- { int i = 0; while( i < size ) p[i++] = bit_model_total / 2; }
+static inline void Bm_array_init( Bit_model bm[], const int size )
+ { int i; for( i = 0; i < size; ++i ) Bm_init( &bm[i] ); }
struct Len_model
{
@@ -121,7 +122,8 @@ struct Pretty_print
};
static inline void Pp_init( struct Pretty_print * const pp,
- const char * const filenames[], const int num_filenames )
+ const char * const filenames[],
+ const int num_filenames, const int verbosity )
{
unsigned stdin_name_len;
int i;
@@ -131,6 +133,7 @@ static inline void Pp_init( struct Pretty_print * const pp,
pp->first_post = false;
stdin_name_len = strlen( pp->stdin_name );
+ if( verbosity <= 0 ) return;
for( i = 0; i < num_filenames; ++i )
{
const char * const s = filenames[i];
@@ -184,6 +187,11 @@ static inline void CRC32_update_buf( uint32_t * const crc,
}
+static inline bool isvalid_ds( const unsigned dictionary_size )
+ { return ( dictionary_size >= min_dictionary_size &&
+ dictionary_size <= max_dictionary_size ); }
+
+
static inline int real_bits( unsigned value )
{
int bits = 0;
@@ -205,6 +213,14 @@ static inline void Fh_set_magic( File_header data )
static inline bool Fh_verify_magic( const File_header data )
{ return ( memcmp( data, magic_string, 4 ) == 0 ); }
+/* detect truncated header */
+static inline bool Fh_verify_prefix( const File_header data, const int size )
+ {
+ int i; for( i = 0; i < size && i < 4; ++i )
+ if( data[i] != magic_string[i] ) return false;
+ return ( size > 0 );
+ }
+
static inline uint8_t Fh_version( const File_header data )
{ return data[4]; }
@@ -221,21 +237,18 @@ static inline unsigned Fh_get_dictionary_size( const File_header data )
static inline bool Fh_set_dictionary_size( File_header data, const unsigned sz )
{
- if( sz >= min_dictionary_size && sz <= max_dictionary_size )
+ if( !isvalid_ds( sz ) ) return false;
+ data[5] = real_bits( sz - 1 );
+ if( sz > min_dictionary_size )
{
- data[5] = real_bits( sz - 1 );
- if( sz > min_dictionary_size )
- {
- const unsigned base_size = 1 << data[5];
- const unsigned fraction = base_size / 16;
- int i;
- for( i = 7; i >= 1; --i )
- if( base_size - ( i * fraction ) >= sz )
- { data[5] |= ( i << 5 ); break; }
- }
- return true;
+ const unsigned base_size = 1 << data[5];
+ const unsigned fraction = base_size / 16;
+ int i;
+ for( i = 7; i >= 1; --i )
+ if( base_size - ( i * fraction ) >= sz )
+ { data[5] |= ( i << 5 ); break; }
}
- return false;
+ return true;
}
diff --git a/main.c b/main.c
index a080ae8..ecf8dd8 100644
--- a/main.c
+++ b/main.c
@@ -1,5 +1,5 @@
/* Clzip - LZMA lossless data compressor
- Copyright (C) 2010-2015 Antonio Diaz Diaz.
+ Copyright (C) 2010-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -23,6 +23,7 @@
#define _FILE_OFFSET_BITS 64
+#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
@@ -66,10 +67,11 @@
#error "Environments where CHAR_BIT != 8 are not supported."
#endif
+int verbosity = 0;
const char * const Program_name = "Clzip";
const char * const program_name = "clzip";
-const char * const program_year = "2015";
+const char * const program_year = "2016";
const char * invocation_name = 0;
struct { const char * from; const char * to; } const known_extensions[] = {
@@ -87,10 +89,6 @@ enum Mode { m_compress, m_decompress, m_test };
char * output_filename = 0;
int outfd = -1;
-int verbosity = 0;
-const mode_t usr_rw = S_IRUSR | S_IWUSR;
-const mode_t all_rw = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
-mode_t outfd_mode = S_IRUSR | S_IWUSR;
bool delete_output_on_interrupt = false;
@@ -101,14 +99,15 @@ static void show_help( void )
printf( "\nOptions:\n"
" -h, --help display this help and exit\n"
" -V, --version output version information and exit\n"
+ " -a, --trailing-error exit with error status if trailing data\n"
" -b, --member-size=<bytes> set member size limit in bytes\n"
- " -c, --stdout send output to standard output\n"
+ " -c, --stdout write to standard output, keep input files\n"
" -d, --decompress decompress\n"
" -f, --force overwrite existing output files\n"
" -F, --recompress force re-compression of compressed files\n"
" -k, --keep keep (don't delete) input files\n"
" -m, --match-length=<bytes> set match length limit in bytes [36]\n"
- " -o, --output=<file> if reading stdin, place the output into <file>\n"
+ " -o, --output=<file> if reading standard input, write to <file>\n"
" -q, --quiet suppress all messages\n"
" -s, --dictionary-size=<bytes> set dictionary size limit in bytes [8 MiB]\n"
" -S, --volume-size=<bytes> set volume size limit in bytes\n"
@@ -117,13 +116,15 @@ static void show_help( void )
" -0 .. -9 set compression level [default 6]\n"
" --fast alias for -0\n"
" --best alias for -9\n"
- "If no file names are given, clzip compresses or decompresses\n"
- "from standard input to standard output.\n"
+ "If no file names are given, or if a file is '-', clzip compresses or\n"
+ "decompresses from standard input to standard output.\n"
"Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
"Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
- "The bidimensional parameter space of LZMA can't be mapped to a linear\n"
+ "Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12\n"
+ "to 2^29 bytes.\n"
+ "\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n"
"scale optimal for all files. If your files are large, very repetitive,\n"
- "etc, you may need to use the --match-length and --dictionary-size\n"
+ "etc, you may need to use the --dictionary-size and --match-length\n"
"options directly to achieve optimal performance.\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
@@ -181,11 +182,10 @@ static unsigned long long getnum( const char * const ptr,
if( !errno && tail[0] )
{
const int factor = ( tail[1] == 'i' ) ? 1024 : 1000;
- int exponent = 0, i;
- bool bad_multiplier = false;
+ int exponent = 0; /* 0 = bad multiplier */
+ int i;
switch( tail[0] )
{
- case ' ': break;
case 'Y': exponent = 8; break;
case 'Z': exponent = 7; break;
case 'E': exponent = 6; break;
@@ -193,13 +193,10 @@ static unsigned long long getnum( const char * const ptr,
case 'T': exponent = 4; break;
case 'G': exponent = 3; break;
case 'M': exponent = 2; break;
- case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true;
- break;
- case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true;
- break;
- default : bad_multiplier = true;
+ case 'K': if( factor == 1024 ) exponent = 1; break;
+ case 'k': if( factor == 1000 ) exponent = 1; break;
}
- if( bad_multiplier )
+ if( exponent <= 0 )
{
show_error( "Bad multiplier in numerical argument.", 0, true );
exit( 1 );
@@ -274,7 +271,7 @@ static int open_instream( const char * const name, struct stat * const in_statsp
const bool can_read = ( i == 0 &&
( S_ISBLK( mode ) || S_ISCHR( mode ) ||
S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
- const bool no_ofile = to_stdout || program_mode == m_test;
+ const bool no_ofile = ( to_stdout || program_mode == m_test );
if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || !no_ofile ) ) )
{
if( verbosity >= 0 )
@@ -340,13 +337,17 @@ static void set_d_outname( const char * const name, const int i )
}
-static bool open_outstream( const bool force )
+static bool open_outstream( const bool force, const bool from_stdin )
{
+ const mode_t usr_rw = S_IRUSR | S_IWUSR;
+ const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
+ const mode_t outfd_mode = from_stdin ? all_rw : usr_rw;
int flags = O_CREAT | O_WRONLY | O_BINARY;
if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
outfd = open( output_filename, flags, outfd_mode );
- if( outfd < 0 && verbosity >= 0 )
+ if( outfd >= 0 ) delete_output_on_interrupt = true;
+ else if( verbosity >= 0 )
{
if( errno == EEXIST )
fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n",
@@ -407,7 +408,11 @@ static void close_and_set_permissions( const struct stat * const in_statsp )
fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 )
warning = true;
}
- if( close( outfd ) != 0 ) cleanup_and_fail( 1 );
+ if( close( outfd ) != 0 )
+ {
+ show_error( "Error closing output file", errno, false );
+ cleanup_and_fail( 1 );
+ }
outfd = -1;
delete_output_on_interrupt = false;
if( in_statsp )
@@ -481,8 +486,8 @@ static int compress( const unsigned long long member_size,
}
if( error )
{
- show_error( "Not enough memory. Try a smaller dictionary size.", 0, false );
- cleanup_and_fail( 1 );
+ Pp_show_msg( pp, "Not enough memory. Try a smaller dictionary size." );
+ return 1;
}
}
@@ -508,8 +513,7 @@ static int compress( const unsigned long long member_size,
close_and_set_permissions( in_statsp );
if( !next_filename() )
{ Pp_show_msg( pp, "Too many volume files." ); retval = 1; break; }
- if( !open_outstream( true ) ) { retval = 1; break; }
- delete_output_on_interrupt = true;
+ if( !open_outstream( true, !in_statsp ) ) { retval = 1; break; }
}
}
}
@@ -534,8 +538,51 @@ static int compress( const unsigned long long member_size,
}
+static unsigned char xdigit( const int value )	/* map 0..15 to an uppercase hex digit character */
+ {
+ if( value >= 0 && value <= 9 ) return '0' + value;	/* '0'..'9' */
+ if( value >= 10 && value <= 15 ) return 'A' + value - 10;	/* 'A'..'F' */
+ return 0;	/* value outside 0..15: return 0 (NUL), not a digit */
+ }
+
+
+static bool show_trailing_data( const uint8_t * const data, const int size,
+ struct Pretty_print * const pp, const bool all,
+ const bool ignore_trailing )	/* report 'size' bytes of trailing data; returns ignore_trailing unchanged */
+ {
+ if( verbosity >= 4 || !ignore_trailing )	/* print at verbosity 4+, or always when trailing data is an error */
+ {
+ int i;
+ char buf[80];	/* holds the whole message: prefix plus rendered bytes */
+ int len = snprintf( buf, sizeof buf, "%strailing data = ",
+ all ? "" : "first bytes of " );	/* 'all' false: data[] is only the beginning of the trailing data */
+ bool text = true;	/* stays true only if every byte passes isprint */
+ for( i = 0; i < size; ++i )
+ if( !isprint( data[i] ) ) { text = false; break; }
+ if( text )
+ {
+ if( len > 0 && len < (int)sizeof buf )	/* append only if the prefix was written and was not truncated */
+ snprintf( buf + len, sizeof buf - len, "'%.*s'", size, (const char *)data );	/* data is not NUL-terminated; %.*s limits it to size bytes */
+ }
+ else	/* at least one non-printable byte: dump as hex pairs separated by spaces */
+ {
+ for( i = 0; i < size && len > 0 && len + 3 < (int)sizeof buf; ++i )	/* len + 3: room for separator, two digits and the terminator */
+ {
+ if( i > 0 ) buf[len++] = ' ';
+ buf[len++] = xdigit( data[i] >> 4 );	/* high nibble */
+ buf[len++] = xdigit( data[i] & 0x0F );	/* low nibble */
+ buf[len] = 0;	/* keep buf terminated after every byte */
+ }
+ }
+ Pp_show_msg( pp, buf );
+ if( !ignore_trailing ) show_error( "Trailing data not allowed.", 0, false );
+ }
+ return ignore_trailing;	/* false makes the callers in decompress set retval = 2 */
+ }
+
+
static int decompress( const int infd, struct Pretty_print * const pp,
- const bool testing )
+ const bool ignore_trailing, const bool testing )
{
unsigned long long partial_file_pos = 0;
struct Range_decoder rdec;
@@ -549,24 +596,30 @@ static int decompress( const int infd, struct Pretty_print * const pp,
for( first_member = true; ; first_member = false )
{
- int result;
+ int result, size;
unsigned dictionary_size;
File_header header;
struct LZ_decoder decoder;
Rd_reset_member_position( &rdec );
- Rd_read_data( &rdec, header, Fh_size );
+ size = Rd_read_data( &rdec, header, Fh_size );
if( Rd_finished( &rdec ) ) /* End Of File */
{
- if( first_member )
+ if( first_member || Fh_verify_prefix( header, size ) )
{ Pp_show_msg( pp, "File ends unexpectedly at member header." );
retval = 2; }
+ else if( size > 0 && !show_trailing_data( header, size, pp,
+ true, ignore_trailing ) )
+ retval = 2;
break;
}
if( !Fh_verify_magic( header ) )
{
- if( !first_member ) break; /* trailing garbage */
- Pp_show_msg( pp, "Bad magic number (file not in lzip format)." );
- retval = 2; break;
+ if( first_member )
+ { Pp_show_msg( pp, "Bad magic number (file not in lzip format)." );
+ retval = 2; }
+ else if( !show_trailing_data( header, size, pp, false, ignore_trailing ) )
+ retval = 2;
+ break;
}
if( !Fh_verify_version( header ) )
{
@@ -577,8 +630,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
retval = 2; break;
}
dictionary_size = Fh_get_dictionary_size( header );
- if( dictionary_size < min_dictionary_size ||
- dictionary_size > max_dictionary_size )
+ if( !isvalid_ds( dictionary_size ) )
{ Pp_show_msg( pp, "Invalid dictionary size in member header." );
retval = 2; break; }
@@ -586,10 +638,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
{ Pp_show_msg( pp, 0 ); show_header( dictionary_size ); }
if( !LZd_init( &decoder, &rdec, dictionary_size, outfd ) )
- {
- show_error( "Not enough memory.", 0, false );
- cleanup_and_fail( 1 );
- }
+ { Pp_show_msg( pp, "Not enough memory." ); retval = 1; break; }
result = LZd_decode_member( &decoder, pp );
partial_file_pos += Rd_member_position( &rdec );
LZd_free( &decoder );
@@ -631,18 +680,16 @@ static void set_signals( void )
void show_error( const char * const msg, const int errcode, const bool help )
{
- if( verbosity >= 0 )
+ if( verbosity < 0 ) return;	/* negative verbosity: print nothing at all */
+ if( msg && msg[0] )	/* a null or empty msg skips the message line but may still print the hint */
{
- if( msg && msg[0] )
- {
- fprintf( stderr, "%s: %s", program_name, msg );
- if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) );
- fputc( '\n', stderr );
- }
- if( help )
- fprintf( stderr, "Try '%s --help' for more information.\n",
- invocation_name );
+ fprintf( stderr, "%s: %s", program_name, msg );
+ if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) );	/* append the system error text for a positive errcode */
+ fputc( '\n', stderr );
}
+ if( help )	/* point the user at --help, using the name the program was invoked with */
+ fprintf( stderr, "Try '%s --help' for more information.\n",
+ invocation_name );
}
@@ -664,18 +711,16 @@ void show_progress( const unsigned long long partial_size,
static const struct Matchfinder_base * mb = 0;
static struct Pretty_print * pp = 0;
- if( verbosity >= 2 )
+ if( verbosity < 2 ) return;
+ if( m ) /* initialize static vars */
+ { csize = cfile_size; psize = partial_size; mb = m; pp = p; }
+ if( mb && pp )
{
- if( m ) /* initialize static vars */
- { csize = cfile_size; psize = partial_size; mb = m; pp = p; }
- if( mb && pp )
- {
- const unsigned long long pos = psize + Mb_data_position( mb );
- if( csize > 0 )
- fprintf( stderr, "%4llu%%", pos / csize );
- fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
- Pp_reset( pp ); Pp_show_msg( pp, 0 ); /* restore cursor position */
- }
+ const unsigned long long pos = psize + Mb_data_position( mb );
+ if( csize > 0 )
+ fprintf( stderr, "%4llu%%", pos / csize );
+ fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
+ Pp_reset( pp ); Pp_show_msg( pp, 0 ); /* restore cursor position */
}
}
@@ -712,7 +757,9 @@ int main( const int argc, const char * const argv[] )
int i;
bool filenames_given = false;
bool force = false;
+ bool ignore_trailing = true;
bool keep_input_files = false;
+ bool stdin_used = false;
bool recompress = false;
bool to_stdout = false;
bool zero = false;
@@ -730,6 +777,7 @@ int main( const int argc, const char * const argv[] )
{ '7', 0, ap_no },
{ '8', 0, ap_no },
{ '9', "best", ap_no },
+ { 'a', "trailing-error", ap_no },
{ 'b', "member-size", ap_yes },
{ 'c', "stdout", ap_no },
{ 'd', "decompress", ap_no },
@@ -769,6 +817,7 @@ int main( const int argc, const char * const argv[] )
case '5': case '6': case '7': case '8': case '9':
zero = ( code == '0' );
encoder_options = option_mapping[code-'0']; break;
+ case 'a': ignore_trailing = false; break;
case 'b': member_size = getnum( arg, 100000, max_member_size ); break;
case 'c': to_stdout = true; break;
case 'd': program_mode = m_decompress; break;
@@ -819,7 +868,7 @@ int main( const int argc, const char * const argv[] )
( filenames_given || default_output_filename[0] ) )
set_signals();
- Pp_init( &pp, filenames, num_filenames );
+ Pp_init( &pp, filenames, num_filenames, verbosity );
output_filename = resize_buffer( output_filename, 1 );
for( i = 0; i < num_filenames; ++i )
@@ -831,6 +880,7 @@ int main( const int argc, const char * const argv[] )
if( !filenames[i][0] || strcmp( filenames[i], "-" ) == 0 )
{
+ if( stdin_used ) continue; else stdin_used = true;
input_filename = "";
infd = STDIN_FILENO;
if( program_mode != m_test )
@@ -844,11 +894,10 @@ int main( const int argc, const char * const argv[] )
else
{
output_filename = resize_buffer( output_filename,
- strlen( default_output_filename ) + 1 );
+ strlen( default_output_filename ) + 1 );
strcpy( output_filename, default_output_filename );
}
- outfd_mode = all_rw;
- if( !open_outstream( force ) )
+ if( !open_outstream( force, true ) )
{
if( retval < 1 ) retval = 1;
close( infd ); infd = -1;
@@ -872,8 +921,7 @@ int main( const int argc, const char * const argv[] )
if( program_mode == m_compress )
set_c_outname( input_filename, volume_size > 0 );
else set_d_outname( input_filename, eindex );
- outfd_mode = usr_rw;
- if( !open_outstream( force ) )
+ if( !open_outstream( force, false ) )
{
if( retval < 1 ) retval = 1;
close( infd ); infd = -1;
@@ -883,17 +931,19 @@ int main( const int argc, const char * const argv[] )
}
}
- if( !check_tty( infd, program_mode ) ) return 1;
+ if( !check_tty( infd, program_mode ) )
+ {
+ if( retval < 1 ) retval = 1;
+ cleanup_and_fail( retval );
+ }
- if( output_filename[0] && !to_stdout && program_mode != m_test )
- delete_output_on_interrupt = true;
in_statsp = input_filename[0] ? &in_stats : 0;
Pp_set_name( &pp, input_filename );
if( program_mode == m_compress )
tmp = compress( member_size, volume_size, infd, &encoder_options, &pp,
in_statsp, zero );
else
- tmp = decompress( infd, &pp, program_mode == m_test );
+ tmp = decompress( infd, &pp, ignore_trailing, program_mode == m_test );
if( tmp > retval ) retval = tmp;
if( tmp && program_mode != m_test ) cleanup_and_fail( retval );
diff --git a/testsuite/check.sh b/testsuite/check.sh
index f64a090..52347b4 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -1,6 +1,6 @@
#! /bin/sh
# check script for Clzip - LZMA lossless data compressor
-# Copyright (C) 2010-2015 Antonio Diaz Diaz.
+# Copyright (C) 2010-2016 Antonio Diaz Diaz.
#
# This script is free software: you have unlimited permission
# to copy, distribute and modify it.
@@ -17,9 +17,16 @@ if [ ! -f "${LZIP}" ] || [ ! -x "${LZIP}" ] ; then
exit 1
fi
+if [ -e "${LZIP}" ] 2> /dev/null ; then true	# probe: fails if this shell's 'test' rejects -e; its error output is discarded
+else
+ echo "$0: a POSIX shell is required to run the tests"
+ echo "Try bash -c \"$0 $1 $2\""	# suggest re-running this script with the same two arguments under bash
+ exit 1
+fi
+
if [ -d tmp ] ; then rm -rf tmp ; fi
mkdir tmp
-cd "${objdir}"/tmp
+cd "${objdir}"/tmp || framework_failure
cat "${testdir}"/test.txt > in || framework_failure
in_lz="${testdir}"/test.txt.lz
@@ -27,25 +34,22 @@ fail=0
printf "testing clzip-%s..." "$2"
-"${LZIP}" -cqm4 in > /dev/null
-if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
-"${LZIP}" -cqm274 in > /dev/null
-if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
-"${LZIP}" -cqs-1 in > /dev/null
-if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
-"${LZIP}" -cqs0 in > /dev/null
-if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
-"${LZIP}" -cqs4095 in > /dev/null
-if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
-"${LZIP}" -cqs513MiB in > /dev/null
-if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
-printf " in: Bad magic number (file not in lzip format).\n" > msg
-"${LZIP}" -t in 2> out
-if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi
-printf " (stdin): Bad magic number (file not in lzip format).\n" > msg
-"${LZIP}" -t < in 2> out
-if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi
-rm -f out msg
+"${LZIP}" -fkqm4 in
+if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -fkqm274 in
+if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -fkqs-1 in
+if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -fkqs0 in
+if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -fkqs4095 in
+if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -fkqs513MiB in
+if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -tq in
+if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -tq < in
+if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -cdq in
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -cdq < in
@@ -55,26 +59,53 @@ if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" -tq
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
-"${LZIP}" -t "${in_lz}" || fail=1
+printf "\ntesting decompression..."
+
+"${LZIP}" -t "${in_lz}"
+if [ $? = 0 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -cd "${in_lz}" > copy || fail=1
cmp in copy || fail=1
printf .
+rm -f copy
cat "${in_lz}" > copy.lz || framework_failure
-printf "to be overwritten" > copy || framework_failure
-"${LZIP}" -df copy.lz || fail=1
+"${LZIP}" -dk copy.lz || fail=1
cmp in copy || fail=1
-printf .
+printf "to be overwritten" > copy || framework_failure
+"${LZIP}" -dq copy.lz
+if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -df copy.lz
+if [ $? = 0 ] && [ ! -e copy.lz ] && cmp in copy ; then
+ printf . ; else printf - ; fail=1 ; fi
printf "to be overwritten" > copy || framework_failure
"${LZIP}" -df -o copy < "${in_lz}" || fail=1
cmp in copy || fail=1
printf .
+rm -f copy
"${LZIP}" < in > anyothername || fail=1
-"${LZIP}" -d anyothername || fail=1
-cmp in anyothername.out || fail=1
-printf .
+"${LZIP}" -d -o copy - anyothername - < "${in_lz}"
+if [ $? = 0 ] && cmp in copy && cmp in anyothername.out ; then
+ printf . ; else printf - ; fail=1 ; fi
+rm -f copy anyothername.out
+
+"${LZIP}" -tq in "${in_lz}"
+if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -tq foo.lz "${in_lz}"
+if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -cdq in "${in_lz}" > copy
+if [ $? = 2 ] && cat copy in | cmp in - ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -cdq foo.lz "${in_lz}" > copy
+if [ $? = 1 ] && cmp in copy ; then printf . ; else printf - ; fail=1 ; fi
+rm -f copy
+cat "${in_lz}" > copy.lz || framework_failure
+"${LZIP}" -dq in copy.lz
+if [ $? = 2 ] && [ -e copy.lz ] && [ ! -e copy ] && [ ! -e in.out ] ; then
+ printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -dq foo.lz copy.lz
+if [ $? = 1 ] && [ ! -e copy.lz ] && [ ! -e foo ] && cmp in copy ; then
+ printf . ; else printf - ; fail=1 ; fi
cat in in > in2 || framework_failure
"${LZIP}" -o copy2 < in2 || fail=1
@@ -84,12 +115,23 @@ cmp in2 copy2 || fail=1
printf .
printf "garbage" >> copy2.lz || framework_failure
+rm -f copy2
+"${LZIP}" -atq copy2.lz
+if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -atq < copy2.lz
+if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -adkq copy2.lz
+if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -adkq -o copy2 < copy2.lz
+if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi
printf "to be overwritten" > copy2 || framework_failure
"${LZIP}" -df copy2.lz || fail=1
cmp in2 copy2 || fail=1
printf .
-"${LZIP}" -cfq "${in_lz}" > out
+printf "\ntesting compression..."
+
+"${LZIP}" -cfq "${in_lz}" > out # /dev/null is a tty on OS/2
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -cF "${in_lz}" > out || fail=1
"${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1