summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 21
-rw-r--r-- INSTALL 4
-rw-r--r-- Makefile.in 9
-rw-r--r-- NEWS 14
-rw-r--r-- README 6
-rw-r--r-- arg_parser.cc 2
-rw-r--r-- arg_parser.h 2
-rwxr-xr-x configure 14
-rw-r--r-- decoder.cc 41
-rw-r--r-- decoder.h 14
-rw-r--r-- doc/lzip.1 17
-rw-r--r-- doc/lzip.info 210
-rw-r--r-- doc/lzip.texi 187
-rw-r--r-- encoder.cc 7
-rw-r--r-- encoder.h 2
-rw-r--r-- encoder_base.cc 6
-rw-r--r-- encoder_base.h 2
-rw-r--r-- fast_encoder.cc 4
-rw-r--r-- fast_encoder.h 2
-rw-r--r-- lzip.h 40
-rw-r--r-- main.cc 158
-rwxr-xr-x testsuite/check.sh 80
22 files changed, 502 insertions, 340 deletions
diff --git a/ChangeLog b/ChangeLog
index 113206d..cd488f6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,9 +1,16 @@
-2015-08-13 Antonio Diaz Diaz <antonio@gnu.org>
+2016-05-14 Antonio Diaz Diaz <antonio@gnu.org>
- * Version 1.18-pre1 released.
+ * Version 1.18 released.
* main.cc: Added new option '-a, --trailing-error'.
* Decompression time has been reduced by 2%.
+ * decoder.cc (verify_trailer): Removed test of final code.
+ * main.cc (main): Delete '--output' file if infd is a terminal.
+ * main.cc (main): Don't use stdin more than once.
+ * Removed decompression support for version 0 files.
* lzip.texi: Added chapter 'Trailing data'.
+ * configure: Avoid warning on some shells when testing for g++.
+ * Makefile.in: Detect the existence of install-info.
+ * testsuite/check.sh: A POSIX shell is required to run the tests.
* testsuite/check.sh: Don't check error messages.
2015-07-12 Antonio Diaz Diaz <antonio@gnu.org>
@@ -28,7 +35,7 @@
* Version 1.15 released.
* Show progress of compression at verbosity level 2 (-vv).
- * main.cc (show_header): Do not show header version.
+ * main.cc (show_header): Don't show header version.
* Ignore option '-n, --threads' for compatibility with plzip.
* configure: Options now accept a separate argument.
* lzip.texinfo: Added chapter 'Stream format' and appendix
@@ -71,10 +78,10 @@
by up to 6%.
* Compression time of option '-0' has been reduced by 2%.
* main.cc (decompress): Print only one status line for each
- multi-member file when only one '-v' is specified.
+ multimember file when only one '-v' is specified.
* main.cc (decompress): Print up to 6 bytes of trailing data
when '-vvvv' is specified.
- * main.cc (open_instream): Do not show the message
+ * main.cc (open_instream): Don't show the message
" and '--stdout' was not specified" for directories, etc.
* lziprecover.cc: If '-v' is not specified show errors only.
* testsuite/unzcrash.cc: Use Arg_parser.
@@ -105,7 +112,7 @@
* lziprecover.cc: Added new option '-f, --force'.
* lziprecover.cc: Added new option '-o, --output'.
* lziprecover.cc: Added new option '-s, --split' to select the
- until now only operation of splitting multi-member files.
+ until now only operation of splitting multimember files.
* lziprecover.cc: If no operation is specified, warn the user
and do nothing.
* main.cc: Fixed warning about fchown's return value being ignored.
@@ -256,7 +263,7 @@
* Version 0.1 released.
-Copyright (C) 2008-2015 Antonio Diaz Diaz.
+Copyright (C) 2008-2016 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute and
diff --git a/INSTALL b/INSTALL
index 663205c..28a3751 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,7 +1,7 @@
Requirements
------------
You will need a C++ compiler.
-I use gcc 4.9.1 and 4.1.2, but the code should compile with any
+I use gcc 5.3.0 and 4.1.2, but the code should compile with any
standards compliant compiler.
Gcc is available at http://gcc.gnu.org.
@@ -58,7 +58,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above.
-Copyright (C) 2008-2015 Antonio Diaz Diaz.
+Copyright (C) 2008-2016 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.
diff --git a/Makefile.in b/Makefile.in
index af75ce6..88109fb 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -5,6 +5,7 @@ INSTALL_PROGRAM = $(INSTALL) -m 755
INSTALL_DATA = $(INSTALL) -m 644
INSTALL_DIR = $(INSTALL) -d -m 755
SHELL = /bin/sh
+CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
objs = arg_parser.o encoder_base.o encoder.o fast_encoder.o decoder.o main.o
@@ -69,7 +70,9 @@ install-info :
if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi
-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
$(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info"
- -install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info"
+ -if $(CAN_RUN_INSTALLINFO) ; then \
+ install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" ; \
+ fi
install-info-compress : install-info
lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info"
@@ -88,7 +91,9 @@ uninstall-bin :
-rm -f "$(DESTDIR)$(bindir)/$(progname)"
uninstall-info :
- -install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info"
+ -if $(CAN_RUN_INSTALLINFO) ; then \
+ install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" ; \
+ fi
-rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
uninstall-man :
diff --git a/NEWS b/NEWS
index 86b75f3..0bd6dce 100644
--- a/NEWS
+++ b/NEWS
@@ -6,7 +6,17 @@ member, has been added.
Decompression time has been reduced by 2%.
+The test of the value remaining in the range decoder has been removed.
+(After extensive testing it has been found useless to detect corruption
+in the decompressed data. Eliminating it reduces the number of false
+positives for corruption and makes error detection more accurate).
+
+When decompressing, the file specified with the '--output' option is now
+deleted if the input is a terminal.
+
+Decompression support for version 0 files has been removed.
+
The new chapter "Trailing data" has been added to the manual.
-Fixed a harmless check failure on Windows caused by the failed
-comparison of a message in text mode.
+A harmless check failure on Windows, caused by the failed comparison of
+a message in text mode, has been fixed.
diff --git a/README b/README
index 8a31263..b8a399a 100644
--- a/README
+++ b/README
@@ -75,14 +75,14 @@ or more compressed files. The result is the concatenation of the
corresponding uncompressed files. Integrity testing of concatenated
compressed files is also supported.
-Lzip can produce multi-member files and safely recover, with
+Lzip can produce multimember files and safely recover, with
lziprecover, the undamaged members in case of file damage. Lzip can
also split the compressed output in volumes of a given size, even when
reading from standard input. This allows the direct creation of
multivolume compressed tar archives.
Lzip is able to compress and decompress streams of unlimited size by
-automatically creating multi-member output. The members so created are
+automatically creating multimember output. The members so created are
large, about 2 PiB each.
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
@@ -110,7 +110,7 @@ range encoding), Igor Pavlov (for putting all the above together in
LZMA), and Julian Seward (for bzip2's CLI).
-Copyright (C) 2008-2015 Antonio Diaz Diaz.
+Copyright (C) 2008-2016 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.
diff --git a/arg_parser.cc b/arg_parser.cc
index 551264a..82972ad 100644
--- a/arg_parser.cc
+++ b/arg_parser.cc
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
- Copyright (C) 2006-2015 Antonio Diaz Diaz.
+ Copyright (C) 2006-2016 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
diff --git a/arg_parser.h b/arg_parser.h
index 3dc85d0..f45b9ac 100644
--- a/arg_parser.h
+++ b/arg_parser.h
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
- Copyright (C) 2006-2015 Antonio Diaz Diaz.
+ Copyright (C) 2006-2016 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
diff --git a/configure b/configure
index ebf6fb9..caaa245 100755
--- a/configure
+++ b/configure
@@ -1,12 +1,12 @@
#! /bin/sh
# configure script for Lzip - LZMA lossless data compressor
-# Copyright (C) 2008-2015 Antonio Diaz Diaz.
+# Copyright (C) 2008-2016 Antonio Diaz Diaz.
#
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
pkgname=lzip
-pkgversion=1.18-pre1
+pkgversion=1.18
progname=lzip
srctrigger=doc/${pkgname}.texi
@@ -26,8 +26,8 @@ CXXFLAGS='-Wall -W -O2'
LDFLAGS=
# checking whether we are using GNU C++.
-${CXX} --version > /dev/null 2>&1
-if [ $? != 0 ] ; then
+if /bin/sh -c "${CXX} --version" > /dev/null 2>&1 ; then true
+else
CXX=c++
CXXFLAGS='-W -O2'
fi
@@ -139,7 +139,7 @@ if [ -z "${no_create}" ] ; then
rm -f config.status
cat > config.status << EOF
#! /bin/sh
-# This file was generated automatically by configure. Do not edit.
+# This file was generated automatically by configure. Don't edit.
# Run this file to recreate the current configuration.
#
# This script is free software: you have unlimited permission
@@ -165,8 +165,8 @@ echo "LDFLAGS = ${LDFLAGS}"
rm -f Makefile
cat > Makefile << EOF
# Makefile for Lzip - LZMA lossless data compressor
-# Copyright (C) 2008-2015 Antonio Diaz Diaz.
-# This file was generated automatically by configure. Do not edit.
+# Copyright (C) 2008-2016 Antonio Diaz Diaz.
+# This file was generated automatically by configure. Don't edit.
#
# This Makefile is free software: you have unlimited permission
# to copy, distribute and modify it.
diff --git a/decoder.cc b/decoder.cc
index f773e57..1e51e0b 100644
--- a/decoder.cc
+++ b/decoder.cc
@@ -1,5 +1,5 @@
/* Lzip - LZMA lossless data compressor
- Copyright (C) 2008-2015 Antonio Diaz Diaz.
+ Copyright (C) 2008-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -31,9 +31,6 @@
#include "decoder.h"
-const CRC32 crc32;
-
-
void Pretty_print::operator()( const char * const msg ) const
{
if( verbosity >= 0 )
@@ -110,7 +107,8 @@ void LZ_decoder::flush_data()
crc32.update_buf( crc_, buffer + stream_pos, size );
if( outfd >= 0 && writeblock( outfd, buffer + stream_pos, size ) != size )
throw Error( "Write error" );
- if( pos >= dictionary_size ) { partial_data_pos += pos; pos = 0; }
+ if( pos >= dictionary_size )
+ { partial_data_pos += pos; pos = 0; pos_wrapped = true; }
stream_pos = pos;
}
}
@@ -119,12 +117,12 @@ void LZ_decoder::flush_data()
bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
{
File_trailer trailer;
- const int trailer_size = File_trailer::size( member_version );
- const unsigned long long member_size = rdec.member_position() + trailer_size;
+ int size = rdec.read_data( trailer.data, File_trailer::size );
+ const unsigned long long data_size = data_position();
+ const unsigned long long member_size = rdec.member_position();
bool error = false;
- int size = rdec.read_data( trailer.data, trailer_size );
- if( size < trailer_size )
+ if( size < File_trailer::size )
{
error = true;
if( verbosity >= 0 )
@@ -133,16 +131,9 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
std::fprintf( stderr, "Trailer truncated at trailer position %d;"
" some checks may fail.\n", size );
}
- while( size < trailer_size ) trailer.data[size++] = 0;
+ while( size < File_trailer::size ) trailer.data[size++] = 0;
}
- if( member_version == 0 ) trailer.member_size( member_size );
-
- if( !rdec.code_is_zero() )
- {
- error = true;
- pp( "Range decoder final code is not zero." );
- }
if( trailer.data_crc() != crc() )
{
error = true;
@@ -153,14 +144,14 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
trailer.data_crc(), crc() );
}
}
- if( trailer.data_size() != data_position() )
+ if( trailer.data_size() != data_size )
{
error = true;
if( verbosity >= 0 )
{
pp();
std::fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX)\n",
- trailer.data_size(), data_position(), data_position() );
+ trailer.data_size(), data_size, data_size );
}
}
if( trailer.member_size() != member_size )
@@ -173,14 +164,14 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
trailer.member_size(), member_size, member_size );
}
}
- if( !error && verbosity >= 2 && data_position() > 0 && member_size > 0 )
+ if( !error && verbosity >= 2 && data_size > 0 && member_size > 0 )
std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
- (double)data_position() / member_size,
- ( 8.0 * member_size ) / data_position(),
- 100.0 * ( 1.0 - ( (double)member_size / data_position() ) ) );
+ (double)data_size / member_size,
+ ( 8.0 * member_size ) / data_size,
+ 100.0 * ( 1.0 - ( (double)member_size / data_size ) ) );
if( !error && verbosity >= 4 )
std::fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ",
- trailer.data_crc(), trailer.data_size(), trailer.member_size() );
+ crc(), data_size, member_size );
return !error;
}
@@ -296,7 +287,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
}
rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
state.set_match();
- if( rep0 >= dictionary_size || rep0 >= data_position() )
+ if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
{ flush_data(); return 1; }
}
copy_block( rep0, len );
diff --git a/decoder.h b/decoder.h
index f0d2de6..df3b46f 100644
--- a/decoder.h
+++ b/decoder.h
@@ -1,5 +1,5 @@
/* Lzip - LZMA lossless data compressor
- Copyright (C) 2008-2015 Antonio Diaz Diaz.
+ Copyright (C) 2008-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -47,14 +47,14 @@ public:
~Range_decoder() { delete[] buffer; }
- bool code_is_zero() const { return ( code == 0 ); }
bool finished() { return pos >= stream_pos && !read_block(); }
unsigned long long member_position() const { return partial_member_pos + pos; }
void reset_member_position() { partial_member_pos = -pos; }
uint8_t get_byte()
{
- if( finished() ) return 0xAA; // make code != 0
+ // 0xFF avoids decoder error if member is truncated at EOS marker
+ if( finished() ) return 0xFF;
return buffer[pos++];
}
@@ -217,7 +217,7 @@ class LZ_decoder
unsigned stream_pos; // first byte not yet written to file
uint32_t crc_;
const int outfd; // output file descriptor
- const int member_version;
+ bool pos_wrapped;
void flush_data();
bool verify_trailer( const Pretty_print & pp ) const;
@@ -267,17 +267,17 @@ class LZ_decoder
void operator=( const LZ_decoder & ); // declared as private
public:
- LZ_decoder( const File_header & header, Range_decoder & rde, const int ofd )
+ LZ_decoder( Range_decoder & rde, const unsigned dict_size, const int ofd )
:
partial_data_pos( 0 ),
rdec( rde ),
- dictionary_size( header.dictionary_size() ),
+ dictionary_size( dict_size ),
buffer( new uint8_t[dictionary_size] ),
pos( 0 ),
stream_pos( 0 ),
crc_( 0xFFFFFFFFU ),
outfd( ofd ),
- member_version( header.version() )
+ pos_wrapped( false )
{ buffer[dictionary_size-1] = 0; } // prev_byte of first byte
~LZ_decoder() { delete[] buffer; }
diff --git a/doc/lzip.1 b/doc/lzip.1
index aa0e5ff..d0e6649 100644
--- a/doc/lzip.1
+++ b/doc/lzip.1
@@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
-.TH LZIP "1" "August 2015" "lzip 1.18-pre1" "User Commands"
+.TH LZIP "1" "May 2016" "lzip 1.18" "User Commands"
.SH NAME
lzip \- reduces the size of files
.SH SYNOPSIS
@@ -22,7 +22,7 @@ exit with error status if trailing data
set member size limit in bytes
.TP
\fB\-c\fR, \fB\-\-stdout\fR
-send output to standard output
+write to standard output, keep input files
.TP
\fB\-d\fR, \fB\-\-decompress\fR
decompress
@@ -40,7 +40,7 @@ keep (don't delete) input files
set match length limit in bytes [36]
.TP
\fB\-o\fR, \fB\-\-output=\fR<file>
-if reading stdin, place the output into <file>
+if reading standard input, write to <file>
.TP
\fB\-q\fR, \fB\-\-quiet\fR
suppress all messages
@@ -66,13 +66,16 @@ alias for \fB\-0\fR
\fB\-\-best\fR
alias for \fB\-9\fR
.PP
-If no file names are given, lzip compresses or decompresses
-from standard input to standard output.
+If no file names are given, or if a file is '\-', lzip compresses or
+decompresses from standard input to standard output.
Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,
Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...
+Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12
+to 2^29 bytes.
+.PP
The bidimensional parameter space of LZMA can't be mapped to a linear
scale optimal for all files. If your files are large, very repetitive,
-etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR
+etc, you may need to use the \fB\-\-dictionary\-size\fR and \fB\-\-match\-length\fR
options directly to achieve optimal performance.
.PP
Exit status: 0 for a normal exit, 1 for environmental problems (file
@@ -84,7 +87,7 @@ Report bugs to lzip\-bug@nongnu.org
.br
Lzip home page: http://www.nongnu.org/lzip/lzip.html
.SH COPYRIGHT
-Copyright \(co 2015 Antonio Diaz Diaz.
+Copyright \(co 2016 Antonio Diaz Diaz.
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
.br
This is free software: you are free to change and redistribute it.
diff --git a/doc/lzip.info b/doc/lzip.info
index 71d8f8e..0210f9e 100644
--- a/doc/lzip.info
+++ b/doc/lzip.info
@@ -11,7 +11,7 @@ File: lzip.info, Node: Top, Next: Introduction, Up: (dir)
Lzip Manual
***********
-This manual is for Lzip (version 1.18-pre1, 13 August 2015).
+This manual is for Lzip (version 1.18, 14 May 2016).
* Menu:
@@ -28,7 +28,7 @@ This manual is for Lzip (version 1.18-pre1, 13 August 2015).
* Concept index:: Index of concepts
- Copyright (C) 2008-2015 Antonio Diaz Diaz.
+ Copyright (C) 2008-2016 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to
copy, distribute and modify it.
@@ -72,15 +72,14 @@ corrupt byte near the beginning is a thing of the past.
The member trailer stores the 32-bit CRC of the original data, the
size of the original data and the size of the member. These values,
-together with the value remaining in the range decoder and the
-end-of-stream marker, provide a 4 factor integrity checking which
-guarantees that the decompressed version of the data is identical to
-the original. This guards against corruption of the compressed data,
-and against undetected bugs in lzip (hopefully very unlikely). The
-chances of data corruption going undetected are microscopic. Be aware,
-though, that the check occurs upon decompression, so it can only tell
-you that something is wrong. It can't help you recover the original
-uncompressed data.
+together with the end-of-stream marker, provide a 3 factor integrity
+checking which guarantees that the decompressed version of the data is
+identical to the original. This guards against corruption of the
+compressed data, and against undetected bugs in lzip (hopefully very
+unlikely). The chances of data corruption going undetected are
+microscopic. Be aware, though, that the check occurs upon
+decompression, so it can only tell you that something is wrong. It
+can't help you recover the original uncompressed data.
Lzip uses the same well-defined exit status values used by bzip2,
which makes it safer than compressors returning ambiguous warning
@@ -127,14 +126,14 @@ two or more compressed files. The result is the concatenation of the
corresponding uncompressed files. Integrity testing of concatenated
compressed files is also supported.
- Lzip can produce multi-member files and safely recover, with
+ Lzip can produce multimember files and safely recover, with
lziprecover, the undamaged members in case of file damage. Lzip can
also split the compressed output in volumes of a given size, even when
reading from standard input. This allows the direct creation of
multivolume compressed tar archives.
Lzip is able to compress and decompress streams of unlimited size by
-automatically creating multi-member output. The members so created are
+automatically creating multimember output. The members so created are
large, about 2 PiB each.

@@ -147,6 +146,10 @@ The format for running lzip is:
lzip [OPTIONS] [FILES]
+'-' used as a FILE argument means standard input. It can be mixed with
+other FILES and is read just once, the first time it appears in the
+command line.
+
Lzip supports the following options:
'-h'
@@ -172,15 +175,19 @@ The format for running lzip is:
'-c'
'--stdout'
- Compress or decompress to standard output. Needed when reading
- from a named pipe (fifo) or from a device. Use it to recover as
- much of the uncompressed data as possible when decompressing a
- corrupt file.
+ Compress or decompress to standard output; keep input files
+ unchanged. If compressing several files, each file is compressed
+ independently. This option is needed when reading from a named
+ pipe (fifo) or from a device. Use it also to recover as much of
+ the uncompressed data as possible when decompressing a corrupt
+ file.
'-d'
'--decompress'
- Decompress the specified file(s). If a file fails to decompress,
- lzip exits immediately without decompressing the rest of the files.
+ Decompress the specified file(s). If a file does not exist or
+ can't be opened, lzip continues decompressing the rest of the
+ files. If a file fails to decompress, lzip exits immediately
+ without decompressing the rest of the files.
'-f'
'--force'
@@ -218,12 +225,13 @@ The format for running lzip is:
'-s BYTES'
'--dictionary-size=BYTES'
- Set the dictionary size limit in bytes. Valid values range from 4
- KiB to 512 MiB. Lzip will use the smallest possible dictionary
- size for each file without exceeding this limit. Note that
- dictionary sizes are quantized. If the specified size does not
- match one of the valid sizes, it will be rounded upwards by adding
- up to (BYTES / 16) to it.
+ Set the dictionary size limit in bytes. Lzip will use the smallest
+ possible dictionary size for each file without exceeding this
+ limit. Valid values range from 4 KiB to 512 MiB. Values 12 to 29
+ are interpreted as powers of two, meaning 2^12 to 2^29 bytes. Note
+ that dictionary sizes are quantized. If the specified size does
+ not match one of the valid sizes, it will be rounded upwards by
+ adding up to (BYTES / 8) to it.
For maximum compression you should use a dictionary size limit as
large as possible, but keep in mind that the decompression memory
@@ -235,9 +243,9 @@ The format for running lzip is:
Split the compressed output into several volume files with names
'original_name00001.lz', 'original_name00002.lz', etc, and set the
volume size limit to BYTES. Each volume is a complete, maybe
- multi-member, lzip file. A small volume size may degrade
- compression ratio, so use it only when needed. Valid values range
- from 100 kB to 4 EiB.
+ multimember, lzip file. A small volume size may degrade compression
+ ratio, so use it only when needed. Valid values range from 100 kB
+ to 4 EiB.
'-t'
'--test'
@@ -259,14 +267,14 @@ The format for running lzip is:
'-0 .. -9'
Set the compression parameters (dictionary size and match length
- limit) as shown in the table below. Note that '-9' can be much
- slower than '-0'. These options have no effect when decompressing.
+ limit) as shown in the table below. The default compression level
+ is '-6'. Note that '-9' can be much slower than '-0'. These
+ options have no effect when decompressing.
The bidimensional parameter space of LZMA can't be mapped to a
linear scale optimal for all files. If your files are large, very
- repetitive, etc, you may need to use the '--match-length' and
- '--dictionary-size' options directly to achieve optimal
- performance.
+ repetitive, etc, you may need to use the '--dictionary-size' and
+ '--match-length' options directly to achieve optimal performance.
Level Dictionary size Match length limit
-0 64 KiB 16 bytes
@@ -334,21 +342,21 @@ file format.
Today those limitations have mostly disappeared, and the format of
gzip has proved to be unnecessarily complicated. It includes fields
-that were never used, others that have lost its usefulness, and finally
-others that have become too limited.
+that were never used, others that have lost their usefulness, and
+finally others that have become too limited.
Bzip2 was designed 5 years later, and its format is simpler than the
one of gzip.
Probably the worst defect of the gzip format from the point of view
of data safety is the variable size of its header. If the byte at
-offset 3 (flags) of a gzip member gets corrupted, it mat become very
+offset 3 (flags) of a gzip member gets corrupted, it may become very
difficult to recover the data, even if the compressed blocks are
intact, because it can't be known with certainty where the compressed
blocks begin.
By contrast, the header of a lzip member has a fixed length of 6. The
-lzma stream in a lzip member always starts at offset 6, making it
+LZMA stream in a lzip member always starts at offset 6, making it
trivial to recover the data even if the whole header becomes corrupt.
Bzip2 also provides a header of fixed length and marks the begin and
@@ -358,9 +366,24 @@ not store the size of each compressed block, as lzip does.
Lzip provides better data recovery capabilities than any other
gzip-like compressor because its format has been designed from the
-beginning to be simple and safe. It would be very difficult to write an
+beginning to be simple and safe. It also helps that the LZMA data
+stream as used by lzip is extraordinarily safe. It provides embedded
+error detection. Any distance larger than the dictionary size acts as a
+forbidden symbol, allowing the decompressor to detect the approximate
+position of errors, and leaving very little work for the check sequence
+(CRC and data sizes) in the detection of errors. Lzip is usually able
+to detect all possible bit-flips in the compressed data without
+resorting to the check sequence. It would be very difficult to write an
automatic recovery tool like lziprecover for the gzip format. And, as
-far as I know, it has never been writen.
+far as I know, it has never been written.
+
+ Lzip, like gzip and bzip2, uses a CRC32 to check the integrity of the
+decompressed data because it provides more accurate error detection than
+CRC64 up to a compressed size of about 16 GiB, a size larger than that
+of most files. In the case of lzip, the additional detection capability
+of the decompressor reduces the probability of undetected errors more
+than a million times, making CRC32 more accurate than CRC64 up to about
+20 PiB of compressed size.
The lzip format is designed for long-term archiving. Therefore it
excludes any unneeded features that may interfere with the future
@@ -409,7 +432,7 @@ extraction of the uncompressed data.
Bzip2 does not store the uncompressed size of the file.
The lzip format provides a 64-bit field for the uncompressed size.
- Additionaly, lzip produces multi-member output automatically when
+ Additionally, lzip produces multimember output automatically when
the size is too large for a single member, allowing for an
unlimited uncompressed size.
@@ -428,8 +451,16 @@ extraction of the uncompressed data.
3.2 Quality of implementation
=============================
+'Accurate and robust error detection'
+ The lzip format provides 3 factor integrity checking and the
+ decompressors report mismatches in each factor separately. This
+ way if just one byte in one factor fails but the other two factors
+ match the data, it probably means that the data are intact and the
+ corruption just affects the mismatching factor (CRC or data size)
+ in the check sequence.
+
'Multiple implementations'
- Just like the lzip format provides 4 factor protection against
+ Just like the lzip format provides 3 factor protection against
undetected data corruption, the development methodology of the lzip
family of compressors provides 3 factor protection against
undetected programming errors.
@@ -443,6 +474,11 @@ extraction of the uncompressed data.
serious undiscovered errors. In fact, no errors have been
discovered in lzip since 2009.
+ Additionally, the three implementations have been extensively
+ tested with unzcrash, valgrind and 'american fuzzy lop' without
+ finding a single vulnerability or false negative. *Note Unzcrash:
+ (lziprecover)Unzcrash.
+
'Dictionary size'
Lzip automatically uses the smallest possible dictionary size for
each file. In addition to reducing the amount of memory required
@@ -485,7 +521,7 @@ additional information before, between, or after them.
Each member has the following structure:
+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-| ID string | VN | DS | Lzma stream | CRC32 | Data size | Member size |
+| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size |
+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
All multibyte values are stored in little endian order.
@@ -508,8 +544,8 @@ additional information before, between, or after them.
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB
Valid values for dictionary size range from 4 KiB to 512 MiB.
-'Lzma stream'
- The lzma stream, finished by an end of stream marker. Uses default
+'LZMA stream'
+ The LZMA stream, finished by an end of stream marker. Uses default
values for encoder properties. *Note Stream format::, for a
complete description.
@@ -523,7 +559,7 @@ additional information before, between, or after them.
Total size of the member, including header and trailer. This field
acts as a distributed index, allows the verification of stream
integrity, and facilitates safe recovery of undamaged members from
- multi-member files.
+ multimember files.

@@ -603,7 +639,9 @@ properties", to adjust it for some kinds of binary data. These
parameters are; 'literal_context_bits' (with a default value of 3),
'literal_pos_state_bits' (with a default value of 0), and
'pos_state_bits' (with a default value of 2). As a general purpose
-compressor, lzip only uses the default values for these parameters.
+compressor, lzip only uses the default values for these parameters. In
+particular 'literal_pos_state_bits' has been optimized away and does
+not even appear in the code.
Lzip also finishes the LZMA stream with an "End Of Stream" marker
(the distance-length pair 0xFFFFFFFFU, 2), which in conjunction with the
@@ -655,7 +693,7 @@ Bit sequence Name Description
used distance
- In the following tables, multi-bit sequences are coded in normal
+ In the following tables, multibit sequences are coded in normal
order, from MSB to LSB, except where noted otherwise.
Lengths (the 'len' in the table above) are coded as follows:
@@ -676,10 +714,10 @@ You may first send the position of the most significant bit that is set
to 1, which you may find by making a bit scan from the left (from the
MSB). A position of 0 means that the number is 0 (no bit is set), 1
means the LSB is the first bit set (the number is 1), and 32 means the
-MSB is set (the number is >= 0x80000000). Lets call this bit position a
-"slot". Then, if slot is > 1, you send the remaining slot - 1 bits.
-Lets call these bits "direct_bits" because they are coded directly by
-value instead of indirectly by position.
+MSB is set (i.e., the number is >= 0x80000000). Let's call this bit
+position a "slot". Then, if slot is > 1, you send the remaining slot -
+1 bits. Let's call these bits "direct_bits" because they are coded
+directly by value instead of indirectly by position.
The inconvenient of this simple method is that it needs 6 bits to
code the slot, but it just uses 33 of the 64 possible values, wasting
@@ -849,15 +887,15 @@ member. Such trailing data may be:
file.
* In very rare cases, trailing data could be the corrupt header of
- another member. In multi-member or concatenated files the
+ another member. In multimember or concatenated files the
probability of corruption happening in the magic bytes is 5 times
smaller than the probability of getting a false positive caused by
the corruption of the integrity information itself. Therefore it
can be considered to be below the noise level.
Trailing data can be safely ignored in most cases. In some cases,
-like user-added data, it is expected to be ignored. In those cases
-where a file containing trailing data must be rejected, the option
+like that of user-added data, it is expected to be ignored. In those
+cases where a file containing trailing data must be rejected, the option
'--trailing-error' can be used. *Note --trailing-error::.

@@ -869,7 +907,7 @@ File: lzip.info, Node: Examples, Next: Problems, Prev: Trailing data, Up: To
WARNING! Even if lzip is bug-free, other causes may result in a corrupt
compressed file (bugs in the system libraries, memory errors, etc).
Therefore, if the data you are going to compress are important, give the
-'--keep' option to lzip and do not remove the original file until you
+'--keep' option to lzip and don't remove the original file until you
verify the compressed file with a command like
'lzip -cd file.lz | cmp file -'.
@@ -880,8 +918,8 @@ and show the compression ratio.
lzip -v file
-Example 2: Like example 1 but the created 'file.lz' is multi-member
-with a member size of 1 MiB. The compression ratio is not shown.
+Example 2: Like example 1 but the created 'file.lz' is multimember with
+a member size of 1 MiB. The compression ratio is not shown.
lzip -b 1MiB file
@@ -898,10 +936,10 @@ show status.
lzip -tv file.lz
-Example 5: Compress a whole floppy in /dev/fd0 and send the output to
+Example 5: Compress a whole device in /dev/sdc and send the output to
'file.lz'.
- lzip -c /dev/fd0 > file.lz
+ lzip -c /dev/sdc > file.lz
Example 6: The right way of concatenating compressed files. *Note
@@ -937,7 +975,7 @@ Example 10: Extract a multivolume compressed tar archive.
Example 11: Create a multivolume compressed backup of a large database
-file with a volume size of 650 MB, where each volume is a multi-member
+file with a volume size of 650 MB, where each volume is a multimember
file with a member size of 32 MiB.
lzip -b 32MiB -S 650MB big_db
@@ -964,10 +1002,18 @@ Appendix A Reference source code
********************************
/* Lzd - Educational decompressor for the lzip format
- Copyright (C) 2013-2015 Antonio Diaz Diaz.
+ Copyright (C) 2013-2016 Antonio Diaz Diaz.
+
+ This program is free software. Redistribution and use in source and
+ binary forms, with or without modification, are permitted provided
+ that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
- This program is free software: you have unlimited permission
- to copy, distribute and modify it.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -1017,6 +1063,7 @@ enum {
min_dictionary_size = 1 << 12,
max_dictionary_size = 1 << 29,
literal_context_bits = 3,
+ literal_pos_state_bits = 0, // not used
pos_state_bits = 2,
pos_states = 1 << pos_state_bits,
pos_state_mask = pos_states - 1,
@@ -1203,6 +1250,7 @@ class LZ_decoder
unsigned pos; // current pos in buffer
unsigned stream_pos; // first byte not yet written to stdout
uint32_t crc_;
+ bool pos_wrapped;
void flush_data();
@@ -1227,7 +1275,8 @@ public:
buffer( new uint8_t[dictionary_size] ),
pos( 0 ),
stream_pos( 0 ),
- crc_( 0xFFFFFFFFU )
+ crc_( 0xFFFFFFFFU ),
+ pos_wrapped( false )
{ buffer[dictionary_size-1] = 0; } // prev_byte of first byte
~LZ_decoder() { delete[] buffer; }
@@ -1249,7 +1298,8 @@ void LZ_decoder::flush_data()
if( std::fwrite( buffer + stream_pos, 1, size, stdout ) != size )
{ std::fprintf( stderr, "Write error: %s\n", std::strerror( errno ) );
std::exit( 1 ); }
- if( pos >= dictionary_size ) { partial_data_pos += pos; pos = 0; }
+ if( pos >= dictionary_size )
+ { partial_data_pos += pos; pos = 0; pos_wrapped = true; }
stream_pos = pos;
}
}
@@ -1345,7 +1395,7 @@ bool LZ_decoder::decode_member() // Returns false if error
}
}
state.set_match();
- if( rep0 >= dictionary_size || rep0 >= data_position() )
+ if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
{ flush_data(); return false; }
}
for( int i = 0; i < len; ++i ) put_byte( peek( rep0 ) );
@@ -1367,7 +1417,7 @@ int main( const int argc, const char * const argv[] )
"It is not safe to use lzd for any real work.\n"
"\nUsage: %s < file.lz > file\n", argv[0] );
std::printf( "Lzd decompresses from standard input to standard output.\n"
- "\nCopyright (C) 2015 Antonio Diaz Diaz.\n"
+ "\nCopyright (C) 2016 Antonio Diaz Diaz.\n"
"This is free software: you are free to change and redistribute it.\n"
"There is NO WARRANTY, to the extent permitted by law.\n"
"Report bugs to lzip-bug@nongnu.org\n"
@@ -1445,19 +1495,19 @@ Concept index

Tag Table:
Node: Top208
-Node: Introduction1153
-Node: Invoking lzip6126
-Ref: --trailing-error6536
-Node: Quality assurance12171
-Node: File format18728
-Node: Algorithm21133
-Node: Stream format23959
-Node: Trailing data34502
-Node: Examples35873
-Ref: concat-example37048
-Node: Problems38049
-Node: Reference source code38579
-Node: Concept index52232
+Node: Introduction1145
+Node: Invoking lzip6071
+Ref: --trailing-error6635
+Node: Quality assurance12628
+Node: File format20782
+Node: Algorithm23186
+Node: Stream format26012
+Node: Trailing data36660
+Node: Examples38038
+Ref: concat-example39211
+Node: Problems40211
+Node: Reference source code40741
+Node: Concept index54957

End Tag Table
diff --git a/doc/lzip.texi b/doc/lzip.texi
index 845cb42..27feeff 100644
--- a/doc/lzip.texi
+++ b/doc/lzip.texi
@@ -6,8 +6,8 @@
@finalout
@c %**end of header
-@set UPDATED 13 August 2015
-@set VERSION 1.18-pre1
+@set UPDATED 14 May 2016
+@set VERSION 1.18
@dircategory Data Compression
@direntry
@@ -49,7 +49,7 @@ This manual is for Lzip (version @value{VERSION}, @value{UPDATED}).
@end menu
@sp 1
-Copyright @copyright{} 2008-2015 Antonio Diaz Diaz.
+Copyright @copyright{} 2008-2016 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission
to copy, distribute and modify it.
@@ -100,14 +100,14 @@ corrupt byte near the beginning is a thing of the past.
The member trailer stores the 32-bit CRC of the original data, the size
of the original data and the size of the member. These values, together
-with the value remaining in the range decoder and the end-of-stream
-marker, provide a 4 factor integrity checking which guarantees that the
-decompressed version of the data is identical to the original. This
-guards against corruption of the compressed data, and against undetected
-bugs in lzip (hopefully very unlikely). The chances of data corruption
-going undetected are microscopic. Be aware, though, that the check
-occurs upon decompression, so it can only tell you that something is
-wrong. It can't help you recover the original uncompressed data.
+with the end-of-stream marker, provide a 3 factor integrity checking
+which guarantees that the decompressed version of the data is identical
+to the original. This guards against corruption of the compressed data,
+and against undetected bugs in lzip (hopefully very unlikely). The
+chances of data corruption going undetected are microscopic. Be aware,
+though, that the check occurs upon decompression, so it can only tell
+you that something is wrong. It can't help you recover the original
+uncompressed data.
Lzip uses the same well-defined exit status values used by bzip2, which
makes it safer than compressors returning ambiguous warning values (like
@@ -156,14 +156,14 @@ or more compressed files. The result is the concatenation of the
corresponding uncompressed files. Integrity testing of concatenated
compressed files is also supported.
-Lzip can produce multi-member files and safely recover, with
-lziprecover, the undamaged members in case of file damage. Lzip can
-also split the compressed output in volumes of a given size, even when
-reading from standard input. This allows the direct creation of
-multivolume compressed tar archives.
+Lzip can produce multimember files and safely recover, with lziprecover,
+the undamaged members in case of file damage. Lzip can also split the
+compressed output in volumes of a given size, even when reading from
+standard input. This allows the direct creation of multivolume
+compressed tar archives.
Lzip is able to compress and decompress streams of unlimited size by
-automatically creating multi-member output. The members so created are
+automatically creating multimember output. The members so created are
large, about 2 PiB each.
@@ -180,6 +180,11 @@ The format for running lzip is:
lzip [@var{options}] [@var{files}]
@end example
+@noindent
+@samp{-} used as a @var{file} argument means standard input. It can be
+mixed with other @var{files} and is read just once, the first time it
+appears in the command line.
+
Lzip supports the following options:
@table @code
@@ -206,14 +211,18 @@ range from 100 kB to 2 PiB. Defaults to 2 PiB.
@item -c
@itemx --stdout
-Compress or decompress to standard output. Needed when reading from a
-named pipe (fifo) or from a device. Use it to recover as much of the
-uncompressed data as possible when decompressing a corrupt file.
+Compress or decompress to standard output; keep input files unchanged.
+If compressing several files, each file is compressed independently.
+This option is needed when reading from a named pipe (fifo) or from a
+device. Use it also to recover as much of the uncompressed data as
+possible when decompressing a corrupt file.
@item -d
@itemx --decompress
-Decompress the specified file(s). If a file fails to decompress, lzip
-exits immediately without decompressing the rest of the files.
+Decompress the specified file(s). If a file does not exist or can't be
+opened, lzip continues decompressing the rest of the files. If a file
+fails to decompress, lzip exits immediately without decompressing the
+rest of the files.
@item -f
@itemx --force
@@ -249,11 +258,13 @@ Quiet operation. Suppress all messages.
@item -s @var{bytes}
@itemx --dictionary-size=@var{bytes}
-Set the dictionary size limit in bytes. Valid values range from 4 KiB to
-512 MiB. Lzip will use the smallest possible dictionary size for each
-file without exceeding this limit. Note that dictionary sizes are
-quantized. If the specified size does not match one of the valid sizes,
-it will be rounded upwards by adding up to (@var{bytes} / 16) to it.
+Set the dictionary size limit in bytes. Lzip will use the smallest
+possible dictionary size for each file without exceeding this limit.
+Valid values range from 4 KiB to 512 MiB. Values 12 to 29 are
+interpreted as powers of two, meaning 2^12 to 2^29 bytes. Note that
+dictionary sizes are quantized. If the specified size does not match one
+of the valid sizes, it will be rounded upwards by adding up to
+@w{(@var{bytes} / 8)} to it.
For maximum compression you should use a dictionary size limit as large
as possible, but keep in mind that the decompression memory requirement
@@ -264,7 +275,7 @@ is affected at compression time by the choice of dictionary size limit.
Split the compressed output into several volume files with names
@samp{original_name00001.lz}, @samp{original_name00002.lz}, etc, and set
the volume size limit to @var{bytes}. Each volume is a complete, maybe
-multi-member, lzip file. A small volume size may degrade compression
+multimember, lzip file. A small volume size may degrade compression
ratio, so use it only when needed. Valid values range from 100 kB to 4
EiB.
@@ -287,14 +298,14 @@ trailing data (if any).
@item -0 .. -9
Set the compression parameters (dictionary size and match length limit)
-as shown in the table below. Note that @samp{-9} can be much slower than
-@samp{-0}. These options have no effect when decompressing.
+as shown in the table below. The default compression level is @samp{-6}.
+Note that @samp{-9} can be much slower than @samp{-0}. These options
+have no effect when decompressing.
The bidimensional parameter space of LZMA can't be mapped to a linear
scale optimal for all files. If your files are large, very repetitive,
-etc, you may need to use the @samp{--match-length} and
-@samp{--dictionary-size} options directly to achieve optimal
-performance.
+etc, you may need to use the @samp{--dictionary-size} and
+@samp{--match-length} options directly to achieve optimal performance.
@multitable {Level} {Dictionary size} {Match length limit}
@item Level @tab Dictionary size @tab Match length limit
@@ -365,7 +376,7 @@ file format.
Today those limitations have mostly disappeared, and the format of gzip
has proved to be unnecessarily complicated. It includes fields that were
-never used, others that have lost its usefulness, and finally others
+never used, others that have lost their usefulness, and finally others
that have become too limited.
Bzip2 was designed 5 years later, and its format is simpler than the one
@@ -373,12 +384,12 @@ of gzip.
Probably the worst defect of the gzip format from the point of view of
data safety is the variable size of its header. If the byte at offset 3
-(flags) of a gzip member gets corrupted, it mat become very difficult to
+(flags) of a gzip member gets corrupted, it may become very difficult to
recover the data, even if the compressed blocks are intact, because it
can't be known with certainty where the compressed blocks begin.
By contrast, the header of a lzip member has a fixed length of 6. The
-lzma stream in a lzip member always starts at offset 6, making it
+LZMA stream in a lzip member always starts at offset 6, making it
trivial to recover the data even if the whole header becomes corrupt.
Bzip2 also provides a header of fixed length and marks the begin and end
@@ -388,9 +399,24 @@ not store the size of each compressed block, as lzip does.
Lzip provides better data recovery capabilities than any other gzip-like
compressor because its format has been designed from the beginning to be
-simple and safe. It would be very difficult to write an automatic
-recovery tool like lziprecover for the gzip format. And, as far as I
-know, it has never been writen.
+simple and safe. It also helps that the LZMA data stream as used by lzip
+is extraordinarily safe. It provides embedded error detection. Any
+distance larger than the dictionary size acts as a forbidden symbol,
+allowing the decompressor to detect the approximate position of errors,
+and leaving very little work for the check sequence (CRC and data sizes)
+in the detection of errors. Lzip is usually able to detect all possible
+bit-flips in the compressed data without resorting to the check
+sequence. It would be very difficult to write an automatic recovery tool
+like lziprecover for the gzip format. And, as far as I know, it has
+never been written.
+
+Lzip, like gzip and bzip2, uses a CRC32 to check the integrity of the
+decompressed data because it provides more accurate error detection than
+CRC64 up to a compressed size of about 16 GiB, a size larger than that
+of most files. In the case of lzip, the additional detection capability
+of the decompressor reduces the probability of undetected errors more
+than a million times, making CRC32 more accurate than CRC64 up to about
+20 PiB of compressed size.
The lzip format is designed for long-term archiving. Therefore it
excludes any unneeded features that may interfere with the future
@@ -441,7 +467,7 @@ size. The size of any file larger than 4 GiB gets truncated.
Bzip2 does not store the uncompressed size of the file.
The lzip format provides a 64-bit field for the uncompressed size.
-Additionaly, lzip produces multi-member output automatically when the
+Additionally, lzip produces multimember output automatically when the
size is too large for a single member, allowing for an unlimited
uncompressed size.
@@ -462,9 +488,17 @@ uncompressed size.
@section Quality of implementation
@table @samp
+@item Accurate and robust error detection
+
+The lzip format provides 3 factor integrity checking and the
+decompressors report mismatches in each factor separately. This way if
+just one byte in one factor fails but the other two factors match the
+data, it probably means that the data are intact and the corruption just
+affects the mismatching factor (CRC or data size) in the check sequence.
+
@item Multiple implementations
-Just like the lzip format provides 4 factor protection against
+Just like the lzip format provides 3 factor protection against
undetected data corruption, the development methodology of the lzip
family of compressors provides 3 factor protection against undetected
programming errors.
@@ -477,6 +511,15 @@ guarantees that all three implement the same algorithm, and makes it
unlikely that any of them may contain serious undiscovered errors. In
fact, no errors have been discovered in lzip since 2009.
+Additionally, the three implementations have been extensively tested
+with
+@uref{http://www.nongnu.org/lzip/manual/lziprecover_manual.html#Unzcrash,,unzcrash},
+valgrind and @samp{american fuzzy lop} without finding a single
+vulnerability or false negative.
+@ifnothtml
+@xref{Unzcrash,,,lziprecover}.
+@end ifnothtml
+
@item Dictionary size
Lzip automatically uses the smallest possible dictionary size for each
@@ -525,7 +568,7 @@ additional information before, between, or after them.
Each member has the following structure:
@verbatim
+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-| ID string | VN | DS | Lzma stream | CRC32 | Data size | Member size |
+| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size |
+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@end verbatim
@@ -549,8 +592,8 @@ from the base size to obtain the dictionary size.@*
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@*
Valid values for dictionary size range from 4 KiB to 512 MiB.
-@item Lzma stream
-The lzma stream, finished by an end of stream marker. Uses default
+@item LZMA stream
+The LZMA stream, finished by an end of stream marker. Uses default
values for encoder properties. @xref{Stream format}, for a complete
description.
@@ -563,7 +606,7 @@ Size of the uncompressed original data.
@item Member size (8 bytes)
Total size of the member, including header and trailer. This field acts
as a distributed index, allows the verification of stream integrity, and
-facilitates safe recovery of undamaged members from multi-member files.
+facilitates safe recovery of undamaged members from multimember files.
@end table
@@ -643,7 +686,9 @@ properties", to adjust it for some kinds of binary data. These
parameters are; @samp{literal_context_bits} (with a default value of 3),
@samp{literal_pos_state_bits} (with a default value of 0), and
@samp{pos_state_bits} (with a default value of 2). As a general purpose
-compressor, lzip only uses the default values for these parameters.
+compressor, lzip only uses the default values for these parameters. In
+particular @samp{literal_pos_state_bits} has been optimized away and
+does not even appear in the code.
Lzip also finishes the LZMA stream with an "End Of Stream" marker (the
distance-length pair 0xFFFFFFFFU, 2), which in conjunction with the
@@ -695,7 +740,7 @@ latest used distance
@end multitable
@sp 1
-In the following tables, multi-bit sequences are coded in normal order,
+In the following tables, multibit sequences are coded in normal order,
from MSB to LSB, except where noted otherwise.
Lengths (the @samp{len} in the table above) are coded as follows:
@@ -717,9 +762,9 @@ first send the position of the most significant bit that is set to 1,
which you may find by making a bit scan from the left (from the MSB). A
position of 0 means that the number is 0 (no bit is set), 1 means the
LSB is the first bit set (the number is 1), and 32 means the MSB is set
-(the number is >= 0x80000000). Lets call this bit position a "slot".
-Then, if slot is > 1, you send the remaining slot - 1 bits. Lets call
-these bits "direct_bits" because they are coded directly by value
+(i.e., the number is >= 0x80000000). Let's call this bit position a
+"slot". Then, if slot is > 1, you send the remaining slot - 1 bits. Let's
+call these bits "direct_bits" because they are coded directly by value
instead of indirectly by position.
The inconvenient of this simple method is that it needs 6 bits to code
@@ -902,7 +947,7 @@ hash value (for a chosen hash) coincide with those of another file.
@item
In very rare cases, trailing data could be the corrupt header of another
-member. In multi-member or concatenated files the probability of
+member. In multimember or concatenated files the probability of
corruption happening in the magic bytes is 5 times smaller than the
probability of getting a false positive caused by the corruption of the
integrity information itself. Therefore it can be considered to be below
@@ -910,8 +955,8 @@ the noise level.
@end itemize
Trailing data can be safely ignored in most cases. In some cases, like
-user-added data, it is expected to be ignored. In those cases where a
-file containing trailing data must be rejected, the option
+that of user-added data, it is expected to be ignored. In those cases
+where a file containing trailing data must be rejected, the option
@samp{--trailing-error} can be used. @xref{--trailing-error}.
@@ -922,7 +967,7 @@ file containing trailing data must be rejected, the option
WARNING! Even if lzip is bug-free, other causes may result in a corrupt
compressed file (bugs in the system libraries, memory errors, etc).
Therefore, if the data you are going to compress are important, give the
-@samp{--keep} option to lzip and do not remove the original file until
+@samp{--keep} option to lzip and don't remove the original file until
you verify the compressed file with a command like
@w{@samp{lzip -cd file.lz | cmp file -}}.
@@ -937,7 +982,7 @@ lzip -v file
@sp 1
@noindent
-Example 2: Like example 1 but the created @samp{file.lz} is multi-member
+Example 2: Like example 1 but the created @samp{file.lz} is multimember
with a member size of 1 MiB. The compression ratio is not shown.
@example
@@ -965,11 +1010,11 @@ lzip -tv file.lz
@sp 1
@noindent
-Example 5: Compress a whole floppy in /dev/fd0 and send the output to
+Example 5: Compress a whole device in /dev/sdc and send the output to
@samp{file.lz}.
@example
-lzip -c /dev/fd0 > file.lz
+lzip -c /dev/sdc > file.lz
@end example
@sp 1
@@ -1023,7 +1068,7 @@ lzip -cd volume_name*.lz | tar -xf -
@sp 1
@noindent
Example 11: Create a multivolume compressed backup of a large database
-file with a volume size of 650 MB, where each volume is a multi-member
+file with a volume size of 650 MB, where each volume is a multimember
file with a member size of 32 MiB.
@example
@@ -1052,10 +1097,18 @@ find by running @w{@code{lzip --version}}.
@verbatim
/* Lzd - Educational decompressor for the lzip format
- Copyright (C) 2013-2015 Antonio Diaz Diaz.
+ Copyright (C) 2013-2016 Antonio Diaz Diaz.
+
+ This program is free software. Redistribution and use in source and
+ binary forms, with or without modification, are permitted provided
+ that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
- This program is free software: you have unlimited permission
- to copy, distribute and modify it.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -1105,6 +1158,7 @@ enum {
min_dictionary_size = 1 << 12,
max_dictionary_size = 1 << 29,
literal_context_bits = 3,
+ literal_pos_state_bits = 0, // not used
pos_state_bits = 2,
pos_states = 1 << pos_state_bits,
pos_state_mask = pos_states - 1,
@@ -1291,6 +1345,7 @@ class LZ_decoder
unsigned pos; // current pos in buffer
unsigned stream_pos; // first byte not yet written to stdout
uint32_t crc_;
+ bool pos_wrapped;
void flush_data();
@@ -1315,7 +1370,8 @@ public:
buffer( new uint8_t[dictionary_size] ),
pos( 0 ),
stream_pos( 0 ),
- crc_( 0xFFFFFFFFU )
+ crc_( 0xFFFFFFFFU ),
+ pos_wrapped( false )
{ buffer[dictionary_size-1] = 0; } // prev_byte of first byte
~LZ_decoder() { delete[] buffer; }
@@ -1337,7 +1393,8 @@ void LZ_decoder::flush_data()
if( std::fwrite( buffer + stream_pos, 1, size, stdout ) != size )
{ std::fprintf( stderr, "Write error: %s\n", std::strerror( errno ) );
std::exit( 1 ); }
- if( pos >= dictionary_size ) { partial_data_pos += pos; pos = 0; }
+ if( pos >= dictionary_size )
+ { partial_data_pos += pos; pos = 0; pos_wrapped = true; }
stream_pos = pos;
}
}
@@ -1433,7 +1490,7 @@ bool LZ_decoder::decode_member() // Returns false if error
}
}
state.set_match();
- if( rep0 >= dictionary_size || rep0 >= data_position() )
+ if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
{ flush_data(); return false; }
}
for( int i = 0; i < len; ++i ) put_byte( peek( rep0 ) );
@@ -1455,7 +1512,7 @@ int main( const int argc, const char * const argv[] )
"It is not safe to use lzd for any real work.\n"
"\nUsage: %s < file.lz > file\n", argv[0] );
std::printf( "Lzd decompresses from standard input to standard output.\n"
- "\nCopyright (C) 2015 Antonio Diaz Diaz.\n"
+ "\nCopyright (C) 2016 Antonio Diaz Diaz.\n"
"This is free software: you are free to change and redistribute it.\n"
"There is NO WARRANTY, to the extent permitted by law.\n"
"Report bugs to lzip-bug@nongnu.org\n"
diff --git a/encoder.cc b/encoder.cc
index 3b24c44..282a9b2 100644
--- a/encoder.cc
+++ b/encoder.cc
@@ -1,5 +1,5 @@
/* Lzip - LZMA lossless data compressor
- Copyright (C) 2008-2015 Antonio Diaz Diaz.
+ Copyright (C) 2008-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -30,6 +30,9 @@
#include "encoder.h"
+const CRC32 crc32;
+
+
int LZ_encoder::get_match_pairs( Pair * pairs )
{
int len_limit = match_len_limit;
@@ -485,7 +488,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
bool LZ_encoder::encode_member( const unsigned long long member_size )
{
const unsigned long long member_size_limit =
- member_size - File_trailer::size() - max_marker_size;
+ member_size - File_trailer::size - max_marker_size;
const bool best = ( match_len_limit > 12 );
const int dis_price_count = best ? 1 : 512;
const int align_price_count = best ? 1 : dis_align_size;
diff --git a/encoder.h b/encoder.h
index 8bb7258..351c4cc 100644
--- a/encoder.h
+++ b/encoder.h
@@ -1,5 +1,5 @@
/* Lzip - LZMA lossless data compressor
- Copyright (C) 2008-2015 Antonio Diaz Diaz.
+ Copyright (C) 2008-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/encoder_base.cc b/encoder_base.cc
index a8bbbd7..cfc058e 100644
--- a/encoder_base.cc
+++ b/encoder_base.cc
@@ -1,5 +1,5 @@
/* Lzip - LZMA lossless data compressor
- Copyright (C) 2008-2015 Antonio Diaz Diaz.
+ Copyright (C) 2008-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -153,8 +153,8 @@ void LZ_encoder_base::full_flush( const State state )
File_trailer trailer;
trailer.data_crc( crc() );
trailer.data_size( data_position() );
- trailer.member_size( renc.member_position() + File_trailer::size() );
- for( int i = 0; i < File_trailer::size(); ++i )
+ trailer.member_size( renc.member_position() + File_trailer::size );
+ for( int i = 0; i < File_trailer::size; ++i )
renc.put_byte( trailer.data[i] );
renc.flush_data();
}
diff --git a/encoder_base.h b/encoder_base.h
index b032fae..9ce622c 100644
--- a/encoder_base.h
+++ b/encoder_base.h
@@ -1,5 +1,5 @@
/* Lzip - LZMA lossless data compressor
- Copyright (C) 2008-2015 Antonio Diaz Diaz.
+ Copyright (C) 2008-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/fast_encoder.cc b/fast_encoder.cc
index 90361e9..939259f 100644
--- a/fast_encoder.cc
+++ b/fast_encoder.cc
@@ -1,5 +1,5 @@
/* Lzip - LZMA lossless data compressor
- Copyright (C) 2008-2015 Antonio Diaz Diaz.
+ Copyright (C) 2008-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -81,7 +81,7 @@ int FLZ_encoder::longest_match_len( int * const distance )
bool FLZ_encoder::encode_member( const unsigned long long member_size )
{
const unsigned long long member_size_limit =
- member_size - File_trailer::size() - max_marker_size;
+ member_size - File_trailer::size - max_marker_size;
int rep = 0;
int reps[num_rep_distances];
State state;
diff --git a/fast_encoder.h b/fast_encoder.h
index 36177df..2e0bd50 100644
--- a/fast_encoder.h
+++ b/fast_encoder.h
@@ -1,5 +1,5 @@
/* Lzip - LZMA lossless data compressor
- Copyright (C) 2008-2015 Antonio Diaz Diaz.
+ Copyright (C) 2008-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/lzip.h b/lzip.h
index 291e3e3..e7afe3c 100644
--- a/lzip.h
+++ b/lzip.h
@@ -1,5 +1,5 @@
/* Lzip - LZMA lossless data compressor
- Copyright (C) 2008-2015 Antonio Diaz Diaz.
+ Copyright (C) 2008-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -44,6 +44,7 @@ enum {
max_dictionary_bits = 29,
max_dictionary_size = 1 << max_dictionary_bits,
literal_context_bits = 3,
+ literal_pos_state_bits = 0, // not used
pos_state_bits = 2,
pos_states = 1 << pos_state_bits,
pos_state_mask = pos_states - 1,
@@ -175,6 +176,11 @@ public:
extern const CRC32 crc32;
+inline bool isvalid_ds( const unsigned dictionary_size )
+ { return ( dictionary_size >= min_dictionary_size &&
+ dictionary_size <= max_dictionary_size ); }
+
+
inline int real_bits( unsigned value )
{
int bits = 0;
@@ -195,9 +201,15 @@ struct File_header
void set_magic() { std::memcpy( data, magic_string, 4 ); data[4] = 1; }
bool verify_magic() const
{ return ( std::memcmp( data, magic_string, 4 ) == 0 ); }
+ bool verify_prefix( const int size ) const // detect truncated header
+ {
+ for( int i = 0; i < size && i < 4; ++i )
+ if( data[i] != magic_string[i] ) return false;
+ return ( size > 0 );
+ }
uint8_t version() const { return data[4]; }
- bool verify_version() const { return ( data[4] <= 1 ); }
+ bool verify_version() const { return ( data[4] == 1 ); }
unsigned dictionary_size() const
{
@@ -209,20 +221,17 @@ struct File_header
bool dictionary_size( const unsigned sz )
{
- if( sz >= min_dictionary_size && sz <= max_dictionary_size )
+ if( !isvalid_ds( sz ) ) return false;
+ data[5] = real_bits( sz - 1 );
+ if( sz > min_dictionary_size )
{
- data[5] = real_bits( sz - 1 );
- if( sz > min_dictionary_size )
- {
- const unsigned base_size = 1 << data[5];
- const unsigned fraction = base_size / 16;
- for( int i = 7; i >= 1; --i )
- if( base_size - ( i * fraction ) >= sz )
- { data[5] |= ( i << 5 ); break; }
- }
- return true;
+ const unsigned base_size = 1 << data[5];
+ const unsigned fraction = base_size / 16;
+ for( int i = 7; i >= 1; --i )
+ if( base_size - ( i * fraction ) >= sz )
+ { data[5] |= ( i << 5 ); break; }
}
- return false;
+ return true;
}
};
@@ -233,8 +242,7 @@ struct File_trailer
// 4-11 size of the uncompressed data
// 12-19 member size including header and trailer
- static int size( const int version = 1 )
- { return ( ( version >= 1 ) ? 20 : 12 ); }
+ enum { size = 20 };
unsigned data_crc() const
{
diff --git a/main.cc b/main.cc
index 97e27b6..8b2eedd 100644
--- a/main.cc
+++ b/main.cc
@@ -1,5 +1,5 @@
/* Lzip - LZMA lossless data compressor
- Copyright (C) 2008-2015 Antonio Diaz Diaz.
+ Copyright (C) 2008-2016 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -69,12 +69,13 @@
#error "Environments where CHAR_BIT != 8 are not supported."
#endif
+int verbosity = 0;
namespace {
const char * const Program_name = "Lzip";
const char * const program_name = "lzip";
-const char * const program_year = "2015";
+const char * const program_year = "2016";
const char * invocation_name = 0;
struct { const char * from; const char * to; } const known_extensions[] = {
@@ -92,9 +93,6 @@ enum Mode { m_compress, m_decompress, m_test };
std::string output_filename;
int outfd = -1;
-const mode_t usr_rw = S_IRUSR | S_IWUSR;
-const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
-mode_t outfd_mode = usr_rw;
bool delete_output_on_interrupt = false;
@@ -107,13 +105,13 @@ void show_help()
" -V, --version output version information and exit\n"
" -a, --trailing-error exit with error status if trailing data\n"
" -b, --member-size=<bytes> set member size limit in bytes\n"
- " -c, --stdout send output to standard output\n"
+ " -c, --stdout write to standard output, keep input files\n"
" -d, --decompress decompress\n"
" -f, --force overwrite existing output files\n"
" -F, --recompress force re-compression of compressed files\n"
" -k, --keep keep (don't delete) input files\n"
" -m, --match-length=<bytes> set match length limit in bytes [36]\n"
- " -o, --output=<file> if reading stdin, place the output into <file>\n"
+ " -o, --output=<file> if reading standard input, write to <file>\n"
" -q, --quiet suppress all messages\n"
" -s, --dictionary-size=<bytes> set dictionary size limit in bytes [8 MiB]\n"
" -S, --volume-size=<bytes> set volume size limit in bytes\n"
@@ -122,13 +120,15 @@ void show_help()
" -0 .. -9 set compression level [default 6]\n"
" --fast alias for -0\n"
" --best alias for -9\n"
- "If no file names are given, lzip compresses or decompresses\n"
- "from standard input to standard output.\n"
+ "If no file names are given, or if a file is '-', lzip compresses or\n"
+ "decompresses from standard input to standard output.\n"
"Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
"Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
- "The bidimensional parameter space of LZMA can't be mapped to a linear\n"
+ "Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12\n"
+ "to 2^29 bytes.\n"
+ "\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n"
"scale optimal for all files. If your files are large, very repetitive,\n"
- "etc, you may need to use the --match-length and --dictionary-size\n"
+ "etc, you may need to use the --dictionary-size and --match-length\n"
"options directly to achieve optimal performance.\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
@@ -149,7 +149,7 @@ void show_version()
}
-void show_header( const File_header & header )
+void show_header( const unsigned dictionary_size )
{
if( verbosity >= 3 )
{
@@ -158,14 +158,12 @@ void show_header( const File_header & header )
enum { factor = 1024 };
const char * p = "";
const char * np = " ";
- unsigned num = header.dictionary_size();
+ unsigned num = dictionary_size;
bool exact = ( num % factor == 0 );
for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
{ num /= factor; if( num % factor != 0 ) exact = false;
p = prefix[i]; np = ""; }
- if( verbosity >= 4 && header.version() != 1 )
- std::fprintf( stderr, "version %d, ", header.version() );
std::fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p );
}
}
@@ -187,11 +185,9 @@ unsigned long long getnum( const char * const ptr,
if( !errno && tail[0] )
{
const int factor = ( tail[1] == 'i' ) ? 1024 : 1000;
- int exponent = 0;
- bool bad_multiplier = false;
+ int exponent = 0; // 0 = bad multiplier
switch( tail[0] )
{
- case ' ': break;
case 'Y': exponent = 8; break;
case 'Z': exponent = 7; break;
case 'E': exponent = 6; break;
@@ -199,13 +195,10 @@ unsigned long long getnum( const char * const ptr,
case 'T': exponent = 4; break;
case 'G': exponent = 3; break;
case 'M': exponent = 2; break;
- case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true;
- break;
- case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true;
- break;
- default : bad_multiplier = true;
+ case 'K': if( factor == 1024 ) exponent = 1; break;
+ case 'k': if( factor == 1000 ) exponent = 1; break;
}
- if( bad_multiplier )
+ if( exponent <= 0 )
{
show_error( "Bad multiplier in numerical argument.", 0, true );
std::exit( 1 );
@@ -321,13 +314,17 @@ void set_d_outname( const std::string & name, const int i )
}
-bool open_outstream( const bool force )
+bool open_outstream( const bool force, const bool from_stdin )
{
+ const mode_t usr_rw = S_IRUSR | S_IWUSR;
+ const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
+ const mode_t outfd_mode = from_stdin ? all_rw : usr_rw;
int flags = O_CREAT | O_WRONLY | O_BINARY;
if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
outfd = open( output_filename.c_str(), flags, outfd_mode );
- if( outfd < 0 && verbosity >= 0 )
+ if( outfd >= 0 ) delete_output_on_interrupt = true;
+ else if( verbosity >= 0 )
{
if( errno == EEXIST )
std::fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n",
@@ -388,7 +385,11 @@ void close_and_set_permissions( const struct stat * const in_statsp )
fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 )
warning = true;
}
- if( close( outfd ) != 0 ) cleanup_and_fail( 1 );
+ if( close( outfd ) != 0 )
+ {
+ show_error( "Error closing output file", errno );
+ cleanup_and_fail( 1 );
+ }
outfd = -1;
delete_output_on_interrupt = false;
if( in_statsp )
@@ -463,8 +464,7 @@ int compress( const unsigned long long member_size,
close_and_set_permissions( in_statsp );
if( !next_filename() )
{ pp( "Too many volume files." ); retval = 1; break; }
- if( !open_outstream( true ) ) { retval = 1; break; }
- delete_output_on_interrupt = true;
+ if( !open_outstream( true, !in_statsp ) ) { retval = 1; break; }
}
}
}
@@ -505,9 +505,9 @@ unsigned char xdigit( const int value )
bool show_trailing_data( const uint8_t * const data, const int size,
const Pretty_print & pp, const bool all,
- const bool ignore_garbage )
+ const bool ignore_trailing )
{
- if( verbosity >= 4 || !ignore_garbage )
+ if( verbosity >= 4 || !ignore_trailing )
{
std::string msg;
if( !all ) msg = "first bytes of ";
@@ -531,14 +531,14 @@ bool show_trailing_data( const uint8_t * const data, const int size,
}
}
pp( msg.c_str() );
- if( !ignore_garbage ) show_error( "Trailing data not allowed." );
+ if( !ignore_trailing ) show_error( "Trailing data not allowed." );
}
- return ignore_garbage;
+ return ignore_trailing;
}
int decompress( const int infd, const Pretty_print & pp,
- const bool ignore_garbage, const bool testing )
+ const bool ignore_trailing, const bool testing )
{
int retval = 0;
@@ -552,10 +552,10 @@ int decompress( const int infd, const Pretty_print & pp,
const int size = rdec.read_data( header.data, File_header::size );
if( rdec.finished() ) // End Of File
{
- if( first_member )
+ if( first_member || header.verify_prefix( size ) )
{ pp( "File ends unexpectedly at member header." ); retval = 2; }
else if( size > 0 && !show_trailing_data( header.data, size, pp,
- true, ignore_garbage ) )
+ true, ignore_trailing ) )
retval = 2;
break;
}
@@ -563,7 +563,7 @@ int decompress( const int infd, const Pretty_print & pp,
{
if( first_member )
{ pp( "Bad magic number (file not in lzip format)." ); retval = 2; }
- else if( !show_trailing_data( header.data, size, pp, false, ignore_garbage ) )
+ else if( !show_trailing_data( header.data, size, pp, false, ignore_trailing ) )
retval = 2;
break;
}
@@ -576,14 +576,13 @@ int decompress( const int infd, const Pretty_print & pp,
retval = 2; break;
}
const unsigned dictionary_size = header.dictionary_size();
- if( dictionary_size < min_dictionary_size ||
- dictionary_size > max_dictionary_size )
+ if( !isvalid_ds( dictionary_size ) )
{ pp( "Invalid dictionary size in member header." ); retval = 2; break; }
if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
- { pp(); show_header( header ); }
+ { pp(); show_header( dictionary_size ); }
- LZ_decoder decoder( header, rdec, outfd );
+ LZ_decoder decoder( rdec, dictionary_size, outfd );
const int result = decoder.decode_member( pp );
partial_file_pos += rdec.member_position();
if( result != 0 )
@@ -626,24 +625,18 @@ void set_signals()
} // end namespace
-int verbosity = 0;
-
-
void show_error( const char * const msg, const int errcode, const bool help )
{
- if( verbosity >= 0 )
+ if( verbosity < 0 ) return;
+ if( msg && msg[0] )
{
- if( msg && msg[0] )
- {
- std::fprintf( stderr, "%s: %s", program_name, msg );
- if( errcode > 0 )
- std::fprintf( stderr, ": %s", std::strerror( errcode ) );
- std::fputc( '\n', stderr );
- }
- if( help )
- std::fprintf( stderr, "Try '%s --help' for more information.\n",
- invocation_name );
+ std::fprintf( stderr, "%s: %s", program_name, msg );
+ if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) );
+ std::fputc( '\n', stderr );
}
+ if( help )
+ std::fprintf( stderr, "Try '%s --help' for more information.\n",
+ invocation_name );
}
@@ -665,18 +658,16 @@ void show_progress( const unsigned long long partial_size,
static const Matchfinder_base * mb = 0;
static const Pretty_print * pp = 0;
- if( verbosity >= 2 )
+ if( verbosity < 2 ) return;
+ if( m ) // initialize static vars
+ { csize = cfile_size; psize = partial_size; mb = m; pp = p; }
+ if( mb && pp )
{
- if( m ) // initialize static vars
- { csize = cfile_size; psize = partial_size; mb = m; pp = p; }
- if( mb && pp )
- {
- const unsigned long long pos = psize + mb->data_position();
- if( csize > 0 )
- std::fprintf( stderr, "%4llu%%", pos / csize );
- std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
- pp->reset(); (*pp)(); // restore cursor position
- }
+ const unsigned long long pos = psize + mb->data_position();
+ if( csize > 0 )
+ std::fprintf( stderr, "%4llu%%", pos / csize );
+ std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
+ pp->reset(); (*pp)(); // restore cursor position
}
}
@@ -708,7 +699,7 @@ int main( const int argc, const char * const argv[] )
int infd = -1;
Mode program_mode = m_compress;
bool force = false;
- bool ignore_garbage = true;
+ bool ignore_trailing = true;
bool keep_input_files = false;
bool recompress = false;
bool to_stdout = false;
@@ -744,7 +735,7 @@ int main( const int argc, const char * const argv[] )
{ 't', "test", Arg_parser::no },
{ 'v', "verbose", Arg_parser::no },
{ 'V', "version", Arg_parser::no },
- { 0 , 0, Arg_parser::no } };
+ { 0 , 0, Arg_parser::no } };
const Arg_parser parser( argc, argv, options );
if( parser.error().size() ) // bad option
@@ -756,15 +747,15 @@ int main( const int argc, const char * const argv[] )
const int code = parser.code( argind );
if( !code ) break; // no more options
const std::string & arg = parser.argument( argind );
+ const char * const ptr = arg.c_str();
switch( code )
{
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
zero = ( code == '0' );
encoder_options = option_mapping[code-'0']; break;
- case 'a': ignore_garbage = false; break;
- case 'b': member_size = getnum( arg.c_str(), 100000, max_member_size );
- break;
+ case 'a': ignore_trailing = false; break;
+ case 'b': member_size = getnum( ptr, 100000, max_member_size ); break;
case 'c': to_stdout = true; break;
case 'd': program_mode = m_decompress; break;
case 'f': force = true; break;
@@ -772,15 +763,14 @@ int main( const int argc, const char * const argv[] )
case 'h': show_help(); return 0;
case 'k': keep_input_files = true; break;
case 'm': encoder_options.match_len_limit =
- getnum( arg.c_str(), min_match_len_limit, max_match_len );
+ getnum( ptr, min_match_len_limit, max_match_len );
zero = false; break;
case 'n': break;
case 'o': default_output_filename = arg; break;
case 'q': verbosity = -1; break;
- case 's': encoder_options.dictionary_size = get_dict_size( arg.c_str() );
+ case 's': encoder_options.dictionary_size = get_dict_size( ptr );
zero = false; break;
- case 'S': volume_size = getnum( arg.c_str(), 100000, max_volume_size );
- break;
+ case 'S': volume_size = getnum( ptr, 100000, max_volume_size ); break;
case 't': program_mode = m_test; break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0;
@@ -816,6 +806,7 @@ int main( const int argc, const char * const argv[] )
Pretty_print pp( filenames, verbosity );
int retval = 0;
+ bool stdin_used = false;
for( unsigned i = 0; i < filenames.size(); ++i )
{
struct stat in_stats;
@@ -823,6 +814,7 @@ int main( const int argc, const char * const argv[] )
if( filenames[i].empty() || filenames[i] == "-" )
{
+ if( stdin_used ) continue; else stdin_used = true;
input_filename.clear();
infd = STDIN_FILENO;
if( program_mode != m_test )
@@ -834,8 +826,7 @@ int main( const int argc, const char * const argv[] )
if( program_mode == m_compress )
set_c_outname( default_output_filename, volume_size > 0 );
else output_filename = default_output_filename;
- outfd_mode = all_rw;
- if( !open_outstream( force ) )
+ if( !open_outstream( force, true ) )
{
if( retval < 1 ) retval = 1;
close( infd ); infd = -1;
@@ -859,8 +850,7 @@ int main( const int argc, const char * const argv[] )
if( program_mode == m_compress )
set_c_outname( input_filename, volume_size > 0 );
else set_d_outname( input_filename, eindex );
- outfd_mode = usr_rw;
- if( !open_outstream( force ) )
+ if( !open_outstream( force, false ) )
{
if( retval < 1 ) retval = 1;
close( infd ); infd = -1;
@@ -870,10 +860,12 @@ int main( const int argc, const char * const argv[] )
}
}
- if( !check_tty( infd, program_mode ) ) return 1;
+ if( !check_tty( infd, program_mode ) )
+ {
+ if( retval < 1 ) retval = 1;
+ cleanup_and_fail( retval );
+ }
- if( output_filename.size() && !to_stdout && program_mode != m_test )
- delete_output_on_interrupt = true;
const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0;
pp.set_name( input_filename );
int tmp;
@@ -881,7 +873,7 @@ int main( const int argc, const char * const argv[] )
tmp = compress( member_size, volume_size, infd, encoder_options, pp,
in_statsp, zero );
else
- tmp = decompress( infd, pp, ignore_garbage, program_mode == m_test );
+ tmp = decompress( infd, pp, ignore_trailing, program_mode == m_test );
if( tmp > retval ) retval = tmp;
if( tmp && program_mode != m_test ) cleanup_and_fail( retval );
diff --git a/testsuite/check.sh b/testsuite/check.sh
index ba86c74..d94e8f3 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -1,6 +1,6 @@
#! /bin/sh
# check script for Lzip - LZMA lossless data compressor
-# Copyright (C) 2008-2015 Antonio Diaz Diaz.
+# Copyright (C) 2008-2016 Antonio Diaz Diaz.
#
# This script is free software: you have unlimited permission
# to copy, distribute and modify it.
@@ -17,9 +17,16 @@ if [ ! -f "${LZIP}" ] || [ ! -x "${LZIP}" ] ; then
exit 1
fi
+if [ -e "${LZIP}" ] 2> /dev/null ; then true
+else
+ echo "$0: a POSIX shell is required to run the tests"
+ echo "Try bash -c \"$0 $1 $2\""
+ exit 1
+fi
+
if [ -d tmp ] ; then rm -rf tmp ; fi
mkdir tmp
-cd "${objdir}"/tmp
+cd "${objdir}"/tmp || framework_failure
cat "${testdir}"/test.txt > in || framework_failure
in_lz="${testdir}"/test.txt.lz
@@ -27,18 +34,18 @@ fail=0
printf "testing lzip-%s..." "$2"
-"${LZIP}" -cqm4 in > /dev/null
-if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
-"${LZIP}" -cqm274 in > /dev/null
-if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
-"${LZIP}" -cqs-1 in > /dev/null
-if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
-"${LZIP}" -cqs0 in > /dev/null
-if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
-"${LZIP}" -cqs4095 in > /dev/null
-if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
-"${LZIP}" -cqs513MiB in > /dev/null
-if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -fkqm4 in
+if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -fkqm274 in
+if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -fkqs-1 in
+if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -fkqs0 in
+if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -fkqs4095 in
+if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -fkqs513MiB in
+if [ $? = 1 ] && [ ! -e in.lz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -tq in
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -tq < in
@@ -52,26 +59,53 @@ if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" -tq
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
-"${LZIP}" -t "${in_lz}" || fail=1
+printf "\ntesting decompression..."
+
+"${LZIP}" -t "${in_lz}"
+if [ $? = 0 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -cd "${in_lz}" > copy || fail=1
cmp in copy || fail=1
printf .
+rm -f copy
cat "${in_lz}" > copy.lz || framework_failure
-printf "to be overwritten" > copy || framework_failure
-"${LZIP}" -df copy.lz || fail=1
+"${LZIP}" -dk copy.lz || fail=1
cmp in copy || fail=1
-printf .
+printf "to be overwritten" > copy || framework_failure
+"${LZIP}" -dq copy.lz
+if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -df copy.lz
+if [ $? = 0 ] && [ ! -e copy.lz ] && cmp in copy ; then
+ printf . ; else printf - ; fail=1 ; fi
printf "to be overwritten" > copy || framework_failure
"${LZIP}" -df -o copy < "${in_lz}" || fail=1
cmp in copy || fail=1
printf .
+rm -f copy
"${LZIP}" < in > anyothername || fail=1
-"${LZIP}" -d anyothername || fail=1
-cmp in anyothername.out || fail=1
-printf .
+"${LZIP}" -d -o copy - anyothername - < "${in_lz}"
+if [ $? = 0 ] && cmp in copy && cmp in anyothername.out ; then
+ printf . ; else printf - ; fail=1 ; fi
+rm -f copy anyothername.out
+
+"${LZIP}" -tq in "${in_lz}"
+if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -tq foo.lz "${in_lz}"
+if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -cdq in "${in_lz}" > copy
+if [ $? = 2 ] && cat copy in | cmp in - ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -cdq foo.lz "${in_lz}" > copy
+if [ $? = 1 ] && cmp in copy ; then printf . ; else printf - ; fail=1 ; fi
+rm -f copy
+cat "${in_lz}" > copy.lz || framework_failure
+"${LZIP}" -dq in copy.lz
+if [ $? = 2 ] && [ -e copy.lz ] && [ ! -e copy ] && [ ! -e in.out ] ; then
+ printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -dq foo.lz copy.lz
+if [ $? = 1 ] && [ ! -e copy.lz ] && [ ! -e foo ] && cmp in copy ; then
+ printf . ; else printf - ; fail=1 ; fi
cat in in > in2 || framework_failure
"${LZIP}" -o copy2 < in2 || fail=1
@@ -95,7 +129,9 @@ printf "to be overwritten" > copy2 || framework_failure
cmp in2 copy2 || fail=1
printf .
-"${LZIP}" -cfq "${in_lz}" > /dev/null
+printf "\ntesting compression..."
+
+"${LZIP}" -cfq "${in_lz}" > out # /dev/null is a tty on OS/2
if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -cF "${in_lz}" > out || fail=1
"${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1