From a12430a7bff80cea63fa05ffd716f0d5e91ddb6d Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 7 Nov 2015 10:59:28 +0100 Subject: Merging upstream version 1.15~pre3. Signed-off-by: Daniel Baumann --- ChangeLog | 5 +++ INSTALL | 2 +- NEWS | 4 +- README | 39 ++++++++++--------- configure | 26 ++++++------- decoder.cc | 18 ++++----- decoder.h | 2 +- doc/lzip.1 | 4 +- doc/lzip.info | 109 +++++++++++++++++++++++++++-------------------------- doc/lzip.texinfo | 74 +++++++++++++++++++----------------- encoder.cc | 55 ++++++++++++++------------- encoder.h | 32 ++++++++-------- fast_encoder.cc | 36 +++++++++--------- fast_encoder.h | 2 +- lzip.h | 15 +++++--- main.cc | 48 +++++++++++++++++++---- testsuite/check.sh | 27 ++++++------- 17 files changed, 275 insertions(+), 223 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3323da8..cc24306 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2013-07-15 Antonio Diaz Diaz + + * Version 1.15-pre3 released. + * Show progress of compression at verbosity level 2 (-vv). + 2013-05-11 Antonio Diaz Diaz * Version 1.15-pre2 released. diff --git a/INSTALL b/INSTALL index c8c1fa4..9981a00 100644 --- a/INSTALL +++ b/INSTALL @@ -1,7 +1,7 @@ Requirements ------------ You will need a C++ compiler. -I use gcc 4.8.0 and 3.3.6, but the code should compile with any +I use gcc 4.8.1 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. diff --git a/NEWS b/NEWS index 795ce51..6f5ad8c 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,7 @@ Changes in version 1.15: +Lzip now shows the progress of compression at verbosity level 2 (-vv). + Decompression time has been reduced by 1%. File version is now shown only if verbosity >= 4. @@ -7,7 +9,7 @@ File version is now shown only if verbosity >= 4. Option "-n, --threads" is now accepted and ignored for compatibility with plzip. -"configure" now accepts options with a separate argument. +The configure script now accepts options with a separate argument. The chapter "Stream Format" and the appendix "Reference source code" have been added to the manual. diff --git a/README b/README index 675b494..4b8fae3 100644 --- a/README +++ b/README @@ -1,17 +1,32 @@ Description -Lzip is a lossless data compressor based on the LZMA algorithm, with -very safe integrity checking and a user interface similar to the one of -gzip or bzip2. Lzip decompresses almost as fast as gzip and compresses -better than bzip2, which makes it well suited for software distribution -and data archiving. +Lzip is a lossless data compressor with a user interface similar to the +one of gzip or bzip2. Lzip decompresses almost as fast as gzip and +compresses more than bzip2, which makes it well suited for software +distribution and data archiving. Lzip is a clean implementation of the +LZMA algorithm. Lzip uses the same well-defined exit status values used by bzip2, which makes it safer when used in pipes or scripts than compressors returning ambiguous warning values, like gzip. +The lzip file format is designed for long-term data archiving and +provides very safe integrity checking. The member trailer stores the +32-bit CRC of the original data, the size of the original data and the +size of the member. These values, together with the value remaining in +the range decoder and the end-of-stream marker, provide a 4 factor +integrity checking which guarantees that the decompressed version of the +data is identical to the original. This guards against corruption of the +compressed data, and against undetected bugs in lzip (hopefully very +unlikely). The chances of data corruption going undetected are +microscopic. Be aware, though, that the check occurs upon decompression, +so it can only tell you that something is wrong. It can't help you +recover the original uncompressed data. + If you ever need to recover data from a damaged lzip file, try the -lziprecover program. +lziprecover program. Lziprecover makes lzip files resistant to bit-flip +(one of the most common forms of data corruption), and provides data +recovery capabilities, including error-checked merging of damaged files. Lzip replaces every file given in the command line with a compressed version of itself, with the name "original_name.lz". Each compressed @@ -45,18 +60,6 @@ without exceeding the given limit. Keep in mind that the decompression memory requirement is affected at compression time by the choice of dictionary size limit. -As a self-check for your protection, lzip stores in the member trailer -the 32-bit CRC of the original data, the size of the original data and -the size of the member. These values, together with the value remaining -in the range decoder and the end-of-stream marker, provide a very safe 4 -factor integrity checking which guarantees that the decompressed version -of the data is identical to the original. This guards against corruption -of the compressed data, and against undetected bugs in lzip (hopefully -very unlikely). The chances of data corruption going undetected are -microscopic. Be aware, though, that the check occurs upon decompression, -so it can only tell you that something is wrong. It can't help you -recover the original uncompressed data. - Lzip implements a simplified version of the LZMA (Lempel-Ziv-Markov chain-Algorithm) algorithm. The high compression of LZMA comes from combining two basic, well-proven compression ideas: sliding dictionaries diff --git a/configure b/configure index 3632211..1919c1b 100755 --- a/configure +++ b/configure @@ -1,12 +1,12 @@ #! /bin/sh -# configure script for Lzip - Data compressor based on the LZMA algorithm +# configure script for Lzip - LZMA lossless data compressor # Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. pkgname=lzip -pkgversion=1.15-pre2 +pkgversion=1.15-pre3 progname=lzip srctrigger=doc/lzip.texinfo @@ -26,9 +26,8 @@ CXXFLAGS='-Wall -W -O2' LDFLAGS= # checking whether we are using GNU C++. -if [ ! -x /bin/g++ ] && - [ ! -x /usr/bin/g++ ] && - [ ! -x /usr/local/bin/g++ ] ; then +${CXX} --version > /dev/null 2>&1 +if [ $? != 0 ] ; then CXX=c++ CXXFLAGS='-W -O2' fi @@ -96,16 +95,19 @@ while [ $# != 0 ] ; do CXXFLAGS=*) CXXFLAGS=${optarg} ;; LDFLAGS=*) LDFLAGS=${optarg} ;; - --* | *=* | *-*-*) ;; + --*) + echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;; + *=* | *-*-*) ;; *) - echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2 + echo "configure: unrecognized option: '${option}'" 1>&2 + echo "Try 'configure --help' for more information." 1>&2 exit 1 ;; esac # Check if the option took a separate argument if [ "${arg2}" = yes ] ; then if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift - else echo "configure: Missing argument to \"${option}\"" 1>&2 + else echo "configure: Missing argument to '${option}'" 1>&2 exit 1 fi fi @@ -123,10 +125,8 @@ if [ -z "${srcdir}" ] ; then fi if [ ! -r "${srcdir}/${srctrigger}" ] ; then - exec 1>&2 - echo - echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" - echo "configure: (At least ${srctrigger} is missing)." + echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2 + echo "configure: (At least ${srctrigger} is missing)." 1>&2 exit 1 fi @@ -164,7 +164,7 @@ echo "CXXFLAGS = ${CXXFLAGS}" echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF -# Makefile for Lzip - Data compressor based on the LZMA algorithm +# Makefile for Lzip - LZMA lossless data compressor # Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. # This file was generated automatically by configure. Do not edit. # diff --git a/decoder.cc b/decoder.cc index 5f14fca..6f1627c 100644 --- a/decoder.cc +++ b/decoder.cc @@ -1,4 +1,4 @@ -/* Lzip - Data compressor based on the LZMA algorithm +/* Lzip - LZMA lossless data compressor Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify @@ -36,7 +36,7 @@ const CRC32 crc32; void Pretty_print::operator()( const char * const msg ) const { - if( verbosity_ >= 0 ) + if( verbosity >= 0 ) { if( first_post ) { @@ -129,7 +129,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const if( size < trailer_size ) { error = true; - if( pp.verbosity() >= 0 ) + if( verbosity >= 0 ) { pp(); std::fprintf( stderr, "Trailer truncated at trailer position %d;" @@ -148,7 +148,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const if( trailer.data_crc() != crc() ) { error = true; - if( pp.verbosity() >= 0 ) + if( verbosity >= 0 ) { pp(); std::fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X.\n", @@ -158,7 +158,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const if( trailer.data_size() != data_position() ) { error = true; - if( pp.verbosity() >= 0 ) + if( verbosity >= 0 ) { pp(); std::fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX).\n", @@ -168,19 +168,19 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const if( trailer.member_size() != member_size ) { error = true; - if( pp.verbosity() >= 0 ) + if( verbosity >= 0 ) { pp(); std::fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX).\n", trailer.member_size(), member_size, member_size ); } } - if( !error && pp.verbosity() >= 2 && data_position() > 0 && member_size > 0 ) + if( !error && verbosity >= 2 && data_position() > 0 && member_size > 0 ) std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", (double)data_position() / member_size, ( 8.0 * member_size ) / data_position(), 100.0 * ( 1.0 - ( (double)member_size / data_position() ) ) ); - if( !error && pp.verbosity() >= 4 ) + if( !error && verbosity >= 4 ) std::fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ", trailer.data_crc(), trailer.data_size(), trailer.member_size() ); return !error; @@ -283,7 +283,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) { rdec.load(); continue; } - if( pp.verbosity() >= 0 ) + if( verbosity >= 0 ) { pp(); std::fprintf( stderr, "Unsupported marker code '%d'.\n", len ); diff --git a/decoder.h b/decoder.h index 1722c14..167a05e 100644 --- a/decoder.h +++ b/decoder.h @@ -1,4 +1,4 @@ -/* Lzip - Data compressor based on the LZMA algorithm +/* Lzip - LZMA lossless data compressor Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify diff --git a/doc/lzip.1 b/doc/lzip.1 index 3baec79..b7ea022 100644 --- a/doc/lzip.1 +++ b/doc/lzip.1 @@ -1,12 +1,12 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. -.TH LZIP "1" "May 2013" "Lzip 1.15-pre2" "User Commands" +.TH LZIP "1" "July 2013" "Lzip 1.15-pre3" "User Commands" .SH NAME Lzip \- reduces the size of files .SH SYNOPSIS .B lzip [\fIoptions\fR] [\fIfiles\fR] .SH DESCRIPTION -Lzip \- Data compressor based on the LZMA algorithm. +Lzip \- LZMA lossless data compressor. .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR diff --git a/doc/lzip.info b/doc/lzip.info index 9c4e874..97d2bbb 100644 --- a/doc/lzip.info +++ b/doc/lzip.info @@ -2,7 +2,7 @@ This is lzip.info, produced by makeinfo version 4.13 from lzip.texinfo. INFO-DIR-SECTION Data Compression START-INFO-DIR-ENTRY -* Lzip: (lzip). Data compressor based on the LZMA algorithm +* Lzip: (lzip). LZMA lossless data compressor END-INFO-DIR-ENTRY  @@ -11,19 +11,19 @@ File: lzip.info, Node: Top, Next: Introduction, Up: (dir) Lzip Manual *********** -This manual is for Lzip (version 1.15-pre2, 11 May 2013). +This manual is for Lzip (version 1.15-pre3, 15 July 2013). * Menu: * Introduction:: Purpose and features of lzip * Algorithm:: How lzip compresses the data -* Invoking Lzip:: Command line interface -* File Format:: Detailed format of the compressed file -* Stream Format:: Format of the LZMA stream in lzip files +* Invoking lzip:: Command line interface +* File format:: Detailed format of the compressed file +* Stream format:: Format of the LZMA stream in lzip files * Examples:: A small tutorial with examples * Problems:: Reporting bugs * Reference source code:: Source code illustrating stream format -* Concept Index:: Index of concepts +* Concept index:: Index of concepts Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. @@ -37,18 +37,33 @@ File: lzip.info, Node: Introduction, Next: Algorithm, Prev: Top, Up: Top 1 Introduction ************** -Lzip is a lossless data compressor based on the LZMA algorithm, with -very safe integrity checking and a user interface similar to the one of -gzip or bzip2. Lzip decompresses almost as fast as gzip and compresses -better than bzip2, which makes it well suited for software distribution -and data archiving. +Lzip is a lossless data compressor with a user interface similar to the +one of gzip or bzip2. Lzip decompresses almost as fast as gzip and +compresses more than bzip2, which makes it well suited for software +distribution and data archiving. Lzip is a clean implementation of the +LZMA algorithm. Lzip uses the same well-defined exit status values used by bzip2, which makes it safer when used in pipes or scripts than compressors returning ambiguous warning values, like gzip. + The lzip file format is designed for long-term data archiving and +provides very safe integrity checking. The member trailer stores the +32-bit CRC of the original data, the size of the original data and the +size of the member. These values, together with the value remaining in +the range decoder and the end-of-stream marker, provide a 4 factor +integrity checking which guarantees that the decompressed version of the +data is identical to the original. This guards against corruption of the +compressed data, and against undetected bugs in lzip (hopefully very +unlikely). The chances of data corruption going undetected are +microscopic. Be aware, though, that the check occurs upon decompression, +so it can only tell you that something is wrong. It can't help you +recover the original uncompressed data. + If you ever need to recover data from a damaged lzip file, try the -lziprecover program. +lziprecover program. Lziprecover makes lzip files resistant to bit-flip +(one of the most common forms of data corruption), and provides data +recovery capabilities, including error-checked merging of damaged files. Lzip replaces every file given in the command line with a compressed version of itself, with the name "original_name.lz". Each compressed @@ -96,20 +111,8 @@ filename.lz becomes filename filename.tlz becomes filename.tar anyothername becomes anyothername.out - As a self-check for your protection, lzip stores in the member -trailer the 32-bit CRC of the original data, the size of the original -data and the size of the member. These values, together with the value -remaining in the range decoder and the end-of-stream marker, provide a -very safe 4 factor integrity checking which guarantees that the -decompressed version of the data is identical to the original. This -guards against corruption of the compressed data, and against -undetected bugs in lzip (hopefully very unlikely). The chances of data -corruption going undetected are microscopic. Be aware, though, that the -check occurs upon decompression, so it can only tell you that something -is wrong. It can't help you recover the original uncompressed data. -  -File: lzip.info, Node: Algorithm, Next: Invoking Lzip, Prev: Introduction, Up: Top +File: lzip.info, Node: Algorithm, Next: Invoking lzip, Prev: Introduction, Up: Top 2 Algorithm *********** @@ -170,9 +173,9 @@ range encoding), Igor Pavlov (for putting all the above together in LZMA), and Julian Seward (for bzip2's CLI and the idea of unzcrash).  -File: lzip.info, Node: Invoking Lzip, Next: File Format, Prev: Algorithm, Up: Top +File: lzip.info, Node: Invoking lzip, Next: File format, Prev: Algorithm, Up: Top -3 Invoking Lzip +3 Invoking lzip *************** The format for running lzip is: @@ -275,10 +278,10 @@ The format for running lzip is: `--verbose' Verbose mode. When compressing, show the compression ratio for each file - processed. + processed. A second -v shows the progress of compression. When decompressing or testing, further -v's (up to 4) increase the - verbosity level, showing status, dictionary size, compression - ratio, trailer contents (CRC, data size, member size), and up to 6 + verbosity level, showing status, compression ratio, dictionary + size, trailer contents (CRC, data size, member size), and up to 6 bytes of trailing garbage (if any). `-0 .. -9' @@ -332,9 +335,9 @@ invalid input file, 3 for an internal consistency error (eg, bug) which caused lzip to panic.  -File: lzip.info, Node: File Format, Next: Stream Format, Prev: Invoking Lzip, Up: Top +File: lzip.info, Node: File format, Next: Stream format, Prev: Invoking lzip, Up: Top -4 File Format +4 File format ************* Perfection is reached, not when there is no longer anything to add, but @@ -404,7 +407,7 @@ additional information before, between, or after them.  -File: lzip.info, Node: Stream Format, Next: Examples, Prev: File Format, Up: Top +File: lzip.info, Node: Stream format, Next: Examples, Prev: File format, Up: Top 5 Format of the LZMA stream in lzip files ***************************************** @@ -596,7 +599,7 @@ with the appropiate contexts to decode the different coding sequences Stream" marker is decoded.  -File: lzip.info, Node: Examples, Next: Problems, Prev: Stream Format, Up: Top +File: lzip.info, Node: Examples, Next: Problems, Prev: Stream format, Up: Top 6 A small tutorial with examples ******************************** @@ -684,7 +687,7 @@ for all eternity, if not longer. by running `lzip --version'.  -File: lzip.info, Node: Reference source code, Next: Concept Index, Prev: Problems, Up: Top +File: lzip.info, Node: Reference source code, Next: Concept index, Prev: Problems, Up: Top Appendix A Reference source code ******************************** @@ -1137,9 +1140,9 @@ int main( const int argc, const char * const argv[] ) }  -File: lzip.info, Node: Concept Index, Prev: Reference source code, Up: Top +File: lzip.info, Node: Concept index, Prev: Reference source code, Up: Top -Concept Index +Concept index ************* [index] @@ -1148,29 +1151,29 @@ Concept Index * algorithm: Algorithm. (line 6) * bugs: Problems. (line 6) * examples: Examples. (line 6) -* file format: File Format. (line 6) -* format of the LZMA stream: Stream Format. (line 6) +* file format: File format. (line 6) +* format of the LZMA stream: Stream format. (line 6) * getting help: Problems. (line 6) * introduction: Introduction. (line 6) -* invoking: Invoking Lzip. (line 6) -* options: Invoking Lzip. (line 6) +* invoking: Invoking lzip. (line 6) +* options: Invoking lzip. (line 6) * reference source code: Reference source code. (line 6) -* usage: Invoking Lzip. (line 6) -* version: Invoking Lzip. (line 6) +* usage: Invoking lzip. (line 6) +* version: Invoking lzip. (line 6)  Tag Table: -Node: Top224 -Node: Introduction1065 -Node: Algorithm4786 -Node: Invoking Lzip7304 -Node: File Format12895 -Node: Stream Format15328 -Node: Examples24042 -Node: Problems25991 -Node: Reference source code26521 -Node: Concept Index39768 +Node: Top210 +Node: Introduction1052 +Node: Algorithm5006 +Node: Invoking lzip7524 +Node: File format13162 +Node: Stream format15595 +Node: Examples24309 +Node: Problems26258 +Node: Reference source code26788 +Node: Concept index40035  End Tag Table diff --git a/doc/lzip.texinfo b/doc/lzip.texinfo index 484b5ac..1c04f2c 100644 --- a/doc/lzip.texinfo +++ b/doc/lzip.texinfo @@ -6,19 +6,19 @@ @finalout @c %**end of header -@set UPDATED 11 May 2013 -@set VERSION 1.15-pre2 +@set UPDATED 15 July 2013 +@set VERSION 1.15-pre3 @dircategory Data Compression @direntry -* Lzip: (lzip). Data compressor based on the LZMA algorithm +* Lzip: (lzip). LZMA lossless data compressor @end direntry @ifnothtml @titlepage @title Lzip -@subtitle Data compressor based on the LZMA algorithm +@subtitle LZMA lossless data compressor @subtitle for Lzip version @value{VERSION}, @value{UPDATED} @author by Antonio Diaz Diaz @@ -37,13 +37,13 @@ This manual is for Lzip (version @value{VERSION}, @value{UPDATED}). @menu * Introduction:: Purpose and features of lzip * Algorithm:: How lzip compresses the data -* Invoking Lzip:: Command line interface -* File Format:: Detailed format of the compressed file -* Stream Format:: Format of the LZMA stream in lzip files +* Invoking lzip:: Command line interface +* File format:: Detailed format of the compressed file +* Stream format:: Format of the LZMA stream in lzip files * Examples:: A small tutorial with examples * Problems:: Reporting bugs * Reference source code:: Source code illustrating stream format -* Concept Index:: Index of concepts +* Concept index:: Index of concepts @end menu @sp 1 @@ -58,18 +58,33 @@ to copy, distribute and modify it. @chapter Introduction @cindex introduction -Lzip is a lossless data compressor based on the LZMA algorithm, with -very safe integrity checking and a user interface similar to the one of -gzip or bzip2. Lzip decompresses almost as fast as gzip and compresses -better than bzip2, which makes it well suited for software distribution -and data archiving. +Lzip is a lossless data compressor with a user interface similar to the +one of gzip or bzip2. Lzip decompresses almost as fast as gzip and +compresses more than bzip2, which makes it well suited for software +distribution and data archiving. Lzip is a clean implementation of the +LZMA algorithm. Lzip uses the same well-defined exit status values used by bzip2, which makes it safer when used in pipes or scripts than compressors returning ambiguous warning values, like gzip. +The lzip file format is designed for long-term data archiving and +provides very safe integrity checking. The member trailer stores the +32-bit CRC of the original data, the size of the original data and the +size of the member. These values, together with the value remaining in +the range decoder and the end-of-stream marker, provide a 4 factor +integrity checking which guarantees that the decompressed version of the +data is identical to the original. This guards against corruption of the +compressed data, and against undetected bugs in lzip (hopefully very +unlikely). The chances of data corruption going undetected are +microscopic. Be aware, though, that the check occurs upon decompression, +so it can only tell you that something is wrong. It can't help you +recover the original uncompressed data. + If you ever need to recover data from a damaged lzip file, try the -lziprecover program. +lziprecover program. Lziprecover makes lzip files resistant to bit-flip +(one of the most common forms of data corruption), and provides data +recovery capabilities, including error-checked merging of damaged files. Lzip replaces every file given in the command line with a compressed version of itself, with the name "original_name.lz". Each compressed @@ -119,18 +134,6 @@ file from that of the compressed file as follows: @item anyothername @tab becomes @tab anyothername.out @end multitable -As a self-check for your protection, lzip stores in the member trailer -the 32-bit CRC of the original data, the size of the original data and -the size of the member. These values, together with the value remaining -in the range decoder and the end-of-stream marker, provide a very safe 4 -factor integrity checking which guarantees that the decompressed version -of the data is identical to the original. This guards against corruption -of the compressed data, and against undetected bugs in lzip (hopefully -very unlikely). The chances of data corruption going undetected are -microscopic. Be aware, though, that the check occurs upon decompression, -so it can only tell you that something is wrong. It can't help you -recover the original uncompressed data. - @node Algorithm @chapter Algorithm @@ -193,8 +196,8 @@ range encoding), Igor Pavlov (for putting all the above together in LZMA), and Julian Seward (for bzip2's CLI and the idea of unzcrash). -@node Invoking Lzip -@chapter Invoking Lzip +@node Invoking lzip +@chapter Invoking lzip @cindex invoking @cindex options @cindex usage @@ -295,9 +298,10 @@ Use it together with @samp{-v} to see information about the file. @item -v @itemx --verbose Verbose mode.@* -When compressing, show the compression ratio for each file processed.@* +When compressing, show the compression ratio for each file processed. A +second -v shows the progress of compression.@* When decompressing or testing, further -v's (up to 4) increase the -verbosity level, showing status, dictionary size, compression ratio, +verbosity level, showing status, compression ratio, dictionary size, trailer contents (CRC, data size, member size), and up to 6 bytes of trailing garbage (if any). @@ -357,8 +361,8 @@ invalid input file, 3 for an internal consistency error (eg, bug) which caused lzip to panic. -@node File Format -@chapter File Format +@node File format +@chapter File format @cindex file format Perfection is reached, not when there is no longer anything to add, but @@ -432,7 +436,7 @@ facilitates safe recovery of undamaged members from multi-member files. @end table -@node Stream Format +@node Stream format @chapter Format of the LZMA stream in lzip files @cindex format of the LZMA stream @@ -1204,8 +1208,8 @@ int main( const int argc, const char * const argv[] ) @end verbatim -@node Concept Index -@unnumbered Concept Index +@node Concept index +@unnumbered Concept index @printindex cp diff --git a/encoder.cc b/encoder.cc index a764f1e..0876742 100644 --- a/encoder.cc +++ b/encoder.cc @@ -1,4 +1,4 @@ -/* Lzip - Data compressor based on the LZMA algorithm +/* Lzip - LZMA lossless data compressor Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify @@ -240,33 +240,34 @@ void Range_encoder::flush_data() throw Error( "Write error" ); partial_member_pos += pos; pos = 0; + if( verbosity >= 2 ) show_progress(); } } -void Len_encoder::encode( Range_encoder & range_encoder, int symbol, +void Len_encoder::encode( Range_encoder & renc, int symbol, const int pos_state ) { symbol -= min_match_len; if( symbol < len_low_symbols ) { - range_encoder.encode_bit( choice1, 0 ); - range_encoder.encode_tree( bm_low[pos_state], symbol, len_low_bits ); + renc.encode_bit( choice1, 0 ); + renc.encode_tree( bm_low[pos_state], symbol, len_low_bits ); } else { - range_encoder.encode_bit( choice1, 1 ); + renc.encode_bit( choice1, 1 ); if( symbol < len_low_symbols + len_mid_symbols ) { - range_encoder.encode_bit( choice2, 0 ); - range_encoder.encode_tree( bm_mid[pos_state], symbol - len_low_symbols, - len_mid_bits ); + renc.encode_bit( choice2, 0 ); + renc.encode_tree( bm_mid[pos_state], symbol - len_low_symbols, + len_mid_bits ); } else { - range_encoder.encode_bit( choice2, 1 ); - range_encoder.encode_tree( bm_high, symbol - len_low_symbols - len_mid_symbols, - len_high_bits ); + renc.encode_bit( choice2, 1 ); + renc.encode_tree( bm_high, symbol - len_low_symbols - len_mid_symbols, + len_high_bits ); } } if( --counters[pos_state] <= 0 ) update_prices( pos_state ); @@ -278,17 +279,17 @@ void LZ_encoder_base::full_flush( const unsigned long long data_position, const State state ) { const int pos_state = data_position & pos_state_mask; - range_encoder.encode_bit( bm_match[state()][pos_state], 1 ); - range_encoder.encode_bit( bm_rep[state()], 0 ); + renc.encode_bit( bm_match[state()][pos_state], 1 ); + renc.encode_bit( bm_rep[state()], 0 ); encode_pair( 0xFFFFFFFFU, min_match_len, pos_state ); - range_encoder.flush(); + renc.flush(); File_trailer trailer; trailer.data_crc( crc() ); trailer.data_size( data_position ); - trailer.member_size( range_encoder.member_position() + File_trailer::size() ); + trailer.member_size( renc.member_position() + File_trailer::size() ); for( int i = 0; i < File_trailer::size(); ++i ) - range_encoder.put_byte( trailer.data[i] ); - range_encoder.flush_data(); + renc.put_byte( trailer.data[i] ); + renc.flush_data(); } @@ -680,14 +681,14 @@ bool LZ_encoder::encode_member( const unsigned long long member_size ) for( int i = 0; i < num_rep_distances; ++i ) rep_distances[i] = 0; if( matchfinder.data_position() != 0 || - range_encoder.member_position() != File_header::size ) + renc.member_position() != File_header::size ) return false; // can be called only once if( !matchfinder.finished() ) // encode first byte { const uint8_t prev_byte = 0; const uint8_t cur_byte = matchfinder[0]; - range_encoder.encode_bit( bm_match[state()][0], 0 ); + renc.encode_bit( bm_match[state()][0], 0 ); encode_literal( prev_byte, cur_byte ); crc32.update( crc_, cur_byte ); matchfinder.get_match_pairs(); @@ -714,7 +715,7 @@ bool LZ_encoder::encode_member( const unsigned long long member_size ) const int len = trials[i].price; bool bit = ( dis < 0 && len == 1 ); - range_encoder.encode_bit( bm_match[state()][pos_state], !bit ); + renc.encode_bit( bm_match[state()][pos_state], !bit ); if( bit ) // literal byte { const uint8_t prev_byte = matchfinder[-ahead-1]; @@ -734,23 +735,23 @@ bool LZ_encoder::encode_member( const unsigned long long member_size ) crc32.update( crc_, matchfinder.ptr_to_current_pos() - ahead, len ); mtf_reps( dis, rep_distances ); bit = ( dis < num_rep_distances ); - range_encoder.encode_bit( bm_rep[state()], bit ); + renc.encode_bit( bm_rep[state()], bit ); if( bit ) { bit = ( dis == 0 ); - range_encoder.encode_bit( bm_rep0[state()], !bit ); + renc.encode_bit( bm_rep0[state()], !bit ); if( bit ) - range_encoder.encode_bit( bm_len[state()][pos_state], len > 1 ); + renc.encode_bit( bm_len[state()][pos_state], len > 1 ); else { - range_encoder.encode_bit( bm_rep1[state()], dis > 1 ); + renc.encode_bit( bm_rep1[state()], dis > 1 ); if( dis > 1 ) - range_encoder.encode_bit( bm_rep2[state()], dis > 2 ); + renc.encode_bit( bm_rep2[state()], dis > 2 ); } if( len == 1 ) state.set_short_rep(); else { - rep_len_encoder.encode( range_encoder, len, pos_state ); + rep_len_encoder.encode( renc, len, pos_state ); state.set_rep(); } } @@ -764,7 +765,7 @@ bool LZ_encoder::encode_member( const unsigned long long member_size ) } } ahead -= len; i += len; - if( range_encoder.member_position() >= member_size_limit ) + if( renc.member_position() >= member_size_limit ) { if( !matchfinder.dec_pos( ahead ) ) return false; full_flush( matchfinder.data_position(), state ); diff --git a/encoder.h b/encoder.h index d3f3e77..973ba6c 100644 --- a/encoder.h +++ b/encoder.h @@ -1,4 +1,4 @@ -/* Lzip - Data compressor based on the LZMA algorithm +/* Lzip - LZMA lossless data compressor Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify @@ -405,8 +405,7 @@ public: for( int i = 0; i < pos_states; ++i ) update_prices( i ); } - void encode( Range_encoder & range_encoder, int symbol, - const int pos_state ); + void encode( Range_encoder & renc, int symbol, const int pos_state ); int price( const int symbol, const int pos_state ) const { return prices[pos_state][symbol - min_match_len]; } @@ -432,7 +431,7 @@ protected: Bit_model bm_dis[modeled_distances-end_dis_model]; Bit_model bm_align[dis_align_size]; - Range_encoder range_encoder; + Range_encoder renc; Len_encoder match_len_encoder; Len_encoder rep_len_encoder; @@ -444,13 +443,13 @@ protected: const int match_len_limit, const int outfd ) : crc_( 0xFFFFFFFFU ), - range_encoder( outfd ), + renc( outfd ), match_len_encoder( match_len_limit ), rep_len_encoder( match_len_limit ), num_dis_slots( 2 * real_bits( dictionary_size - 1 ) ) { for( int i = 0; i < File_header::size; ++i ) - range_encoder.put_byte( header.data[i] ); + renc.put_byte( header.data[i] ); } // move-to-front dis in/into reps @@ -478,18 +477,18 @@ protected: symbol, match_byte ); } void encode_literal( const uint8_t prev_byte, const uint8_t symbol ) - { range_encoder.encode_tree( bm_literal[get_lit_state(prev_byte)], symbol, 8 ); } + { renc.encode_tree( bm_literal[get_lit_state(prev_byte)], symbol, 8 ); } void encode_matched( const uint8_t prev_byte, const uint8_t symbol, const uint8_t match_byte ) - { range_encoder.encode_matched( bm_literal[get_lit_state(prev_byte)], - symbol, match_byte ); } + { renc.encode_matched( bm_literal[get_lit_state(prev_byte)], symbol, + match_byte ); } void encode_pair( const uint32_t dis, const int len, const int pos_state ) { - match_len_encoder.encode( range_encoder, len, pos_state ); + match_len_encoder.encode( renc, len, pos_state ); const int dis_slot = get_slot( dis ); - range_encoder.encode_tree( bm_dis_slot[get_dis_state(len)], dis_slot, dis_slot_bits ); + renc.encode_tree( bm_dis_slot[get_dis_state(len)], dis_slot, dis_slot_bits ); if( dis_slot >= start_dis_model ) { @@ -498,12 +497,12 @@ protected: const uint32_t direct_dis = dis - base; if( dis_slot < end_dis_model ) - range_encoder.encode_tree_reversed( bm_dis + base - dis_slot - 1, - direct_dis, direct_bits ); + renc.encode_tree_reversed( bm_dis + base - dis_slot - 1, direct_dis, + direct_bits ); else { - range_encoder.encode( direct_dis >> dis_align_bits, direct_bits - dis_align_bits ); - range_encoder.encode_tree_reversed( bm_align, direct_dis, dis_align_bits ); + renc.encode( direct_dis >> dis_align_bits, direct_bits - dis_align_bits ); + renc.encode_tree_reversed( bm_align, direct_dis, dis_align_bits ); } } } @@ -511,8 +510,7 @@ protected: void full_flush( const unsigned long long data_position, const State state ); public: - unsigned long long member_position() const - { return range_encoder.member_position(); } + unsigned long long member_position() const { return renc.member_position(); } }; diff --git a/fast_encoder.cc b/fast_encoder.cc index fc5f891..21e1a2b 100644 --- a/fast_encoder.cc +++ b/fast_encoder.cc @@ -1,4 +1,4 @@ -/* Lzip - Data compressor based on the LZMA algorithm +/* Lzip - LZMA lossless data compressor Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify @@ -124,21 +124,21 @@ bool FLZ_encoder::encode_member( const unsigned long long member_size ) for( int i = 0; i < num_rep_distances; ++i ) reps[i] = 0; if( fmatchfinder.data_position() != 0 || - range_encoder.member_position() != File_header::size ) + renc.member_position() != File_header::size ) return false; // can be called only once if( !fmatchfinder.finished() ) // encode first byte { const uint8_t prev_byte = 0; const uint8_t cur_byte = fmatchfinder[0]; - range_encoder.encode_bit( bm_match[state()][0], 0 ); + renc.encode_bit( bm_match[state()][0], 0 ); encode_literal( prev_byte, cur_byte ); crc32.update( crc_, cur_byte ); fmatchfinder.longest_match_len( 1 ); } while( !fmatchfinder.finished() && - range_encoder.member_position() < member_size_limit ) + renc.member_position() < member_size_limit ) { int match_distance; const int main_len = fmatchfinder.longest_match_len( &match_distance ); @@ -154,22 +154,22 @@ bool FLZ_encoder::encode_member( const unsigned long long member_size ) if( len > min_match_len && len + 4 > main_len ) { crc32.update( crc_, fmatchfinder.ptr_to_current_pos(), len ); - range_encoder.encode_bit( bm_match[state()][pos_state], 1 ); - range_encoder.encode_bit( bm_rep[state()], 1 ); + renc.encode_bit( bm_match[state()][pos_state], 1 ); + renc.encode_bit( bm_rep[state()], 1 ); const bool bit = ( dis == 0 ); - range_encoder.encode_bit( bm_rep0[state()], !bit ); + renc.encode_bit( bm_rep0[state()], !bit ); if( bit ) - range_encoder.encode_bit( bm_len[state()][pos_state], 1 ); + renc.encode_bit( bm_len[state()][pos_state], 1 ); else { const int distance = reps[dis]; for( int i = dis; i > 0; --i ) reps[i] = reps[i-1]; reps[0] = distance; - range_encoder.encode_bit( bm_rep1[state()], dis > 1 ); + renc.encode_bit( bm_rep1[state()], dis > 1 ); if( dis > 1 ) - range_encoder.encode_bit( bm_rep2[state()], dis > 2 ); + renc.encode_bit( bm_rep2[state()], dis > 2 ); } - rep_len_encoder.encode( range_encoder, len, pos_state ); + rep_len_encoder.encode( renc, len, pos_state ); state.set_rep(); move_pos( len ); continue; @@ -180,8 +180,8 @@ bool FLZ_encoder::encode_member( const unsigned long long member_size ) { crc32.update( crc_, fmatchfinder.ptr_to_current_pos(), main_len ); dis = match_distance; - range_encoder.encode_bit( bm_match[state()][pos_state], 1 ); - range_encoder.encode_bit( bm_rep[state()], 0 ); + renc.encode_bit( bm_match[state()][pos_state], 1 ); + renc.encode_bit( bm_rep[state()], 0 ); encode_pair( dis, main_len, pos_state ); state.set_match(); move_pos( main_len ); @@ -209,17 +209,17 @@ bool FLZ_encoder::encode_member( const unsigned long long member_size ) price0( bm_len[state()][pos_state] ); if( short_rep_price < price ) { - range_encoder.encode_bit( bm_match[state()][pos_state], 1 ); - range_encoder.encode_bit( bm_rep[state()], 1 ); - range_encoder.encode_bit( bm_rep0[state()], 0 ); - range_encoder.encode_bit( bm_len[state()][pos_state], 0 ); + renc.encode_bit( bm_match[state()][pos_state], 1 ); + renc.encode_bit( bm_rep[state()], 1 ); + renc.encode_bit( bm_rep0[state()], 0 ); + renc.encode_bit( bm_len[state()][pos_state], 0 ); state.set_short_rep(); continue; } } // literal byte - range_encoder.encode_bit( bm_match[state()][pos_state], 0 ); + renc.encode_bit( bm_match[state()][pos_state], 0 ); if( state.is_char() ) encode_literal( prev_byte, cur_byte ); else diff --git a/fast_encoder.h b/fast_encoder.h index 5b5169f..3f8125f 100644 --- a/fast_encoder.h +++ b/fast_encoder.h @@ -1,4 +1,4 @@ -/* Lzip - Data compressor based on the LZMA algorithm +/* Lzip - LZMA lossless data compressor Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify diff --git a/lzip.h b/lzip.h index ffa2fb3..20912cf 100644 --- a/lzip.h +++ b/lzip.h @@ -1,4 +1,4 @@ -/* Lzip - Data compressor based on the LZMA algorithm +/* Lzip - LZMA lossless data compressor Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify @@ -100,13 +100,11 @@ class Pretty_print std::string name_; const char * const stdin_name; unsigned longest_name; - const int verbosity_; mutable bool first_post; public: - Pretty_print( const std::vector< std::string > & filenames, const int v ) - : stdin_name( "(stdin)" ), longest_name( 0 ), verbosity_( v ), - first_post( false ) + explicit Pretty_print( const std::vector< std::string > & filenames ) + : stdin_name( "(stdin)" ), longest_name( 0 ), first_post( false ) { const unsigned stdin_name_len = std::strlen( stdin_name ); for( unsigned i = 0; i < filenames.size(); ++i ) @@ -127,7 +125,6 @@ public: void reset() const { if( name_.size() ) first_post = true; } const char * name() const { return name_.c_str(); } - int verbosity() const { return verbosity_; } void operator()( const char * const msg = 0 ) const; }; @@ -272,6 +269,12 @@ int readblock( const int fd, uint8_t * const buf, const int size ); int writeblock( const int fd, const uint8_t * const buf, const int size ); // defined in main.cc +extern int verbosity; void show_error( const char * const msg, const int errcode = 0, const bool help = false ); void internal_error( const char * const msg ); +class Matchfinder_base; +void show_progress( const unsigned long long partial_size = 0, + const Matchfinder_base * const m = 0, + const Pretty_print * const p = 0, + const struct stat * const in_statsp = 0 ); diff --git a/main.cc b/main.cc index 2c4e594..921684a 100644 --- a/main.cc +++ b/main.cc @@ -1,4 +1,4 @@ -/* Lzip - Data compressor based on the LZMA algorithm +/* Lzip - LZMA lossless data compressor Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify @@ -94,7 +94,6 @@ const unsigned long long max_volume_size = 0x7FFFFFFFFFFFFFFFULL; std::string output_filename; int outfd = -1; -int verbosity = 0; const mode_t usr_rw = S_IRUSR | S_IWUSR; const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; mode_t outfd_mode = usr_rw; @@ -103,7 +102,7 @@ bool delete_output_on_interrupt = false; void show_help() { - std::printf( "%s - Data compressor based on the LZMA algorithm.\n", Program_name ); + std::printf( "%s - LZMA lossless data compressor.\n", Program_name ); std::printf( "\nUsage: %s [options] [files]\n", invocation_name ); std::printf( "\nOptions:\n" " -h, --help display this help and exit\n" @@ -434,8 +433,9 @@ int compress( const unsigned long long member_size, while( true ) // encode one member per iteration { LZ_encoder encoder( matchfinder, header, outfd ); - const unsigned long long size = ( ( volume_size > 0 ) ? - std::min( member_size, volume_size - partial_volume_size ) : member_size ); + const unsigned long long size = ( volume_size > 0 ) ? + std::min( member_size, volume_size - partial_volume_size ) : member_size; + show_progress( in_size, &matchfinder, &pp, in_statsp ); // init if( !encoder.encode_member( size ) ) { pp( "Encoder error" ); retval = 1; break; } in_size += matchfinder.data_position(); @@ -501,8 +501,9 @@ int fcompress( const unsigned long long member_size, while( true ) // encode one member per iteration { FLZ_encoder encoder( fmatchfinder, header, outfd ); - const unsigned long long size = ( ( volume_size > 0 ) ? - std::min( member_size, volume_size - partial_volume_size ) : member_size ); + const unsigned long long size = ( volume_size > 0 ) ? + std::min( member_size, volume_size - partial_volume_size ) : member_size; + show_progress( in_size, &fmatchfinder, &pp, in_statsp ); // init if( !encoder.encode_member( size ) ) { pp( "Encoder error" ); retval = 1; break; } in_size += fmatchfinder.data_position(); @@ -679,6 +680,9 @@ void set_signals() } // end namespace +int verbosity = 0; + + void show_error( const char * const msg, const int errcode, const bool help ) { if( verbosity >= 0 ) @@ -705,6 +709,34 @@ void internal_error( const char * const msg ) } +void show_progress( const unsigned long long partial_size, + const Matchfinder_base * const m, + const Pretty_print * const p, + const struct stat * const in_statsp ) + { + static unsigned long long cfile_size = 0; // file_size / 100 + static unsigned long long psize = 0; + static const Matchfinder_base * mb = 0; + static const Pretty_print * pp = 0; + + if( m ) // initialize static vars + { + psize = partial_size; mb = m; pp = p; + cfile_size = ( in_statsp && S_ISREG( in_statsp->st_mode ) ) ? + in_statsp->st_size / 100 : 0; + return; + } + if( mb && pp ) + { + const unsigned long long pos = psize + mb->data_position(); + if( cfile_size > 0 ) + std::fprintf( stderr, "%4llu%%", pos / cfile_size ); + std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 ); + pp->reset(); (*pp)(); // restore cursor position + } + } + + int main( const int argc, const char * const argv[] ) { // Mapping from gzip/bzip2 style 1..9 compression modes @@ -831,7 +863,7 @@ int main( const int argc, const char * const argv[] ) ( filenames_given || default_output_filename.size() ) ) set_signals(); - Pretty_print pp( filenames, verbosity ); + Pretty_print pp( filenames ); int retval = 0; for( unsigned i = 0; i < filenames.size(); ++i ) diff --git a/testsuite/check.sh b/testsuite/check.sh index 64f481c..8adf23b 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -22,27 +22,28 @@ mkdir tmp cd "${objdir}"/tmp cat "${testdir}"/test.txt > in || framework_failure +in_lz="${testdir}"/test.txt.lz fail=0 printf "testing lzip-%s..." "$2" "${LZIP}" -cqs-1 in > /dev/null -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIP}" -cqs0 in > /dev/null -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIP}" -cqs4095 in > /dev/null -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIP}" -cqm274 in > /dev/null -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi -"${LZIP}" -t "${testdir}"/test.txt.lz || fail=1 -"${LZIP}" -cd "${testdir}"/test.txt.lz > copy || fail=1 +"${LZIP}" -t "${in_lz}" || fail=1 +"${LZIP}" -cd "${in_lz}" > copy || fail=1 cmp in copy || fail=1 printf . -"${LZIP}" -cfq "${testdir}"/test.txt.lz > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIP}" -cF "${testdir}"/test.txt.lz > out || fail=1 +"${LZIP}" -cfq "${in_lz}" > out +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi +"${LZIP}" -cF "${in_lz}" > out || fail=1 "${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1 cmp in copy || fail=1 printf . @@ -53,30 +54,30 @@ for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do printf "garbage" >> copy.lz || fail=1 "${LZIP}" -df copy.lz || fail=1 cmp in copy || fail=1 - printf . done +printf . for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do "${LZIP}" -c -$i in > out || fail=1 printf "g" >> out || fail=1 "${LZIP}" -cd out > copy || fail=1 cmp in copy || fail=1 - printf . done +printf . for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do "${LZIP}" -$i < in > out || fail=1 "${LZIP}" -d < out > copy || fail=1 cmp in copy || fail=1 - printf . done +printf . for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do "${LZIP}" -f -$i -o out < in || fail=1 "${LZIP}" -df -o copy < out.lz || fail=1 cmp in copy || fail=1 - printf . done +printf . "${LZIP}" < in > anyothername || fail=1 "${LZIP}" -d anyothername || fail=1 -- cgit v1.2.3