From 59633c24402bfc44a0853e799bed9a1b1fdd0520 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 7 Nov 2015 08:57:19 +0100 Subject: Adding upstream version 1.12. Signed-off-by: Daniel Baumann --- ChangeLog | 92 ++++++++------ INSTALL | 6 +- Makefile.in | 23 ++-- NEWS | 55 +++------ README | 7 +- arg_parser.cc | 4 +- arg_parser.h | 4 +- configure | 8 +- decoder.cc | 62 +++++++++- decoder.h | 4 +- doc/lzip.1 | 7 +- doc/lzip.info | 97 ++++++++------- doc/lzip.texinfo | 128 ++++++++++--------- doc/lziprecover.1 | 4 +- encoder.cc | 46 ++++--- encoder.h | 37 +++--- fast_encoder.cc | 24 ++-- fast_encoder.h | 7 +- lzip.h | 5 +- lziprecover.cc | 87 +++---------- main.cc | 183 ++++++++++++++------------- testsuite/check.sh | 34 +++-- testsuite/unzcrash.cc | 335 ++++++++++++++++++++++++++++++++++++++++++++------ 23 files changed, 786 insertions(+), 473 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7d0eed3..88accb1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,25 @@ +2011-04-30 Antonio Diaz Diaz + + * Version 1.12 released. + * main.cc: Added new option `-F, --recompress'. + * encoder.h (update_prices): Update high length symbol prices + independently of the value of `pos_state'. This gives better + compression for large values of `--match-length' without being + slower. + * encoder.h encoder.cc: Optimize pair price calculations. This + reduces compression time for large values of `--match-length' + by up to 6%. + * Compression time of option `-0' has been reduced by 2%. + * main.cc (decompress): Print only one status line for each + multimember file when only one `-v' is specified. + * main.cc (decompress): Print up to 6 bytes of trailing garbage + when `-vvvv' is specified. + * main.cc (open_instream): Do not show the message + " and `--stdout' was not specified" for directories, etc. + * lziprecover.cc: If `-v' is not specified show errors only. + * testsuite/unzcrash.cc: Use Arg_parser. + * testsuite/unzcrash.cc: Added new options `-b', `-p' and `-s'. 
+ 2010-09-16 Antonio Diaz Diaz * Version 1.11 released. @@ -9,26 +31,26 @@ compress less but faster. (-1 now takes 43% less time for only 20% larger compressed size). * encoder.cc: Compression of option -9 has been slightly increased. - * lziprecover.cc: Added new option "--merge" which tries to + * lziprecover.cc: Added new option `--merge' which tries to produce a correct file merging the good parts of two or more damaged copies. - * lziprecover.cc: Added new option "--repair" for repairing a + * lziprecover.cc: Added new option `--repair' for repairing a 1-byte error in single-member files. * decoder.cc (decode_member): Detect file errors earlier to improve efficiency of lziprecover's new repair capability. This change also prevents (harmless) access to uninitialized memory when decompressing a corrupt file. - * lziprecover.cc: Added new option "--force". - * lziprecover.cc: Added new option "--output". - * lziprecover.cc: Added new option "--split" to select the until + * lziprecover.cc: Added new option `--force'. + * lziprecover.cc: Added new option `--output'. + * lziprecover.cc: Added new option `--split' to select the until now only operation of splitting multimember files. * lziprecover.cc: If no operation is specified, warn the user and do nothing. * main.cc: Fixed warning about fchown's return value being ignored. - * decoder.cc: "-tvvvv" now also shows compression ratio. + * decoder.cc: `-tvvvv' now also shows compression ratio. * main.cc: Set stdin/stdout in binary mode on MSVC and OS2. * New examples have been added to the manual. - * testsuite: "test1" renamed to "test.txt" + * testsuite: `test1' renamed to `test.txt'. Added new tests. * Matchfinder types HC4 (4 bytes hash-chain) and HT4 (4 bytes hash-table) have been tested and found no better than the current BT4. @@ -37,9 +59,9 @@ * Version 1.10 released. * decoder.h: Input_buffer integrated in Range_decoder. 
- * main.cc: File specified with option "-o" is now created with + * main.cc: File specified with option `-o' is now created with mode 0666 if umask allows it, deleted if interrupted by user. - * main.cc: New constant "o_binary". + * main.cc: New constant `o_binary'. * main.cc: Dictionary size for options -2, -3, -4 and -8 has been changed to improve linearity of compressed sizes. * lzip.h: Fixed warnings produced by over-optimization (-O3). @@ -49,8 +71,8 @@ * Version 1.9 released. * main.cc (main): Return at least 1 if closing stdout fails. - * Makefile.in: Added "--name" option to help2man invocation. - * testsuite/check.sh: Use "test1" instead of "COPYING" for testing. + * Makefile.in: Added `--name' option to help2man invocation. + * testsuite/check.sh: Use `test1' instead of `COPYING' for testing. 2009-09-02 Antonio Diaz Diaz @@ -75,25 +97,25 @@ is smaller than dictionary size limit. * decoder.cc: Added extra flush calls to improve partial decompression of corrupt files. - * "--test" no more needs "/dev/null". - * Removed some "bashisms" from lzdiff and lzgrep. - * Dictionary size for options "-1" to "-4" has been changed. + * `--test' no more needs `/dev/null'. + * Removed some `bashisms' from lzdiff and lzgrep. + * Dictionary size for options `-1' to `-4' has been changed. * main.cc (signal_handler): Declared as `extern "C"'. * Makefile.in: Extra files are now installed by default. * testsuite/check.sh: Test lziprecover. - * Added "export LC_ALL=C" to all scripts. + * Added `export LC_ALL=C' to all scripts. 2009-04-12 Antonio Diaz Diaz * Version 1.5 released. * lzip.h: Coded dictionary size implemented in File_header. * Fixed some includes that prevented compilation with GCC 4.4. - * "member_size" and "volume_size" are now accurate limits. + * `member_size' and `volume_size' are now accurate limits. * Compression speed has been improved. * Implemented bt4 type matchfinder. - * Added chapter "Algorithm" to the manual. 
- * Lzdiff and lzgrep now accept "-h" for "--help" and - "-V" for "--version". + * Added chapter `Algorithm' to the manual. + * Lzdiff and lzgrep now accept `-h' for `--help' and + `-V' for `--version'. * Makefile.in: Man page is now installed by default. * testsuite/check.sh: Verify that files are open in binary mode. @@ -101,11 +123,11 @@ * Version 1.4 released. * Implemented compression of version 1 files. - * Added new option "--member-size". - * Added new option "--volume-size". - * Added new option "--output". - * main.cc: Read from non regular files if "--stdout" is specified. - * Added "lziprecover", a member recoverer program. + * Added new option `--member-size'. + * Added new option `--volume-size'. + * Added new option `--output'. + * main.cc: Read from non regular files if `--stdout' is specified. + * Added `lziprecover', a member recoverer program. * testsuite/unzcrash.cc: Test all 1-byte errors. 2008-12-21 Antonio Diaz Diaz @@ -115,7 +137,7 @@ dictionary size for each file during compression, saving memory during decompression. * Implemented decompression of version 1 files. - * testsuite/check.sh: Replaced "diff -q" with "cmp". + * testsuite/check.sh: Replaced `diff -q' with `cmp'. 2008-12-10 Antonio Diaz Diaz @@ -123,23 +145,23 @@ * encoder.cc: A 1-byte read outside allocated memory has been fixed. * lzip.h: Dictionary size limit has been reduced to 512MiB because setting it to 1GiB causes overflow of a 32 bit integer. - * Added "lzdiff", a diff/cmp wrapper for gzip, bzip2, lzip and + * Added `lzdiff', a diff/cmp wrapper for gzip, bzip2, lzip and non-compressed files. - * Added "lzgrep", a grep wrapper for gzip, bzip2, lzip and + * Added `lzgrep', a grep wrapper for gzip, bzip2, lzip and non-compressed files. - * "make install-info" should now work on Debian and OS X. + * `make install-info' should now work on Debian and OS X. 2008-11-17 Antonio Diaz Diaz * Version 1.1 released. - * Changed short name of option "--dictionary-size" to "-s". 
- * Changed short name of option "--match-length" to "-m". + * Changed short name of option `--dictionary-size' to `-s'. + * Changed short name of option `--match-length' to `-m'. * Changed LONG_LONG_MAX to LLONG_MAX. 2008-10-14 Antonio Diaz Diaz * Version 1.0 released. - * "-tvv" shows file version and dictionary size. + * `-tvv' shows file version and dictionary size. 2008-09-30 Antonio Diaz Diaz @@ -149,7 +171,7 @@ 2008-09-23 Antonio Diaz Diaz * Version 0.4 released. - * Code cleanup for global variable "verbosity". + * Code cleanup for global variable `verbosity'. * Regained the compression ratio of 0.2 with 5% faster speed. * Fixed compilation on sistems where size_t != unsigned int. @@ -157,8 +179,8 @@ * Version 0.3 released. * encoder.cc: Compression is now 15% faster, 1% worse. - * main.cc (main): Make "-t" option override "-c". - * main.cc (decompress): Show "done" instead of "ok" when not testing. + * main.cc (main): Make `-t' option override `-c'. + * main.cc (decompress): Show `done' instead of `ok' when not testing. * encoder.h: Use trials[] to return the list of pairs. 2008-09-09 Antonio Diaz Diaz @@ -172,7 +194,7 @@ * Version 0.1 released. -Copyright (C) 2008, 2009, 2010 Antonio Diaz Diaz. +Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and diff --git a/INSTALL b/INSTALL index f841b73..b516f57 100644 --- a/INSTALL +++ b/INSTALL @@ -1,7 +1,7 @@ Requirements ------------ You will need a C++ compiler. -I use gcc 4.3.4 and 3.3.6, but the code should compile with any +I use gcc 4.3.5 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -29,7 +29,7 @@ the main archive. 4. Optionally, type `make check' to run the tests that come with lzip. -5. Type `make install' to install the program and any data files and +5. 
Type `make install' to install the programs and any data files and documentation. @@ -50,7 +50,7 @@ After running `configure', you can run `make' and `make install' as explained above. -Copyright (C) 2008, 2009, 2010 Antonio Diaz Diaz. +Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/Makefile.in b/Makefile.in index c366eb3..30aba74 100644 --- a/Makefile.in +++ b/Makefile.in @@ -8,6 +8,7 @@ SHELL = /bin/sh objs = arg_parser.o decoder.o encoder.o fast_encoder.o main.o recobjs = arg_parser.o decoder.o lziprecover.o +unzobjs = arg_parser.o unzcrash.o .PHONY : all install install-info install-man install-strip \ @@ -17,16 +18,16 @@ recobjs = arg_parser.o decoder.o lziprecover.o all : $(progname) lziprecover $(progname) : $(objs) - $(CXX) $(LDFLAGS) -o $(progname) $(objs) + $(CXX) $(LDFLAGS) -o $@ $(objs) $(progname)_profiled : $(objs) - $(CXX) $(LDFLAGS) -pg -o $(progname)_profiled $(objs) + $(CXX) $(LDFLAGS) -pg -o $@ $(objs) lziprecover : $(recobjs) - $(CXX) $(LDFLAGS) -o lziprecover $(recobjs) + $(CXX) $(LDFLAGS) -o $@ $(recobjs) -unzcrash : testsuite/unzcrash.cc - $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@ $< +unzcrash : $(unzobjs) + $(CXX) $(LDFLAGS) -o $@ $(unzobjs) main.o : main.cc $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< @@ -34,6 +35,9 @@ main.o : main.cc lziprecover.o : lziprecover.cc $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< +unzcrash.o : testsuite/unzcrash.cc + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< + %.o : %.cc $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< @@ -44,6 +48,7 @@ encoder.o : lzip.h encoder.h fast_encoder.o : lzip.h encoder.h fast_encoder.h main.o : arg_parser.h lzip.h decoder.h encoder.h fast_encoder.h lziprecover.o : arg_parser.h lzip.h decoder.h Makefile +unzcrash.o : arg_parser.h Makefile doc : info man @@ -57,11 +62,11 @@ 
man : $(VPATH)/doc/$(progname).1 $(VPATH)/doc/lziprecover.1 $(VPATH)/doc/$(progname).1 : $(progname) help2man -n 'reduces the size of files' \ - -o $(VPATH)/doc/$(progname).1 ./$(progname) + -o $@ ./$(progname) $(VPATH)/doc/lziprecover.1 : lziprecover help2man -n 'recovers data from damaged lzip files' \ - -o $(VPATH)/doc/lziprecover.1 --no-info ./lziprecover + -o $@ --no-info ./lziprecover Makefile : $(VPATH)/configure $(VPATH)/Makefile.in ./config.status @@ -111,9 +116,9 @@ dist : doc $(DISTNAME)/README \ $(DISTNAME)/configure \ $(DISTNAME)/doc/$(progname).1 \ + $(DISTNAME)/doc/lziprecover.1 \ $(DISTNAME)/doc/$(pkgname).info \ $(DISTNAME)/doc/$(pkgname).texinfo \ - $(DISTNAME)/doc/lziprecover.1 \ $(DISTNAME)/testsuite/check.sh \ $(DISTNAME)/testsuite/test.txt \ $(DISTNAME)/testsuite/test_bad[1-5].lz \ @@ -127,7 +132,7 @@ dist : doc clean : -rm -f $(progname) $(progname)_profiled $(objs) - -rm -f lziprecover lziprecover.o + -rm -f lziprecover lziprecover.o unzcrash unzcrash.o distclean : clean -rm -f Makefile config.status *.tar *.tar.lz diff --git a/NEWS b/NEWS index e2a2c99..affec93 100644 --- a/NEWS +++ b/NEWS @@ -1,49 +1,22 @@ -Changes in version 1.11: +Changes in version 1.12: -The option "-0", which produces a compression speed and ratio comparable -to those of "gzip -9", has been added to lzip. +The option "-F, --recompress", which forces recompression of files whose +name already has the ".lz" or ".tlz" suffix, has been added. -Match length limit set by options -1 to -8 has been reduced to extend -range of use towards gzip. Lower numbers now compress less but faster. -(-1 now takes 43% less time for only 20% larger compressed size). +For large values of "--match-length", compression ratio has been +slightly increased and compression time has been reduced by up to 6%. -(Note that the bidimensional parameter space of LZMA can't be mapped to -a linear scale optimal for all files. 
If your files are large, very -repetitive, etc, you may need to use the --match-length and ---dictionary-size options directly to achieve optimal performance). +Compression time of option "-0" has been reduced by 2%. -Compression of option -9 has been slightly increased. +Print only one status line for each multimember file when only one "-v" +is specified. -The option "--merge", which tries to produce a correct file merging the -good parts of two or more damaged copies, has been added to lziprecover. +Print up to 6 bytes of trailing garbage when "-vvvv" is specified. -(To give you an idea of --merge's possibilities, when merging two copies -each of them with one damaged area affecting 1 percent of the copy, the -probability of obtaining a correct file is about 98 percent. With three -such copies the probability rises to 99.97 percent. For large files with -small errors, the probability approaches 100 percent even with only two -copies). +Do not show the message "and `--stdout' was not specified" for file +types that can't be read (directories, etc). -The option "--repair", which repairs any 1-byte error in the lzma stream -of lzip files, has been added to lziprecover. +If "--verbose" is not specified, lziprecover now only shows errors and +warnings. -Decompressor has been modified to detect file errors earlier, improving -efficiency of lziprecover's new repair capability. - -The option "--force", which forces overwriting of existing output files, -has been added to lziprecover. - -The option "--output", which sets the name of the output file, has been -added to lziprecover. - -The option "--split", which selects the until now only operation of -splitting multimember files, has been added to lziprecover. - -Lziprecover now needs the operation to be specified. Else it warns the -user and does nothing. - -A warning about fchown's return value being ignored has been fixed. - -"lzip -tvvvv" now also shows file compression ratio. 
- -Some new examples have been added to the manual. +Options "--bits", "--position" and "--size" have been added to unzcrash. diff --git a/README b/README index 7c158d9..d075ab3 100644 --- a/README +++ b/README @@ -8,7 +8,10 @@ and data archiving. Lziprecover is a data recovery tool for lzip compressed files able to repair slightly damaged files, recover badly damaged files from two or -more copies, and extract undamaged members from multi-member files. +more copies, and extract undamaged members from multi-member files. If +the cause of file corruption is damaged media, the combination GNU +ddrescue + lziprecover is the best option for recovering data from +multiple damaged copies. Lzip replaces every file given in the command line with a compressed version of itself, with the name "original_name.lz". Each compressed @@ -64,7 +67,7 @@ range encoding), Igor Pavlov (for putting all the above together in LZMA), and Julian Seward (for bzip2's CLI and the idea of unzcrash). -Copyright (C) 2008, 2009, 2010 Antonio Diaz Diaz. +Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/arg_parser.cc b/arg_parser.cc index cc9f87d..bc4b4a3 100644 --- a/arg_parser.cc +++ b/arg_parser.cc @@ -1,5 +1,5 @@ -/* Arg_parser - A POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006, 2007, 2008, 2009, 2010 Antonio Diaz Diaz. +/* Arg_parser - POSIX/GNU command line argument parser. (C++ version) + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/arg_parser.h b/arg_parser.h index da1cc94..d1e5c02 100644 --- a/arg_parser.h +++ b/arg_parser.h @@ -1,5 +1,5 @@ -/* Arg_parser - A POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006, 2007, 2008, 2009, 2010 Antonio Diaz Diaz. 
+/* Arg_parser - POSIX/GNU command line argument parser. (C++ version) + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/configure b/configure index eee97dc..8b291a6 100755 --- a/configure +++ b/configure @@ -1,16 +1,14 @@ #! /bin/sh # configure script for Lzip - Data compressor based on the LZMA algorithm -# Copyright (C) 2008, 2009, 2010 Antonio Diaz Diaz. +# Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. -# -# Date of this version: 2010-09-16 args= no_create= pkgname=lzip -pkgversion=1.11 +pkgversion=1.12 progname=lzip srctrigger=lzip.h @@ -167,7 +165,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lzip - Data compressor based on the LZMA algorithm -# Copyright (C) 2008, 2009, 2010 Antonio Diaz Diaz. +# Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz. # This file was generated automatically by configure. Do not edit. # # This Makefile is free software: you have unlimited permission diff --git a/decoder.cc b/decoder.cc index 03d2ca6..dafd73d 100644 --- a/decoder.cc +++ b/decoder.cc @@ -1,5 +1,5 @@ /* Lzip - Data compressor based on the LZMA algorithm - Copyright (C) 2008, 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,6 +25,7 @@ #include #include #include +#include #include "lzip.h" #include "decoder.h" @@ -32,6 +33,61 @@ const CRC32 crc32; + +void Pretty_print::operator()( const char * const msg ) const throw() + { + if( verbosity_ >= 0 ) + { + if( first_post ) + { + first_post = false; + std::fprintf( stderr, " %s: ", name_.c_str() ); + for( unsigned int i = 0; i < longest_name - name_.size(); ++i ) + std::fprintf( stderr, " " ); + if( !msg ) std::fflush( stderr ); + } + if( msg ) std::fprintf( stderr, "%s.\n", msg ); + } + } + + +// Returns the number of bytes really read. +// If (returned value < size) and (errno == 0), means EOF was reached. +// +int readblock( const int fd, uint8_t * const buf, const int size ) throw() + { + int rest = size; + errno = 0; + while( rest > 0 ) + { + errno = 0; + const int n = read( fd, buf + size - rest, rest ); + if( n > 0 ) rest -= n; + else if( n == 0 ) break; + else if( errno != EINTR && errno != EAGAIN ) break; + } + return ( rest > 0 ) ? size - rest : size; + } + + +// Returns the number of bytes really written. +// If (returned value < size), it is always an error. +// +int writeblock( const int fd, const uint8_t * const buf, const int size ) throw() + { + int rest = size; + errno = 0; + while( rest > 0 ) + { + errno = 0; + const int n = write( fd, buf + size - rest, rest ); + if( n > 0 ) rest -= n; + else if( errno && errno != EINTR && errno != EAGAIN ) break; + } + return ( rest > 0 ) ? 
size - rest : size; + } + + bool Range_decoder::read_block() { if( !at_stream_end ) @@ -120,12 +176,12 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const trailer.member_size(), member_size, member_size ); } } - if( !error && pp.verbosity() >= 4 && data_position() > 0 && member_size > 0 ) + if( !error && pp.verbosity() >= 3 && data_position() > 0 && member_size > 0 ) std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", (double)data_position() / member_size, ( 8.0 * member_size ) / data_position(), 100.0 * ( 1.0 - ( (double)member_size / data_position() ) ) ); - if( !error && pp.verbosity() >= 3 ) + if( !error && pp.verbosity() >= 4 ) std::fprintf( stderr, "data CRC %08X, data size %9lld, member size %8lld. ", (unsigned int)trailer.data_crc(), trailer.data_size(), trailer.member_size() ); diff --git a/decoder.h b/decoder.h index 9fd1423..a974b33 100644 --- a/decoder.h +++ b/decoder.h @@ -1,5 +1,5 @@ /* Lzip - Data compressor based on the LZMA algorithm - Copyright (C) 2008, 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -52,7 +52,7 @@ public: uint8_t get_byte() { - if( finished() ) return 0; + if( finished() ) return 0x55; // make code != 0 return buffer[pos++]; } diff --git a/doc/lzip.1 b/doc/lzip.1 index 670d15e..b0cf9a5 100644 --- a/doc/lzip.1 +++ b/doc/lzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. 
-.TH LZIP "1" "September 2010" "Lzip 1.11" "User Commands" +.TH LZIP "1" "April 2011" "Lzip 1.12" "User Commands" .SH NAME Lzip \- reduces the size of files .SH SYNOPSIS @@ -27,6 +27,9 @@ decompress \fB\-f\fR, \fB\-\-force\fR overwrite existing output files .TP +\fB\-F\fR, \fB\-\-recompress\fR +force recompression of compressed files +.TP \fB\-k\fR, \fB\-\-keep\fR keep (don't delete) input files .TP @@ -69,7 +72,7 @@ Report bugs to lzip\-bug@nongnu.org .br Lzip home page: http://www.nongnu.org/lzip/lzip.html .SH COPYRIGHT -Copyright \(co 2010 Antonio Diaz Diaz. +Copyright \(co 2011 Antonio Diaz Diaz. License GPLv3+: GNU GPL version 3 or later .br This is free software: you are free to change and redistribute it. diff --git a/doc/lzip.info b/doc/lzip.info index 00cf933..fa26348 100644 --- a/doc/lzip.info +++ b/doc/lzip.info @@ -11,7 +11,7 @@ File: lzip.info, Node: Top, Next: Introduction, Up: (dir) Lzip Manual *********** -This manual is for Lzip (version 1.11, 16 September 2010). +This manual is for Lzip (version 1.12, 30 April 2011). * Menu: @@ -26,7 +26,7 @@ This manual is for Lzip (version 1.11, 16 September 2010). * Concept Index:: Index of concepts - Copyright (C) 2008, 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -70,8 +70,8 @@ multivolume compressed tar archives. or 2 times the dictionary size limit (1 if input file size is less than dictionary size limit, else 2) plus 8 times the dictionary size really used. The option `-0' is special and only requires about 1.5 MiB at -most. The amount of memory required for decompression is a little more -than the dictionary size really used. +most. The amount of memory required for decompression is only a few tens +of KiB larger than the dictionary size really used. Lzip will automatically use the smallest possible dictionary size without exceeding the given limit. 
Keep in mind that the decompression @@ -174,50 +174,55 @@ The format for running lzip is: Lzip supports the following options: -`--help' `-h' +`--help' Print an informative help message describing the options and exit. -`--version' `-V' +`--version' Print the version number of lzip on the standard output and exit. -`--member-size=SIZE' `-b SIZE' +`--member-size=SIZE' Produce a multimember file and set the member size limit to SIZE bytes. Minimum member size limit is 100kB. Small member size may degrade compression ratio, so use it only when needed. The default is to produce single-member files. -`--stdout' `-c' +`--stdout' Compress or decompress to standard output. Needed when reading from a named pipe (fifo) or from a device. Use it to recover as much of the uncompressed data as possible when decompressing a corrupt file. -`--decompress' `-d' +`--decompress' Decompress. -`--force' `-f' +`--force' Force overwrite of output file. -`--keep' +`-F' +`--recompress' + Force recompression of files whose name already has the `.lz' or + `.tlz' suffix. + `-k' +`--keep' Keep (don't delete) input files during compression or decompression. -`--match-length=LENGTH' `-m LENGTH' +`--match-length=LENGTH' Set the match length limit in bytes. After a match this long is found, the search is finished. Valid values range from 5 to 273. Larger values usually give better compression ratios but longer compression times. -`--output=FILE' `-o FILE' +`--output=FILE' When reading from standard input and `--stdout' has not been specified, use `FILE' as the virtual name of the uncompressed file. This produces a file named `FILE' when decompressing, a file @@ -225,25 +230,26 @@ The format for running lzip is: `FILE00001.lz', `FILE00002.lz', etc, when compressing and splitting the output in volumes. -`--quiet' `-q' +`--quiet' Quiet operation. Suppress all messages. -`--dictionary-size=SIZE' `-s SIZE' +`--dictionary-size=SIZE' Set the dictionary size limit in bytes. 
Valid values range from 4KiB to 512MiB. Lzip will use the smallest possible dictionary size for each member without exceeding this limit. Note that dictionary sizes are quantized. If the specified size does not - match one of the valid sizes, it will be rounded upwards. + match one of the valid sizes, it will be rounded upwards by adding + up to (SIZE / 16) to it. For maximum compression you should use a dictionary size limit as large as possible, but keep in mind that the decompression memory requirement is affected at compression time by the choice of dictionary size limit. -`--volume-size=SIZE' `-S SIZE' +`--volume-size=SIZE' Split the compressed output into several volume files with names `original_name00001.lz', `original_name00002.lz', etc, and set the volume size limit to SIZE bytes. Each volume is a complete, maybe @@ -251,17 +257,20 @@ The format for running lzip is: volume size may degrade compression ratio, so use it only when needed. -`--test' `-t' +`--test' Check integrity of the specified file(s), but don't decompress them. This really performs a trial decompression and throws away the result. Use it together with `-v' to see information about the file. -`--verbose' `-v' - Verbose mode. Show the compression ratio for each file processed. - Further -v's increase the verbosity level. +`--verbose' + Verbose mode. When compressing, show the compression ratio for + each file processed. When decompressing or testing, further -v's + (up to 4) increase the verbosity level, showing status, dictionary + size, compression ratio, trailer contents (CRC, data size, member + size), and up to 6 bytes of trailing garbage (if any). `-0 .. -9' Set the compression parameters (dictionary size and match length @@ -343,8 +352,8 @@ additional information before, between, or after them. `VN (version number, 1 byte)' Just in case something needs to be modified in the future. Valid - values are 0 and 1. Version 0 files have only one member and lack - `Member size'. 
+ values are 0 and 1. Version 0 files are deprecated. They can + contain only one member and lack the `Member size' field. `DS (coded dictionary size, 1 byte)' Bits 4-0 contain the base 2 logarithm of the base dictionary size. @@ -449,7 +458,7 @@ the GNU ddrescue manual for details about ddrescue) mount -t iso9660 -o loop,ro cdimage2 /mnt/cdimage cp /mnt/cdimage/backup.tar.lz rescued2.tar.lz umount /mnt/cdimage - lziprecover -m -o rescued.tar.lz rescued1.tar.lz rescued2.tar.lz + lziprecover -m -v -o rescued.tar.lz rescued1.tar.lz rescued2.tar.lz Example 11: Recover the first volume of those created in example 9 from @@ -467,7 +476,7 @@ produced and compared. lzip -t rec*big_db2_00001.lz rec00012big_db2_00001.lz: crc mismatch rec00018big_db2_00001.lz: crc mismatch - lziprecover -m rec00012big_db1_00001.lz rec00012big_db2_00001.lz + lziprecover -m -v rec00012big_db1_00001.lz rec00012big_db2_00001.lz Input files merged successfully cp rec00007big_db2_00001.lz rec00007big_db1_00001.lz cp rec00012big_db1_00001_fixed.lz rec00012big_db1_00001.lz @@ -515,21 +524,21 @@ The format for running lziprecover is: Lziprecover supports the following options: -`--help' `-h' +`--help' Print an informative help message describing the options and exit. -`--version' `-V' +`--version' Print the version number of lziprecover on the standard output and exit. -`--force' `-f' +`--force' Force overwrite of output file. -`--merge' `-m' +`--merge' Try to produce a correct file merging the good parts of two or more damaged copies. The copies must be single-member files. The merge will fail if the copies have too many damaged areas or if the same @@ -543,25 +552,25 @@ The format for running lziprecover is: percent. For large files with small errors, the probability approaches 100 percent even with only two copies. -`--output=FILE' `-o FILE' +`--output=FILE' Place the output into `FILE' instead of into `FILE_fixed.lz'. 
If splitting, the names of the files produced are in the form `rec00001FILE', etc. -`--quiet' `-q' +`--quiet' Quiet operation. Suppress all messages. -`--repair' `-R' +`--repair' Try to repair a small error, affecting only one byte, in a single-member FILE. If successful, a repaired copy is written to the file `FILE_fixed.lz'. `FILE' is not modified at all. -`--split' `-s' +`--split' Search for members in `FILE' and write each member in its own `.lz' file. You can then use `lzip -t' to test the integrity of the resulting files, decompress those which are undamaged, and try @@ -573,8 +582,8 @@ The format for running lziprecover is: `lzip -cd rec*FILE.lz > recovered_data', processes the files in the correct order. -`--verbose' `-v' +`--verbose' Verbose mode. Further -v's increase the verbosity level. @@ -619,15 +628,15 @@ Concept Index  Tag Table: Node: Top224 -Node: Introduction1029 -Node: Algorithm4421 -Node: Invoking Lzip6939 -Node: File Format11911 -Node: Examples13865 -Ref: ddrescue-example15619 -Node: Lziprecover17412 -Node: Invoking Lziprecover18465 -Node: Problems20826 -Node: Concept Index21360 +Node: Introduction1031 +Node: Algorithm4439 +Node: Invoking Lzip6957 +Node: File Format12303 +Node: Examples14295 +Ref: ddrescue-example16049 +Node: Lziprecover17848 +Node: Invoking Lziprecover18901 +Node: Problems21262 +Node: Concept Index21796  End Tag Table diff --git a/doc/lzip.texinfo b/doc/lzip.texinfo index 5c62d2f..b86de34 100644 --- a/doc/lzip.texinfo +++ b/doc/lzip.texinfo @@ -5,8 +5,8 @@ @finalout @c %**end of header -@set UPDATED 16 September 2010 -@set VERSION 1.11 +@set UPDATED 30 April 2011 +@set VERSION 1.12 @dircategory Data Compression @direntry @@ -14,6 +14,7 @@ @end direntry +@ifnothtml @titlepage @title Lzip @subtitle Data compressor based on the LZMA algorithm @@ -24,7 +25,6 @@ @vskip 0pt plus 1filll @end titlepage -@ifnothtml @contents @end ifnothtml @@ -46,7 +46,7 @@ This manual is for Lzip (version @value{VERSION}, @value{UPDATED}). 
@end menu @sp 1 -Copyright @copyright{} 2008, 2009, 2010 Antonio Diaz Diaz. +Copyright @copyright{} 2008, 2009, 2010, 2011 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -89,8 +89,8 @@ The amount of memory required for compression is about 5 MiB plus 1 or 2 times the dictionary size limit (1 if input file size is less than dictionary size limit, else 2) plus 8 times the dictionary size really used. The option @samp{-0} is special and only requires about 1.5 MiB at -most. The amount of memory required for decompression is a little more -than the dictionary size really used. +most. The amount of memory required for decompression is only a few tens +of KiB larger than the dictionary size really used. Lzip will automatically use the smallest possible dictionary size without exceeding the given limit. Keep in mind that the decompression @@ -200,47 +200,52 @@ lzip [@var{options}] [@var{files}] Lzip supports the following options: @table @samp -@item --help -@itemx -h +@item -h +@itemx --help Print an informative help message describing the options and exit. -@item --version -@itemx -V +@item -V +@itemx --version Print the version number of lzip on the standard output and exit. -@item --member-size=@var{size} -@itemx -b @var{size} +@item -b @var{size} +@itemx --member-size=@var{size} Produce a multimember file and set the member size limit to @var{size} bytes. Minimum member size limit is 100kB. Small member size may degrade compression ratio, so use it only when needed. The default is to produce single-member files. -@item --stdout -@itemx -c +@item -c +@itemx --stdout Compress or decompress to standard output. Needed when reading from a named pipe (fifo) or from a device. Use it to recover as much of the uncompressed data as possible when decompressing a corrupt file. -@item --decompress -@itemx -d +@item -d +@itemx --decompress Decompress. 
-@item --force -@itemx -f +@item -f +@itemx --force Force overwrite of output file. -@item --keep -@itemx -k +@item -F +@itemx --recompress +Force recompression of files whose name already has the @samp{.lz} or +@samp{.tlz} suffix. + +@item -k +@itemx --keep Keep (don't delete) input files during compression or decompression. -@item --match-length=@var{length} -@itemx -m @var{length} +@item -m @var{length} +@itemx --match-length=@var{length} Set the match length limit in bytes. After a match this long is found, the search is finished. Valid values range from 5 to 273. Larger values usually give better compression ratios but longer compression times. -@item --output=@var{file} -@itemx -o @var{file} +@item -o @var{file} +@itemx --output=@var{file} When reading from standard input and @samp{--stdout} has not been specified, use @samp{@var{file}} as the virtual name of the uncompressed file. This produces a file named @samp{@var{file}} when decompressing, a @@ -248,40 +253,44 @@ file named @samp{@var{file}.lz} when compressing, and several files named @samp{@var{file}00001.lz}, @samp{@var{file}00002.lz}, etc, when compressing and splitting the output in volumes. -@item --quiet -@itemx -q +@item -q +@itemx --quiet Quiet operation. Suppress all messages. -@item --dictionary-size=@var{size} -@itemx -s @var{size} +@item -s @var{size} +@itemx --dictionary-size=@var{size} Set the dictionary size limit in bytes. Valid values range from 4KiB to 512MiB. Lzip will use the smallest possible dictionary size for each member without exceeding this limit. Note that dictionary sizes are quantized. If the specified size does not match one of the valid sizes, -it will be rounded upwards. +it will be rounded upwards by adding up to (@var{size} / 16) to it. For maximum compression you should use a dictionary size limit as large as possible, but keep in mind that the decompression memory requirement is affected at compression time by the choice of dictionary size limit. 
-@item --volume-size=@var{size} -@itemx -S @var{size} +@item -S @var{size} +@itemx --volume-size=@var{size} Split the compressed output into several volume files with names @samp{original_name00001.lz}, @samp{original_name00002.lz}, etc, and set the volume size limit to @var{size} bytes. Each volume is a complete, maybe multimember, lzip file. Minimum volume size limit is 100kB. Small volume size may degrade compression ratio, so use it only when needed. -@item --test -@itemx -t +@item -t +@itemx --test Check integrity of the specified file(s), but don't decompress them. This really performs a trial decompression and throws away the result. Use it together with @samp{-v} to see information about the file. -@item --verbose -@itemx -v -Verbose mode. Show the compression ratio for each file processed. -Further -v's increase the verbosity level. +@item -v +@itemx --verbose +Verbose mode. +When compressing, show the compression ratio for each file processed. +When decompressing or testing, further -v's (up to 4) increase the +verbosity level, showing status, dictionary size, compression ratio, +trailer contents (CRC, data size, member size), and up to 6 bytes of +trailing garbage (if any). @item -0 .. -9 Set the compression parameters (dictionary size and match length limit) @@ -373,8 +382,8 @@ A four byte string, identifying the lzip format, with the value "LZIP". @item VN (version number, 1 byte) Just in case something needs to be modified in the future. Valid values -are 0 and 1. Version 0 files have only one member and lack @samp{Member -size}. +are 0 and 1. Version 0 files are deprecated. They can contain only one +member and lack the @samp{Member size} field. 
@item DS (coded dictionary size, 1 byte) Bits 4-0 contain the base 2 logarithm of the base dictionary size.@* @@ -508,7 +517,7 @@ ddrescue -b2048 /dev/cdrom cdimage2 logfile2 mount -t iso9660 -o loop,ro cdimage2 /mnt/cdimage cp /mnt/cdimage/backup.tar.lz rescued2.tar.lz umount /mnt/cdimage -lziprecover -m -o rescued.tar.lz rescued1.tar.lz rescued2.tar.lz +lziprecover -m -v -o rescued.tar.lz rescued1.tar.lz rescued2.tar.lz @end example @sp 1 @@ -529,7 +538,7 @@ lzip -t rec*big_db1_00001.lz lzip -t rec*big_db2_00001.lz rec00012big_db2_00001.lz: crc mismatch rec00018big_db2_00001.lz: crc mismatch -lziprecover -m rec00012big_db1_00001.lz rec00012big_db2_00001.lz +lziprecover -m -v rec00012big_db1_00001.lz rec00012big_db2_00001.lz Input files merged successfully cp rec00007big_db2_00001.lz rec00007big_db1_00001.lz cp rec00012big_db1_00001_fixed.lz rec00012big_db1_00001.lz @@ -567,6 +576,7 @@ If the cause of file corruption is damaged media, the combination GNU ddrescue + lziprecover is the best option for recovering data from multiple damaged copies. @xref{ddrescue-example}, for an example. + @node Invoking Lziprecover @chapter Invoking Lziprecover @cindex invoking lziprecover @@ -580,20 +590,20 @@ lziprecover [@var{options}] [@var{files}] Lziprecover supports the following options: @table @samp -@item --help -@itemx -h +@item -h +@itemx --help Print an informative help message describing the options and exit. -@item --version -@itemx -V +@item -V +@itemx --version Print the version number of lziprecover on the standard output and exit. -@item --force -@itemx -f +@item -f +@itemx --force Force overwrite of output file. -@item --merge -@itemx -m +@item -m +@itemx --merge Try to produce a correct file merging the good parts of two or more damaged copies. The copies must be single-member files. The merge will fail if the copies have too many damaged areas or if the same byte is @@ -607,26 +617,26 @@ such copies the probability rises to 99.97 percent. 
For large files with small errors, the probability approaches 100 percent even with only two copies. -@item --output=@var{file} -@itemx -o @var{file} +@item -o @var{file} +@itemx --output=@var{file} Place the output into @samp{@var{file}} instead of into @samp{@var{file}_fixed.lz}. If splitting, the names of the files produced are in the form @samp{rec00001@var{file}}, etc. -@item --quiet -@itemx -q +@item -q +@itemx --quiet Quiet operation. Suppress all messages. -@item --repair -@itemx -R +@item -R +@itemx --repair Try to repair a small error, affecting only one byte, in a single-member @var{file}. If successful, a repaired copy is written to the file @samp{@var{file}_fixed.lz}. @samp{@var{file}} is not modified at all. -@item --split -@itemx -s +@item -s +@itemx --split Search for members in @samp{@var{file}} and write each member in its own @samp{.lz} file. You can then use @samp{lzip -t} to test the integrity of the resulting files, decompress those which are undamaged, and try to @@ -638,8 +648,8 @@ designed so that the use of wildcards in subsequent processing, for example, @w{@samp{lzip -cd rec*@var{file}.lz > recovered_data}}, processes the files in the correct order. -@item --verbose -@itemx -v +@item -v +@itemx --verbose Verbose mode. Further -v's increase the verbosity level. @end table diff --git a/doc/lziprecover.1 b/doc/lziprecover.1 index 95ddb29..ff8e0ed 100644 --- a/doc/lziprecover.1 +++ b/doc/lziprecover.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. -.TH LZIPRECOVER "1" "September 2010" "Lziprecover 1.11" "User Commands" +.TH LZIPRECOVER "1" "April 2011" "Lziprecover 1.12" "User Commands" .SH NAME Lziprecover \- recovers data from damaged lzip files .SH SYNOPSIS @@ -40,7 +40,7 @@ Report bugs to lzip\-bug@nongnu.org .br Lzip home page: http://www.nongnu.org/lzip/lzip.html .SH COPYRIGHT -Copyright \(co 2010 Antonio Diaz Diaz. +Copyright \(co 2011 Antonio Diaz Diaz. 
License GPLv3+: GNU GPL version 3 or later .br This is free software: you are free to change and redistribute it. diff --git a/encoder.cc b/encoder.cc index e10142a..1bdf78e 100644 --- a/encoder.cc +++ b/encoder.cc @@ -1,5 +1,5 @@ /* Lzip - Data compressor based on the LZMA algorithm - Copyright (C) 2008, 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -357,22 +357,23 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances], return 1; } - { - const int normal_match_price = match_price + price0( bm_rep[state()] ); - int len = min_match_len; if( main_len <= replens[rep_index] ) { main_len = replens[rep_index]; - for( ; len <= main_len; ++len ) trials[len].price = infinite_price; + for( int len = min_match_len; len <= main_len; ++len ) + trials[len].price = infinite_price; } - else for( ; len <= main_len; ++len ) + else { - trials[len].dis = match_distances[len] + num_rep_distances; - trials[len].prev_index = 0; - trials[len].price = normal_match_price + - price_pair( match_distances[len], len, pos_state ); + const int normal_match_price = match_price + price0( bm_rep[state()] ); + for( int len = min_match_len; len <= main_len; ++len ) + { + trials[len].dis = match_distances[len] + num_rep_distances; + trials[len].prev_index = 0; + trials[len].price = normal_match_price + + price_pair( match_distances[len], len, pos_state ); + } } - } for( int rep = 0; rep < num_rep_distances; ++rep ) { @@ -478,10 +479,25 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances], while( num_trials < cur + newlen ) trials[++num_trials].price = infinite_price; - for( int len = min_match_len; len <= newlen; ++len ) - trials[cur+len].update( match_distances[len] + num_rep_distances, cur, - normal_match_price + - price_pair( match_distances[len], len, pos_state ) ); + int dis = 
match_distances[min_match_len]; + int dis_state = get_dis_state( min_match_len ); + int dis_price = infinite_price; + if( dis < modeled_distances ) + trials[cur+min_match_len].update( dis + num_rep_distances, cur, + normal_match_price + dis_prices[dis_state][dis] + + len_encoder.price( min_match_len, pos_state ) ); + for( int len = min_match_len + 1; len <= newlen; ++len ) + { + if( dis != match_distances[len] || dis_state < max_dis_states - 1 ) + { + dis = match_distances[len]; + dis_state = get_dis_state( len ); + dis_price = price_dis( dis, dis_state ); + } + trials[cur+len].update( dis + num_rep_distances, cur, + normal_match_price + dis_price + + len_encoder.price( len, pos_state ) ); + } } } } diff --git a/encoder.h b/encoder.h index cea753e..314bdcc 100644 --- a/encoder.h +++ b/encoder.h @@ -1,5 +1,5 @@ /* Lzip - Data compressor based on the LZMA algorithm - Copyright (C) 2008, 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -247,12 +247,12 @@ public: ~Range_encoder() { delete[] buffer; } - void flush() { for( int i = 0; i < 5; ++i ) shift_low(); } - void flush_data(); - long long member_position() const throw() { return partial_member_pos + pos + ff_count; } + void flush() { for( int i = 0; i < 5; ++i ) shift_low(); } + void flush_data(); + void put_byte( const uint8_t b ) { buffer[pos] = b; @@ -359,7 +359,9 @@ class Len_encoder pps[len] = tmp + price0( choice2 ) + price_symbol( bm_mid[pos_state], len - len_low_symbols, len_mid_bits ); for( ; len < len_symbols; ++len ) - pps[len] = tmp + price1( choice2 ) + + // using 4 slots per value makes "price" faster + prices[3][len] = prices[2][len] = prices[1][len] = prices[0][len] = + tmp + price1( choice2 ) + price_symbol( bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); counters[pos_state] = len_symbols; } @@ -383,7 
+385,7 @@ class Literal_encoder { Bit_model bm_literal[1<> ( 8 - literal_context_bits ) ); } public: @@ -420,9 +422,7 @@ class LZ_encoder int price; // dual use var; cumulative price, match length int reps[num_rep_distances]; void update( const int d, const int p_i, const int pr ) throw() - { - if( pr < price ) { dis = d; prev_index = p_i; price = pr; } - } + { if( pr < price ) { dis = d; prev_index = p_i; price = pr; } } }; int longest_match_found; @@ -495,18 +495,21 @@ class LZ_encoder return price; } + int price_dis( const int dis, const int dis_state ) const throw() + { + if( dis < modeled_distances ) + return dis_prices[dis_state][dis]; + else + return dis_slot_prices[dis_state][dis_slots[dis]] + + align_prices[dis & (dis_align_size - 1)]; + } + int price_pair( const int dis, const int len, const int pos_state ) const throw() { if( len <= min_match_len && dis >= modeled_distances ) return infinite_price; - int price = len_encoder.price( len, pos_state ); - const int dis_state = get_dis_state( len ); - if( dis < modeled_distances ) - price += dis_prices[dis_state][dis]; - else - price += dis_slot_prices[dis_state][dis_slots[dis]] + - align_prices[dis & (dis_align_size - 1)]; - return price; + return len_encoder.price( len, pos_state ) + + price_dis( dis, get_dis_state( len ) ); } void encode_pair( const uint32_t dis, const int len, const int pos_state ) throw() diff --git a/fast_encoder.cc b/fast_encoder.cc index f4becde..4574379 100644 --- a/fast_encoder.cc +++ b/fast_encoder.cc @@ -1,5 +1,5 @@ /* Lzip - Data compressor based on the LZMA algorithm - Copyright (C) 2008, 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -126,9 +126,6 @@ int Fmatchfinder::longest_match_len( int * const distance ) throw() if( len_limit < 4 ) return 0; } - int maxlen = min_match_len - 1; - const int min_pos = (pos >= dictionary_size_) ? - (pos - dictionary_size_ + 1) : 0; const uint8_t * const data = buffer + pos; key4 = ( ( key4 << 4 ) ^ data[3] ) & ( num_prev_positions - 1 ); @@ -136,13 +133,16 @@ int Fmatchfinder::longest_match_len( int * const distance ) throw() prev_positions[key4] = pos; int32_t * ptr0 = prev_pos_chain + cyclic_pos; + int maxlen = 0; for( int count = 4; ; ) { - if( newpos < min_pos || --count < 0 ) { *ptr0 = -1; break; } + if( newpos < (pos - dictionary_size_ + 1) || newpos < 0 || --count < 0 ) + { *ptr0 = -1; break; } const uint8_t * const newdata = buffer + newpos; int len = 0; - while( len < len_limit && newdata[len] == data[len] ) ++len; + if( newdata[maxlen] == data[maxlen] ) + while( len < len_limit && newdata[len] == data[len] ) ++len; const int delta = pos - newpos; if( maxlen < len ) { maxlen = len; *distance = delta - 1; } @@ -176,8 +176,6 @@ void Fmatchfinder::longest_match_len() throw() if( len_limit < 4 ) return; } - const int min_pos = (pos >= dictionary_size_) ? 
- (pos - dictionary_size_ + 1) : 0; const uint8_t * const data = buffer + pos; key4 = ( ( key4 << 4 ) ^ data[3] ) & ( num_prev_positions - 1 ); @@ -186,7 +184,7 @@ void Fmatchfinder::longest_match_len() throw() int32_t * const ptr0 = prev_pos_chain + cyclic_pos; - if( newpos < min_pos ) *ptr0 = -1; + if( newpos < (pos - dictionary_size_ + 1) || newpos < 0 ) *ptr0 = -1; else { const uint8_t * const newdata = buffer + newpos; @@ -194,8 +192,7 @@ void Fmatchfinder::longest_match_len() throw() std::memcmp( newdata, data, len_limit - 1 ) ) *ptr0 = newpos; else { - const int delta = pos - newpos; - int idx = cyclic_pos - delta; + int idx = cyclic_pos - pos + newpos; if( idx < 0 ) idx += dictionary_size_; *ptr0 = prev_pos_chain[idx]; } @@ -218,7 +215,7 @@ int FLZ_encoder::sequence_optimizer( const int reps[num_rep_distances], const int len = fmatchfinder.true_match_len( 0, reps[i] + 1, max_match_len ); if( len > replen ) { replen = len; rep_index = i; } } - if( replen > min_match_len ) + if( replen > min_match_len && replen + 4 > main_len ) { *disp = rep_index; move_pos( replen, true ); @@ -248,7 +245,8 @@ int FLZ_encoder::sequence_optimizer( const int reps[num_rep_distances], price += literal_encoder.price_matched( prev_byte, cur_byte, match_byte ); const int short_rep_price = price1( bm_match[state()][pos_state] ) + price1( bm_rep[state()] ) + - price_rep_len1( state, pos_state ); + price0( bm_rep0[state()] ) + + price0( bm_len[state()][pos_state] ); if( short_rep_price < price ) *disp = 0; } diff --git a/fast_encoder.h b/fast_encoder.h index 4e817d8..188eb92 100644 --- a/fast_encoder.h +++ b/fast_encoder.h @@ -1,5 +1,5 @@ /* Lzip - Data compressor based on the LZMA algorithm - Copyright (C) 2008, 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -115,11 +115,6 @@ class FLZ_encoder } } - int price_rep_len1( const State & state, const int pos_state ) const throw() - { - return price0( bm_rep0[state()] ) + price0( bm_len[state()][pos_state] ); - } - void encode_pair( const uint32_t dis, const int len, const int pos_state ) throw() { len_encoder.encode( range_encoder, len, pos_state ); diff --git a/lzip.h b/lzip.h index 77ba0e8..a580b3e 100644 --- a/lzip.h +++ b/lzip.h @@ -1,5 +1,5 @@ /* Lzip - Data compressor based on the LZMA algorithm - Copyright (C) 2008, 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -54,6 +54,7 @@ public: } }; + enum { min_dictionary_bits = 12, min_dictionary_size = 1 << min_dictionary_bits, @@ -280,5 +281,7 @@ struct Error void show_error( const char * const msg, const int errcode = 0, const bool help = false ) throw(); void internal_error( const char * const msg ); + +// defined in decoder.cc int readblock( const int fd, uint8_t * const buf, const int size ) throw(); int writeblock( const int fd, const uint8_t * const buf, const int size ) throw(); diff --git a/lziprecover.cc b/lziprecover.cc index 3f1b130..8aded3f 100644 --- a/lziprecover.cc +++ b/lziprecover.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzip compressed files - Copyright (C) 2008, 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Return values: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid input file, 3 for an internal consistency error - (eg, bug) which caused lzip to panic. + (eg, bug) which caused lziprecover to panic. */ #define _FILE_OFFSET_BITS 64 @@ -63,10 +63,10 @@ namespace { +const char * const Program_name = "Lziprecover"; +const char * const program_name = "lziprecover"; +const char * const program_year = "2011"; const char * invocation_name = 0; -const char * const Program_name = "Lziprecover"; -const char * const program_name = "lziprecover"; -const char * const program_year = "2010"; #ifdef O_BINARY const int o_binary = O_BINARY; @@ -190,8 +190,8 @@ bool verify_header( const File_header & header ) if( header.version() != 1 ) { if( verbosity >= 0 ) - std::fprintf( stderr, "Version %d member format not supported, newer %s needed.\n", - header.version(), program_name ); + std::fprintf( stderr, "Version %d member format not supported.\n", + header.version() ); return false; } return true; @@ -411,7 +411,7 @@ int merge_files( const std::vector< std::string > & filenames, for( unsigned int i = 0; i < filenames.size(); ++i ) if( try_decompress( infd_vector[i], isize ) ) { - if( verbosity >= 0 ) + if( verbosity >= 1 ) std::printf( "File `%s' has no errors. 
Recovery is not needed.\n", filenames[i].c_str() ); return 0; @@ -456,7 +456,7 @@ int merge_files( const std::vector< std::string > & filenames, bool done = false; for( int var = 1; var <= variations; ++var ) { - if( verbosity >= 0 ) + if( verbosity >= 1 ) { std::printf( "Trying variation %d of %d \r", var, variations ); std::fflush( stdout ); @@ -477,13 +477,13 @@ int merge_files( const std::vector< std::string > & filenames, { done = true; break; } if( var % base_variations == 0 ) block_vector[0].shift( block_vector[1] ); } - if( verbosity >= 0 ) std::printf( "\n" ); + if( verbosity >= 1 ) std::printf( "\n" ); if( close( outfd ) != 0 ) { show_error( "Error closing output file", errno ); return 1; } if( done ) { - if( verbosity >= 0 ) + if( verbosity >= 1 ) std::printf( "Input files merged successfully.\n" ); return 0; } @@ -512,7 +512,7 @@ int repair_file( const std::string & input_filename, long long failure_pos = 0; if( try_decompress( infd, isize, &failure_pos ) ) { - if( verbosity >= 0 ) + if( verbosity >= 1 ) std::printf( "Input file has no errors. 
Recovery is not needed.\n" ); return 0; } @@ -531,7 +531,7 @@ int repair_file( const std::string & input_filename, bool done = false; for( long long pos = failure_pos; pos >= min_pos; --pos ) { - if( verbosity >= 0 ) + if( verbosity >= 1 ) { std::printf( "Trying position %lld \r", pos ); std::fflush( stdout ); @@ -556,13 +556,13 @@ int repair_file( const std::string & input_filename, writeblock( outfd, &byte, 1 ) != 1 ) { show_error( "Error writing output file", errno ); return 1; } } - if( verbosity >= 0 ) std::printf( "\n" ); + if( verbosity >= 1 ) std::printf( "\n" ); if( close( outfd ) != 0 ) { show_error( "Error closing output file", errno ); return 1; } if( done ) { - if( verbosity >= 0 ) + if( verbosity >= 1 ) std::printf( "Copy of input file repaired successfully.\n" ); return 0; } @@ -683,23 +683,6 @@ int split_file( const std::string & input_filename, } // end namespace -void Pretty_print::operator()( const char * const msg ) const throw() - { - if( verbosity_ >= 0 ) - { - if( first_post ) - { - first_post = false; - std::fprintf( stderr, " %s: ", name_.c_str() ); - for( unsigned int i = 0; i < longest_name - name_.size(); ++i ) - std::fprintf( stderr, " " ); - if( !msg ) std::fflush( stderr ); - } - if( msg ) std::fprintf( stderr, "%s.\n", msg ); - } - } - - void show_error( const char * const msg, const int errcode, const bool help ) throw() { if( verbosity >= 0 ) @@ -720,48 +703,12 @@ void show_error( const char * const msg, const int errcode, const bool help ) th void internal_error( const char * const msg ) { - std::fprintf( stderr, "%s: internal error: %s.\n", program_name, msg ); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: internal error: %s.\n", program_name, msg ); std::exit( 3 ); } -// Returns the number of bytes really read. -// If (returned value < size) and (errno == 0), means EOF was reached. 
-// -int readblock( const int fd, uint8_t * const buf, const int size ) throw() - { - int rest = size; - errno = 0; - while( rest > 0 ) - { - errno = 0; - const int n = read( fd, buf + size - rest, rest ); - if( n > 0 ) rest -= n; - else if( n == 0 ) break; - else if( errno != EINTR && errno != EAGAIN ) break; - } - return ( rest > 0 ) ? size - rest : size; - } - - -// Returns the number of bytes really written. -// If (returned value < size), it is always an error. -// -int writeblock( const int fd, const uint8_t * const buf, const int size ) throw() - { - int rest = size; - errno = 0; - while( rest > 0 ) - { - errno = 0; - const int n = write( fd, buf + size - rest, rest ); - if( n > 0 ) rest -= n; - else if( errno && errno != EINTR && errno != EAGAIN ) break; - } - return ( rest > 0 ) ? size - rest : size; - } - - int main( const int argc, const char * const argv[] ) { enum Mode diff --git a/main.cc b/main.cc index 193f4ac..c23b3bc 100644 --- a/main.cc +++ b/main.cc @@ -1,5 +1,5 @@ /* Lzip - Data compressor based on the LZMA algorithm - Copyright (C) 2008, 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -75,10 +75,10 @@ namespace { +const char * const Program_name = "Lzip"; +const char * const program_name = "lzip"; +const char * const program_year = "2011"; const char * invocation_name = 0; -const char * const Program_name = "Lzip"; -const char * const program_name = "lzip"; -const char * const program_year = "2010"; #ifdef O_BINARY const int o_binary = O_BINARY; @@ -117,6 +117,7 @@ void show_help() throw() std::printf( " -c, --stdout send output to standard output\n" ); std::printf( " -d, --decompress decompress\n" ); std::printf( " -f, --force overwrite existing output files\n" ); + std::printf( " -F, --recompress force recompression of compressed files\n" ); std::printf( " -k, --keep keep (don't delete) input files\n" ); std::printf( " -m, --match-length= set match length limit in bytes [36]\n" ); std::printf( " -o, --output= if reading stdin, place the output into \n" ); @@ -151,10 +152,9 @@ const char * format_num( long long num ) throw() { const char * const prefix[8] = { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; - enum { buf_size = 16 }; + enum { buf_size = 16, factor = 1024 }; static char buf[buf_size]; const char *p = ""; - const int factor = 1024; for( int i = 0; i < 8 && ( llabs( num ) > 9999 || ( llabs( num ) >= factor && num % factor == 0 ) ); ++i ) @@ -164,7 +164,7 @@ const char * format_num( long long num ) throw() } -long long getnum( const char * const ptr, const int bs = 0, +long long getnum( const char * const ptr, const long long llimit = LLONG_MIN + 1, const long long ulimit = LLONG_MAX ) throw() { @@ -185,9 +185,6 @@ long long getnum( const char * const ptr, const int bs = 0, switch( tail[0] ) { case ' ': break; - case 'b': if( bs > 0 ) { factor = bs; exponent = 1; } - else bad_multiplier = true; - break; case 'Y': exponent = 8; break; case 'Z': exponent = 7; break; case 'E': exponent = 6; 
break; @@ -229,7 +226,7 @@ int get_dict_size( const char * const arg ) throw() if( bits >= min_dictionary_bits && bits <= max_dictionary_bits && *tail == 0 ) return ( 1 << bits ); - return getnum( arg, 0, min_dictionary_size, max_dictionary_size ); + return getnum( arg, min_dictionary_size, max_dictionary_size ); } @@ -248,10 +245,10 @@ int extension_index( const std::string & name ) throw() int open_instream( const std::string & name, struct stat * const in_statsp, const Mode program_mode, const int eindex, - const bool force, const bool to_stdout ) throw() + const bool recompress, const bool to_stdout ) throw() { int infd = -1; - if( program_mode == m_compress && !force && eindex >= 0 ) + if( program_mode == m_compress && !recompress && eindex >= 0 ) { if( verbosity >= 0 ) std::fprintf( stderr, "%s: Input file `%s' already has `%s' suffix.\n", @@ -271,14 +268,16 @@ int open_instream( const std::string & name, struct stat * const in_statsp, { const int i = fstat( infd, in_statsp ); const mode_t & mode = in_statsp->st_mode; - if( i < 0 || !( S_ISREG( mode ) || ( to_stdout && - ( S_ISFIFO( mode ) || S_ISSOCK( mode ) || - S_ISBLK( mode ) || S_ISCHR( mode ) ) ) ) ) + const bool can_read = ( i == 0 && + ( S_ISBLK( mode ) || S_ISCHR( mode ) || + S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); + if( i != 0 || ( !S_ISREG( mode ) && ( !to_stdout || !can_read ) ) ) { if( verbosity >= 0 ) std::fprintf( stderr, "%s: Input file `%s' is not a regular file%s.\n", program_name, name.c_str(), - to_stdout ? "" : " and `--stdout' was not specified" ); + ( can_read && !to_stdout ) ? 
+ " and `--stdout' was not specified" : "" ); close( infd ); infd = -1; } @@ -309,7 +308,7 @@ void set_d_outname( const std::string & name, const int i ) throw() } } output_filename = name; output_filename += ".out"; - if( verbosity >= 0 ) + if( verbosity >= 1 ) std::fprintf( stderr, "%s: Can't guess original name for `%s' -- using `%s'.\n", program_name, name.c_str(), output_filename.c_str() ); } @@ -358,9 +357,9 @@ void cleanup_and_fail( const int retval ) throw() delete_output_on_interrupt = false; if( verbosity >= 0 ) std::fprintf( stderr, "%s: Deleting output file `%s', if it exists.\n", - program_name, output_filename.c_str() ); + program_name, output_filename.c_str() ); if( outfd >= 0 ) { close( outfd ); outfd = -1; } - if( std::remove( output_filename.c_str() ) != 0 ) + if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT ) show_error( "WARNING: deletion of output file (apparently) failed." ); } std::exit( retval ); @@ -373,9 +372,10 @@ void close_and_set_permissions( const struct stat * const in_statsp ) bool error = false; if( in_statsp ) { - if( fchmod( outfd, in_statsp->st_mode ) != 0 || - ( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 && - errno != EPERM ) ) error = true; + if( ( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 && + errno != EPERM ) || + fchmod( outfd, in_statsp->st_mode ) != 0 ) + error = true; // fchown will in many cases return with EPERM, which can be safely ignored. 
} if( close( outfd ) == 0 ) outfd = -1; @@ -541,6 +541,42 @@ int fcompress( const long long member_size, const long long volume_size, } +unsigned char xdigit( const int value ) throw() + { + if( value >= 0 && value <= 9 ) return '0' + value; + if( value >= 10 && value <= 15 ) return 'A' + value - 10; + return 0; + } + + +void show_trailing_garbage( const uint8_t * const data, const int size, + const Pretty_print & pp, const bool all ) throw() + { + std::string garbage_msg; + if( !all ) garbage_msg = "first bytes of "; + garbage_msg += "trailing garbage found = "; + bool text = true; + for( int i = 0; i < size; ++i ) + if( !std::isprint( data[i] ) ) { text = false; break; } + if( text ) + { + garbage_msg += '`'; + garbage_msg.append( (const char *)data, size ); + garbage_msg += '\''; + } + else + { + for( int i = 0; i < size; ++i ) + { + if( i > 0 ) garbage_msg += ' '; + garbage_msg += xdigit( data[i] >> 4 ); + garbage_msg += xdigit( data[i] & 0x0F ); + } + } + pp( garbage_msg.c_str() ); + } + + int decompress( const int infd, const Pretty_print & pp, const bool testing ) { int retval = 0; @@ -551,19 +587,25 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing ) for( bool first_member = true; ; first_member = false, pp.reset() ) { File_header header; + int size; rdec.reset_member_position(); - for( int i = 0; i < File_header::size; ++i ) - header.data[i] = rdec.get_byte(); + for( size = 0; size < File_header::size && !rdec.finished(); ++size ) + header.data[size] = rdec.get_byte(); if( rdec.finished() ) // End Of File { - if( first_member ) { pp( "Error reading member header" ); retval = 1; } + if( first_member ) + { pp( "Error reading member header" ); retval = 1; } + else if( verbosity >= 4 && size > 0 ) + show_trailing_garbage( header.data, size, pp, true ); break; } if( !header.verify_magic() ) { - if( !first_member ) break; // trailing garbage - pp( "Bad magic number (file not in lzip format)" ); - retval = 2; break; + if( first_member 
) + { pp( "Bad magic number (file not in lzip format)" ); retval = 2; } + else if( verbosity >= 4 ) + show_trailing_garbage( header.data, size, pp, false ); + break; } if( !header.verify_version() ) { @@ -577,7 +619,7 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing ) header.dictionary_size() > max_dictionary_size ) { pp( "Invalid dictionary size in member header" ); retval = 2; break; } - if( verbosity >= 1 ) + if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) { pp(); if( verbosity >= 2 ) @@ -603,7 +645,7 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing ) } retval = 2; break; } - if( verbosity >= 1 ) + if( verbosity >= 2 ) { if( testing ) std::fprintf( stderr, "ok\n" ); else std::fprintf( stderr, "done\n" ); } } @@ -614,6 +656,9 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing ) retval = 1; } catch( Error e ) { pp(); show_error( e.msg, errno ); retval = 1; } + if( verbosity == 1 && retval == 0 ) + { if( testing ) std::fprintf( stderr, "ok\n" ); + else std::fprintf( stderr, "done\n" ); } return retval; } @@ -635,23 +680,6 @@ void set_signals() throw() } // end namespace -void Pretty_print::operator()( const char * const msg ) const throw() - { - if( verbosity_ >= 0 ) - { - if( first_post ) - { - first_post = false; - std::fprintf( stderr, " %s: ", name_.c_str() ); - for( unsigned int i = 0; i < longest_name - name_.size(); ++i ) - std::fprintf( stderr, " " ); - if( !msg ) std::fflush( stderr ); - } - if( msg ) std::fprintf( stderr, "%s.\n", msg ); - } - } - - void show_error( const char * const msg, const int errcode, const bool help ) throw() { if( verbosity >= 0 ) @@ -672,48 +700,12 @@ void show_error( const char * const msg, const int errcode, const bool help ) th void internal_error( const char * const msg ) { - std::fprintf( stderr, "%s: internal error: %s.\n", program_name, msg ); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: internal error: %s.\n", 
program_name, msg ); std::exit( 3 ); } -// Returns the number of bytes really read. -// If (returned value < size) and (errno == 0), means EOF was reached. -// -int readblock( const int fd, uint8_t * const buf, const int size ) throw() - { - int rest = size; - errno = 0; - while( rest > 0 ) - { - errno = 0; - const int n = read( fd, buf + size - rest, rest ); - if( n > 0 ) rest -= n; - else if( n == 0 ) break; - else if( errno != EINTR && errno != EAGAIN ) break; - } - return ( rest > 0 ) ? size - rest : size; - } - - -// Returns the number of bytes really written. -// If (returned value < size), it is always an error. -// -int writeblock( const int fd, const uint8_t * const buf, const int size ) throw() - { - int rest = size; - errno = 0; - while( rest > 0 ) - { - errno = 0; - const int n = write( fd, buf + size - rest, rest ); - if( n > 0 ) rest -= n; - else if( errno && errno != EINTR && errno != EAGAIN ) break; - } - return ( rest > 0 ) ? size - rest : size; - } - - int main( const int argc, const char * const argv[] ) { // Mapping from gzip/bzip2 style 1..9 compression modes @@ -737,6 +729,7 @@ int main( const int argc, const char * const argv[] ) Mode program_mode = m_compress; bool force = false; bool keep_input_files = false; + bool recompress = false; bool to_stdout = false; bool zero = false; std::string input_filename; @@ -761,6 +754,7 @@ int main( const int argc, const char * const argv[] ) { 'd', "decompress", Arg_parser::no }, { 'e', "extreme", Arg_parser::no }, { 'f', "force", Arg_parser::no }, + { 'F', "recompress", Arg_parser::no }, { 'h', "help", Arg_parser::no }, { 'k', "keep", Arg_parser::no }, { 'm', "match-length", Arg_parser::yes }, @@ -773,7 +767,7 @@ int main( const int argc, const char * const argv[] ) { 'V', "version", Arg_parser::no }, { 0 , 0, Arg_parser::no } }; - Arg_parser parser( argc, argv, options ); + const Arg_parser parser( argc, argv, options ); if( parser.error().size() ) // bad option { show_error( parser.error().c_str(), 0, 
true ); return 1; } @@ -788,29 +782,30 @@ int main( const int argc, const char * const argv[] ) case '0': zero = true; break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - encoder_options = option_mapping[code-'0']; - zero = false; break; - case 'b': member_size = getnum( arg, 0, 100000, LLONG_MAX / 2 ); break; + zero = false; + encoder_options = option_mapping[code-'0']; break; + case 'b': member_size = getnum( arg, 100000, LLONG_MAX / 2 ); break; case 'c': to_stdout = true; break; case 'd': program_mode = m_decompress; break; case 'e': break; // ignored by now case 'f': force = true; break; + case 'F': recompress = true; break; case 'h': show_help(); return 0; case 'k': keep_input_files = true; break; case 'm': encoder_options.match_len_limit = - getnum( arg, 0, min_match_len_limit, max_match_len ); + getnum( arg, min_match_len_limit, max_match_len ); zero = false; break; case 'o': default_output_filename = arg; break; case 'q': verbosity = -1; break; case 's': encoder_options.dictionary_size = get_dict_size( arg ); zero = false; break; - case 'S': volume_size = getnum( arg, 0, 100000, LLONG_MAX / 2 ); break; + case 'S': volume_size = getnum( arg, 100000, LLONG_MAX / 2 ); break; case 't': program_mode = m_test; break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; default : internal_error( "uncaught option" ); } - } + } // end process options #if defined(__MSVCRT__) || defined(__OS2__) _setmode( STDIN_FILENO, O_BINARY ); @@ -872,7 +867,7 @@ int main( const int argc, const char * const argv[] ) input_filename = filenames[i]; const int eindex = extension_index( input_filename ); infd = open_instream( input_filename, &in_stats, program_mode, - eindex, force, to_stdout ); + eindex, recompress, to_stdout ); if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } if( program_mode != m_test ) { diff --git a/testsuite/check.sh b/testsuite/check.sh index 710f0b8..82bc47d 100755 --- 
a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lzip - Data compressor based on the LZMA algorithm -# Copyright (C) 2008, 2009, 2010 Antonio Diaz Diaz. +# Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -48,6 +48,13 @@ printf . cmp in copy || fail=1 printf . +"${LZIP}" -cf "${testdir}"/test_v1.lz > out 2>/dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cF "${testdir}"/test_v1.lz > out || fail=1 +"${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1 +cmp in copy || fail=1 +printf . + for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do "${LZIP}" -k -$i in || fail=1 mv -f in.lz copy.lz || fail=1 @@ -79,6 +86,11 @@ for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do printf . done +"${LZIP}" -$i < in > anyothername || fail=1 +"${LZIP}" -d anyothername || fail=1 +cmp in anyothername.out || fail=1 +printf . + # Description of test files for lziprecover: # test_bad1.lz: byte at offset 67 changed from 0xCC to 0x33 # test_bad2.lz: [ 34- 66) --> copy of bytes [ 68- 100) @@ -98,47 +110,47 @@ for i in 1 2 3 ; do done "${LZIP}" -0kf -$i in || fail=1 -"${LZIPRECOVER}" -R in.lz > /dev/null || fail=1 +"${LZIPRECOVER}" -R in.lz || fail=1 printf . -"${LZIPRECOVER}" -R "${testdir}"/test_v1.lz > /dev/null || fail=1 +"${LZIPRECOVER}" -R "${testdir}"/test_v1.lz || fail=1 printf . -"${LZIPRECOVER}" -R -o copy.lz "${testdir}"/test_bad1.lz > /dev/null || fail=1 +"${LZIPRECOVER}" -R -o copy.lz "${testdir}"/test_bad1.lz || fail=1 "${LZIP}" -df copy.lz || fail=1 cmp in copy || fail=1 printf . -"${LZIPRECOVER}" -m -o copy.lz "${testdir}"/test_bad1.lz "${testdir}"/test_bad2.lz > /dev/null || fail=1 +"${LZIPRECOVER}" -m -o copy.lz "${testdir}"/test_bad1.lz "${testdir}"/test_bad2.lz || fail=1 "${LZIP}" -df copy.lz || fail=1 cmp in copy || fail=1 printf . 
-"${LZIPRECOVER}" -m -o copy.lz "${testdir}"/test_bad2.lz "${testdir}"/test_bad1.lz > /dev/null || fail=1 +"${LZIPRECOVER}" -m -o copy.lz "${testdir}"/test_bad2.lz "${testdir}"/test_bad1.lz || fail=1 "${LZIP}" -df copy.lz || fail=1 cmp in copy || fail=1 printf . for i in 1 2 ; do for j in 3 4 5 ; do - "${LZIPRECOVER}" -m -o copy.lz "${testdir}"/test_bad${i}.lz "${testdir}"/test_bad${j}.lz > /dev/null || fail=1 + "${LZIPRECOVER}" -m -o copy.lz "${testdir}"/test_bad${i}.lz "${testdir}"/test_bad${j}.lz || fail=1 "${LZIP}" -df copy.lz || fail=1 cmp in copy || fail=1 printf . - "${LZIPRECOVER}" -m -o copy.lz "${testdir}"/test_bad${j}.lz "${testdir}"/test_bad${i}.lz > /dev/null || fail=1 + "${LZIPRECOVER}" -m -o copy.lz "${testdir}"/test_bad${j}.lz "${testdir}"/test_bad${i}.lz || fail=1 "${LZIP}" -df copy.lz || fail=1 cmp in copy || fail=1 printf . done done -"${LZIPRECOVER}" -m -o copy.lz "${testdir}"/test_bad3.lz "${testdir}"/test_bad4.lz "${testdir}"/test_bad5.lz > /dev/null || fail=1 +"${LZIPRECOVER}" -m -o copy.lz "${testdir}"/test_bad3.lz "${testdir}"/test_bad4.lz "${testdir}"/test_bad5.lz || fail=1 "${LZIP}" -df copy.lz || fail=1 cmp in copy || fail=1 printf . -"${LZIPRECOVER}" -m -o copy.lz "${testdir}"/test_bad4.lz "${testdir}"/test_bad5.lz "${testdir}"/test_bad3.lz > /dev/null || fail=1 +"${LZIPRECOVER}" -m -o copy.lz "${testdir}"/test_bad4.lz "${testdir}"/test_bad5.lz "${testdir}"/test_bad3.lz || fail=1 "${LZIP}" -df copy.lz || fail=1 cmp in copy || fail=1 printf . -"${LZIPRECOVER}" -m -o copy.lz "${testdir}"/test_bad5.lz "${testdir}"/test_bad3.lz "${testdir}"/test_bad4.lz > /dev/null || fail=1 +"${LZIPRECOVER}" -m -o copy.lz "${testdir}"/test_bad5.lz "${testdir}"/test_bad3.lz "${testdir}"/test_bad4.lz || fail=1 "${LZIP}" -df copy.lz || fail=1 cmp in copy || fail=1 printf . 
diff --git a/testsuite/unzcrash.cc b/testsuite/unzcrash.cc index 7d10b28..5fac783 100644 --- a/testsuite/unzcrash.cc +++ b/testsuite/unzcrash.cc @@ -1,84 +1,349 @@ /* Unzcrash - A test program written to test robustness to decompression of corrupted data. Inspired by unzcrash.c from Julian Seward's bzip2. - Copyright (C) 2008, 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz. - This program is free software: you have unlimited permission - to copy, distribute and modify it. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Usage is: - unzcrash "lzip -tv" filename.lz + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - This program reads the specified file and then repeatedly - decompresses it, increasing 256 times each byte of the compressed - data, so as to test all possible one-byte errors. This should not - cause any invalid memory accesses. If it does, please, report it as - a bug. - - Compile this file with the command: - g++ -Wall -W -O2 -o unzcrash testsuite/unzcrash.cc + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ +#include +#include #include #include #include +#include +#include +#include #include #include +#include "../arg_parser.h" + +#if CHAR_BIT != 8 +#error "Environments where CHAR_BIT != 8 are not supported." 
+#endif + +#ifndef LLONG_MAX +#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL +#endif +#ifndef LLONG_MIN +#define LLONG_MIN (-LLONG_MAX - 1LL) +#endif +#ifndef ULLONG_MAX +#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL +#endif + + +namespace { + +const char * const Program_name = "Unzcrash"; +const char * const program_name = "unzcrash"; +const char * const program_year = "2011"; +const char * invocation_name = 0; + +int verbosity = 0; + + +void show_help() throw() + { + std::printf( "%s - A test program written to test robustness to\n", Program_name ); + std::printf( "decompression of corrupted data.\n" ); + std::printf( "\nUsage: %s [options] \"lzip -tv\" filename.lz\n", invocation_name ); + std::printf( "\nThis program reads the specified file and then repeatedly decompresses\n" ); + std::printf( "it, increasing 256 times each byte of the compressed data, so as to test\n" ); + std::printf( "all possible one-byte errors. This should not cause any invalid memory\n" ); + std::printf( "accesses. If it does, please, report it as a bug.\n" ); + std::printf( "\nOptions:\n" ); + std::printf( " -h, --help display this help and exit\n" ); + std::printf( " -V, --version output version information and exit\n" ); + std::printf( " -b, --bits=[,n] test -bit errors instead of full byte\n" ); + std::printf( " -p, --position= first byte position to test\n" ); + std::printf( " -q, --quiet suppress all messages\n" ); + std::printf( " -s, --size= number of byte positions to test\n" ); + std::printf( " -v, --verbose be verbose (a 2nd -v gives more)\n" ); + std::printf( "\nReport bugs to lzip-bug@nongnu.org\n" ); + std::printf( "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" ); + } + + +void show_version() throw() + { + std::printf( "%s %s\n", Program_name, PROGVERSION ); + std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); + std::printf( "License GPLv3+: GNU GPL version 3 or later \n" ); + std::printf( "This is free software: you are free to change and redistribute 
it.\n" ); + std::printf( "There is NO WARRANTY, to the extent permitted by law.\n" ); + } + + +void show_error( const char * const msg, const int errcode = 0, + const bool help = false ) throw() + { + if( verbosity >= 0 ) + { + if( msg && msg[0] ) + { + std::fprintf( stderr, "%s: %s", program_name, msg ); + if( errcode > 0 ) + std::fprintf( stderr, ": %s", std::strerror( errcode ) ); + std::fprintf( stderr, "\n" ); + } + if( help && invocation_name && invocation_name[0] ) + std::fprintf( stderr, "Try `%s --help' for more information.\n", + invocation_name ); + } + } + + +void internal_error( const char * const msg ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: internal error: %s.\n", program_name, msg ); + std::exit( 3 ); + } + + +long long getnum( const char * const ptr, + const long long llimit = LLONG_MIN + 1, + const long long ulimit = LLONG_MAX ) throw() + { + errno = 0; + char *tail; + long long result = strtoll( ptr, &tail, 0 ); + if( tail == ptr ) + { + show_error( "Bad or missing numerical argument.", 0, true ); + std::exit( 1 ); + } + + if( !errno && tail[0] ) + { + int factor = ( tail[1] == 'i' ) ? 
1024 : 1000; + int exponent = 0; + bool bad_multiplier = false; + switch( tail[0] ) + { + case ' ': break; + case 'Y': exponent = 8; break; + case 'Z': exponent = 7; break; + case 'E': exponent = 6; break; + case 'P': exponent = 5; break; + case 'T': exponent = 4; break; + case 'G': exponent = 3; break; + case 'M': exponent = 2; break; + case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true; + break; + case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true; + break; + default : bad_multiplier = true; + } + if( bad_multiplier ) + { + show_error( "Bad multiplier in numerical argument.", 0, true ); + std::exit( 1 ); + } + for( int i = 0; i < exponent; ++i ) + { + if( LLONG_MAX / factor >= llabs( result ) ) result *= factor; + else { errno = ERANGE; break; } + } + } + if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; + if( errno ) + { + show_error( "Numerical argument out of limits." ); + std::exit( 1 ); + } + return result; + } + + +class Bitset8 // 8 value bitset (1..8) + { + bool data[8]; + static bool valid_digit( const unsigned char ch ) throw() + { return ( ch >= '1' && ch <= '8' ); } + +public: + Bitset8() throw() { for( int i = 0; i < 8; ++i ) data[i] = true; } + + bool includes( const int i ) const throw() + { return ( i >= 1 && i <= 8 && data[i-1] ); } + + // Recognized formats: 1 1,2,3 1-4 1,3-5,8 + bool parse( const char * p ) throw() + { + for( int i = 0; i < 8; ++i ) data[i] = false; + while( true ) + { + const unsigned char ch1 = *p++; + if( !valid_digit( ch1 ) ) break; + if( *p != '-' ) data[ch1-'1'] = true; + else + { + ++p; + if( !valid_digit( *p ) || ch1 > *p ) break; + for( int c = ch1; c <= *p; ++c ) data[c-'1'] = true; + ++p; + } + if( *p == 0 ) return true; + if( *p == ',' ) ++p; else break; + } + show_error( "Invalid value or range." 
); + return false; + } + + // number of n-bit errors per byte (n=0..8): 1 8 28 56 70 56 28 8 1 + void print() const throw() + { + std::fflush( stderr ); + int c = 0; + for( int i = 0; i < 8; ++i ) if( data[i] ) ++c; + if( c == 8 ) std::printf( "Testing full byte.\n" ); + else if( c == 0 ) std::printf( "Nothing to test.\n" ); + else + { + std::printf( "Testing " ); + for( int i = 0; i < 8; ++i ) + if( data[i] ) + { + std::printf( "%d", i + 1 ); + if( --c ) std::printf( "," ); + } + std::printf( " bit errors.\n" ); + } + std::fflush( stdout ); + } + }; + + +int differing_bits( const uint8_t byte1, const uint8_t byte2 ) + { + int count = 0; + uint8_t dif = byte1 ^ byte2; + while( dif ) + { count += ( dif & 1 ); dif >>= 1; } + return count; + } + +} // end namespace + int main( const int argc, const char * const argv[] ) { - if( argc != 3 ) + enum { buffer_size = 3 << 20 }; + Bitset8 bits; // if Bitset8::parse not called test full byte + int pos = 0; + int max_size = buffer_size; + invocation_name = argv[0]; + + const Arg_parser::Option options[] = { - std::fprintf( stderr, "Usage: unzcrash \"lzip -tv\" filename.lz\n" ); + { 'h', "help", Arg_parser::no }, + { 'b', "bits", Arg_parser::yes }, + { 'p', "position", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 's', "size", Arg_parser::yes }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { 0 , 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 1; } + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const char * const arg = parser.argument( argind ).c_str(); + switch( code ) + { + case 'h': show_help(); return 0; + case 'b': if( !bits.parse( arg ) ) return 1; break; + case 'p': pos = getnum( arg, 0, buffer_size - 1 ); break; + case 'q': verbosity = -1; 
break; + case 's': max_size = getnum( arg, 1, buffer_size ); break; + case 'v': if( verbosity < 4 ) ++verbosity; break; + case 'V': show_version(); return 0; + default : internal_error( "uncaught option" ); + } + } // end process options + + if( argind + 2 != parser.arguments() ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "Usage: %s \"lzip -tv\" filename.lz\n", + invocation_name ); return 1; } - FILE *f = std::fopen( argv[2], "rb" ); + FILE *f = std::fopen( parser.argument( argind + 1 ).c_str(), "rb" ); if( !f ) { - std::fprintf( stderr, "Can't open file `%s' for reading\n", argv[2] ); + if( verbosity >= 0 ) + std::fprintf( stderr, "Can't open file `%s' for reading\n", + parser.argument( argind + 1 ).c_str() ); return 1; } - const int buffer_size = 1 << 21; uint8_t * const buffer = new uint8_t[buffer_size]; const int size = std::fread( buffer, 1, buffer_size, f ); if( size >= buffer_size ) { - std::fprintf( stderr, "input file `%s' too big.\n", argv[2] ); + if( verbosity >= 0 ) + std::fprintf( stderr, "input file `%s' is too big.\n", + parser.argument( argind + 1 ).c_str() ); return 1; } std::fclose( f ); - f = popen( argv[1], "w" ); + f = popen( parser.argument( argind ).c_str(), "w" ); if( !f ) - { - std::fprintf( stderr, "incorrect parameters or too many files.\n" ); - return 1; - } + { show_error( "Can't open pipe", errno ); return 1; } const int wr = std::fwrite( buffer, 1, size, f ); if( wr != size || pclose( f ) != 0 ) { - std::fprintf( stderr, "Could not run `%s' or other error.\n", argv[1] ); + if( verbosity >= 0 ) + std::fprintf( stderr, "Could not run `%s' : %s.\n", + parser.argument( argind ).c_str(), std::strerror( errno ) ); return 1; } std::signal( SIGPIPE, SIG_IGN ); + if( verbosity >= 1 ) bits.print(); - for( int byte = 0; byte < size; ++byte ) + const int end = ( ( pos + max_size < size ) ? 
pos + max_size : size ); + for( int i = pos; i < end; ++i ) { - std::fprintf( stderr, "byte %d\n", byte ); - for( int i = 0; i < 255; ++i ) + if( verbosity >= 0 ) + std::fprintf( stderr, "byte %d\n", i ); + const uint8_t byte = buffer[i]; + for( int j = 0; j < 255; ++j ) { - ++buffer[byte]; - f = popen( argv[1], "w" ); - if( !f ) - { std::fprintf( stderr, "Can't open pipe.\n" ); return 1; } - std::fwrite( buffer, 1, size, f ); - pclose( f ); + ++buffer[i]; + if( bits.includes( differing_bits( byte, buffer[i] ) ) ) + { + f = popen( parser.argument( argind ).c_str(), "w" ); + if( !f ) + { show_error( "Can't open pipe", errno ); return 1; } + std::fwrite( buffer, 1, size, f ); + pclose( f ); + } } - ++buffer[byte]; + buffer[i] = byte; } delete[] buffer; -- cgit v1.2.3