diff options
-rw-r--r-- | ChangeLog | 28 | ||||
-rw-r--r-- | INSTALL | 2 | ||||
-rw-r--r-- | NEWS | 34 | ||||
-rw-r--r-- | README | 2 | ||||
-rw-r--r-- | bbexample.c | 2 | ||||
-rw-r--r-- | carg_parser.c | 2 | ||||
-rw-r--r-- | carg_parser.h | 2 | ||||
-rw-r--r-- | cbuffer.c | 2 | ||||
-rwxr-xr-x | configure | 6 | ||||
-rw-r--r-- | decoder.c | 2 | ||||
-rw-r--r-- | decoder.h | 10 | ||||
-rw-r--r-- | doc/lzlib.info | 284 | ||||
-rw-r--r-- | doc/lzlib.texi | 275 | ||||
-rw-r--r-- | doc/minilzip.1 | 11 | ||||
-rw-r--r-- | encoder.c | 2 | ||||
-rw-r--r-- | encoder.h | 6 | ||||
-rw-r--r-- | encoder_base.c | 49 | ||||
-rw-r--r-- | encoder_base.h | 21 | ||||
-rw-r--r-- | fast_encoder.c | 2 | ||||
-rw-r--r-- | fast_encoder.h | 6 | ||||
-rw-r--r-- | lzcheck.c | 89 | ||||
-rw-r--r-- | lzip.h | 19 | ||||
-rw-r--r-- | lzlib.c | 26 | ||||
-rw-r--r-- | lzlib.h | 4 | ||||
-rw-r--r-- | main.c | 471 | ||||
-rwxr-xr-x | testsuite/check.sh | 62 |
26 files changed, 997 insertions, 422 deletions
@@ -1,3 +1,16 @@ +2018-02-07 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.10 released. + * LZ_compress_finish now adjusts dictionary size for each member. + * lzlib.c (LZ_decompress_read): Detect corrupt header with HD=3. + * main.c: Added new option '--loose-trailing'. + * main.c (main): Option '-S, --volume-size' now keeps input files. + * main.c: Replaced 'bits/byte' with inverse compression ratio. + * main.c: Show final diagnostic when testing multiple files. + * main.c: Do not add a second .lz extension to the arg of -o. + * main.c: Show dictionary size at verbosity level 4 (-vvvv). + * lzlib.texi: Added chapter 'Invoking minilzip'. + 2017-04-11 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.9 released. @@ -10,14 +23,15 @@ 2016-05-17 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.8 released. + * lzlib.h: Define LZ_API_VERSION to 1. * decoder.c (LZd_verify_trailer): Removed test of final code. * main.c: Added new option '-a, --trailing-error'. * main.c (main): Delete '--output' file if infd is a terminal. * main.c (main): Don't use stdin more than once. * configure: Avoid warning on some shells when testing for gcc. * Makefile.in: Detect the existence of install-info. - * testsuite/check.sh: A POSIX shell is required to run the tests. - * testsuite/check.sh: Don't check error messages. + * check.sh: A POSIX shell is required to run the tests. + * check.sh: Don't check error messages. 2015-07-08 Antonio Diaz Diaz <antonio@gnu.org> @@ -78,7 +92,7 @@ reduces compression time for large values of '--match-length' by up to 6%. * main.cc: Added new option '-F, --recompress'. - * Makefile.in: 'make install' no more tries to run + * Makefile.in: 'make install' no longer tries to run '/sbin/ldconfig' on systems lacking it. 2011-01-03 Antonio Diaz Diaz <ant_diaz@teleline.es> @@ -129,14 +143,14 @@ allowing LZ_compress_restart_member to restart a finished stream. * lzlib.cc: Accept only query or close operations after a fatal error has occurred. 
- * Shared version of lzlib is no more built by default. - * testsuite/check.sh: Use 'test1' instead of 'COPYING' for testing. + * Shared version of lzlib is no longer built by default. + * check.sh: Use 'test1' instead of 'COPYING' for testing. 2009-10-20 Antonio Diaz Diaz <ant_diaz@teleline.es> * Version 0.7 released. * Compression time has been reduced by 4%. - * testsuite/check.sh: Removed -9 to run in less than 256MiB of RAM. + * check.sh: Removed -9 to run in less than 256MiB of RAM. * lzcheck.cc: Read files of any size up to 2^63 bytes. 2009-09-02 Antonio Diaz Diaz <ant_diaz@teleline.es> @@ -174,7 +188,7 @@ * Version 0.1 released. -Copyright (C) 2009-2017 Antonio Diaz Diaz. +Copyright (C) 2009-2018 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and @@ -62,7 +62,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2009-2017 Antonio Diaz Diaz. +Copyright (C) 2009-2018 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -1,12 +1,32 @@ -Changes in version 1.9: +Changes in version 1.10: -Compression time of option '-0' has been reduced by 3%. +The function 'LZ_compress_finish' now adjusts the dictionary size for +each member. -Compression time of options '-1' to '-9' has been reduced by 1%. +The option '--loose-trailing' has been added to minilzip. -Decompression time has been reduced by 3%. +The test used by lzlib to discriminate trailing data from a corrupt +header in multimember or concatenated files has been improved to a +Hamming distance (HD) of 3, and the 3 bit flips must happen in different +magic bytes for the test to fail. As a consequence, lzlib now returns a +data error when some kinds of files are appended to a lzip file as +trailing data. 
+The '--loose-trailing' option of minilzip can be used to ignore such +trailing data when decompressing. +Lziprecover can be used to remove conflicting trailing data from a file. -In test mode, minilzip now continues checking the rest of the files if -any input file is a terminal. +Option '-S, --volume-size' of minilzip now keeps input files unchanged. -The license of the library has been changed to "2-clause BSD". +The 'bits/byte' ratio has been replaced with the inverse compression +ratio in the output of minilzip. + +minilzip now shows a final diagnostic at verbosity level 1 (-v) or +higher if any file fails the test when testing multiple files. + +minilzip no longer adds a second '.lz' extension to the argument of '-o' +if it already ends in '.lz' or '.tlz'. + +minilzip now shows the dictionary size at verbosity level 4 (-vvvv) when +decompressing or testing. + +The new chapter 'Invoking minilzip' has been added to the manual. @@ -87,7 +87,7 @@ range encoding), Igor Pavlov (for putting all the above together in LZMA), and Julian Seward (for bzip2's CLI). -Copyright (C) 2009-2017 Antonio Diaz Diaz. +Copyright (C) 2009-2018 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/bbexample.c b/bbexample.c index ab9a6e0..8c2ff04 100644 --- a/bbexample.c +++ b/bbexample.c @@ -1,5 +1,5 @@ /* Buffer to buffer example - Test program for the lzlib library - Copyright (C) 2010-2017 Antonio Diaz Diaz. + Copyright (C) 2010-2018 Antonio Diaz Diaz. This program is free software: you have unlimited permission to copy, distribute and modify it. diff --git a/carg_parser.c b/carg_parser.c index 6850643..10ad4dc 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2017 Antonio Diaz Diaz. + Copyright (C) 2006-2018 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/carg_parser.h b/carg_parser.h index c4ce31d..e1c70dd 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2017 Antonio Diaz Diaz. + Copyright (C) 2006-2018 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Lzlib - Compression library for the lzip format -# Copyright (C) 2009-2017 Antonio Diaz Diaz. +# Copyright (C) 2009-2018 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. pkgname=lzlib -pkgversion=1.9 +pkgversion=1.10 soversion=1 progname=minilzip progname_static=${progname} @@ -194,7 +194,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lzlib - Compression library for the lzip format -# Copyright (C) 2009-2017 Antonio Diaz Diaz. +# Copyright (C) 2009-2018 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -319,11 +319,7 @@ static inline bool LZd_enough_free_bytes( const struct LZ_decoder * const d ) { return Cb_free_bytes( &d->cb ) >= lzd_min_free_bytes; } static inline uint8_t LZd_peek_prev( const struct LZ_decoder * const d ) - { - if( d->cb.put > 0 ) return d->cb.buffer[d->cb.put-1]; - if( d->pos_wrapped ) return d->cb.buffer[d->cb.buffer_size-1]; - return 0; /* prev_byte of first byte */ - } + { return d->cb.buffer[((d->cb.put > 0) ? d->cb.put : d->cb.buffer_size)-1]; } static inline uint8_t LZd_peek( const struct LZ_decoder * const d, const unsigned distance ) @@ -387,6 +383,8 @@ static inline bool LZd_init( struct LZ_decoder * const d, d->member_finished = false; d->verify_trailer_pending = false; d->pos_wrapped = false; + /* prev_byte of first byte; also for LZd_peek( 0 ) on corrupt file */ + d->cb.buffer[d->cb.buffer_size-1] = 0; d->rep0 = 0; d->rep1 = 0; d->rep2 = 0; diff --git a/doc/lzlib.info b/doc/lzlib.info index a2a8c68..23a4d21 100644 --- a/doc/lzlib.info +++ b/doc/lzlib.info @@ -11,25 +11,26 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir) Lzlib Manual ************ -This manual is for Lzlib (version 1.9, 11 April 2017). +This manual is for Lzlib (version 1.10, 7 February 2018). 
* Menu: -* Introduction:: Purpose and features of lzlib -* Library version:: Checking library version -* Buffering:: Sizes of lzlib's buffers -* Parameter limits:: Min / max values for some parameters -* Compression functions:: Descriptions of the compression functions -* Decompression functions:: Descriptions of the decompression functions -* Error codes:: Meaning of codes returned by functions -* Error messages:: Error messages corresponding to error codes -* Data format:: Detailed format of the compressed data -* Examples:: A small tutorial with examples -* Problems:: Reporting bugs -* Concept index:: Index of concepts +* Introduction:: Purpose and features of lzlib +* Library version:: Checking library version +* Buffering:: Sizes of lzlib's buffers +* Parameter limits:: Min / max values for some parameters +* Compression functions:: Descriptions of the compression functions +* Decompression functions:: Descriptions of the decompression functions +* Error codes:: Meaning of codes returned by functions +* Error messages:: Error messages corresponding to error codes +* Invoking minilzip:: Command line interface of the test program +* Data format:: Detailed format of the compressed data +* Examples:: A small tutorial with examples +* Problems:: Reporting bugs +* Concept index:: Index of concepts - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -50,7 +51,7 @@ archiving, taking into account both data integrity and decoder availability: * The lzip format provides very safe integrity checking and some data - recovery means. The lziprecover program can repair bit-flip errors + recovery means. The lziprecover program can repair bit flip errors (one of the most common forms of data corruption) in lzip files, and provides data recovery capabilities, including error-checked merging of damaged copies of a file. 
*Note Data safety: @@ -170,8 +171,9 @@ minimum sizes: * Input compression buffer. Written to by the 'LZ_compress_write' function. For the normal variant of LZMA, its size is two times - the dictionary size set with the 'LZ_compress_open' function or 64 - KiB, whichever is larger. For the fast variant, its size is 1 MiB. + the dictionary size set with the 'LZ_compress_open' function or + 64 KiB, whichever is larger. For the fast variant, its size is + 1 MiB. * Output compression buffer. Read from by the 'LZ_compress_read' function. Its size is 64 KiB. @@ -326,6 +328,7 @@ calling 'LZ_compress_errno' before using it. ENCODER ) Returns 1 if all the data have been read and 'LZ_compress_close' can be safely called. Otherwise it returns 0. + 'LZ_compress_finished' implies 'LZ_compress_member_finished'. -- Function: int LZ_compress_member_finished ( struct LZ_Encoder * const ENCODER ) @@ -532,14 +535,19 @@ whether a call failed. If the call failed, then you can examine The end of the data stream was reached in the middle of a member. -- Constant: enum LZ_Errno LZ_data_error - The data stream is corrupt. + The data stream is corrupt. If 'LZ_decompress_member_position' is 6 + or less, it indicates either a format version not supported, an + invalid dictionary size, a corrupt header in a multimember data + stream, or trailing data too similar to a valid lzip header. + Lziprecover can be used to remove conflicting trailing data from a + file. -- Constant: enum LZ_Errno LZ_library_error A bug was detected in the library. Please, report it (*note Problems::). -File: lzlib.info, Node: Error messages, Next: Data format, Prev: Error codes, Up: Top +File: lzlib.info, Node: Error messages, Next: Invoking minilzip, Prev: Error codes, Up: Top 8 Error messages **************** @@ -555,10 +563,198 @@ File: lzlib.info, Node: Error messages, Next: Data format, Prev: Error codes, 'LZ_(de)compress_errno'. 
-File: lzlib.info, Node: Data format, Next: Examples, Prev: Error messages, Up: Top +File: lzlib.info, Node: Invoking minilzip, Next: Data format, Prev: Error messages, Up: Top + +9 Invoking minilzip +******************* + +The format for running minilzip is: + + minilzip [OPTIONS] [FILES] + +'-' used as a FILE argument means standard input. It can be mixed with +other FILES and is read just once, the first time it appears in the +command line. + + minilzip supports the following options: + +'-h' +'--help' + Print an informative help message describing the options and exit. + +'-V' +'--version' + Print the version number of minilzip on the standard output and + exit. + +'-a' +'--trailing-error' + Exit with error status 2 if any remaining input is detected after + decompressing the last member. Such remaining input is usually + trailing garbage that can be safely ignored. + +'-b BYTES' +'--member-size=BYTES' + When compressing, set the member size limit to BYTES. A small + member size may degrade compression ratio, so use it only when + needed. Valid values range from 100 kB to 2 PiB. Defaults to + 2 PiB. + +'-c' +'--stdout' + Compress or decompress to standard output; keep input files + unchanged. If compressing several files, each file is compressed + independently. This option is needed when reading from a named + pipe (fifo) or from a device. Use it also to recover as much of + the decompressed data as possible when decompressing a corrupt + file. + +'-d' +'--decompress' + Decompress the specified files. If a file does not exist or can't + be opened, minilzip continues decompressing the rest of the files. + If a file fails to decompress, or is a terminal, minilzip exits + immediately without decompressing the rest of the files. + +'-f' +'--force' + Force overwrite of output files. + +'-F' +'--recompress' + When compressing, force re-compression of files whose name already + has the '.lz' or '.tlz' suffix. 
+ +'-k' +'--keep' + Keep (don't delete) input files during compression or + decompression. + +'-m BYTES' +'--match-length=BYTES' + When compressing, set the match length limit in bytes. After a + match this long is found, the search is finished. Valid values + range from 5 to 273. Larger values usually give better compression + ratios but longer compression times. -9 Data format -************* +'-o FILE' +'--output=FILE' + When reading from standard input and '--stdout' has not been + specified, use 'FILE' as the virtual name of the uncompressed + file. This produces a file named 'FILE' when decompressing, or a + file named 'FILE.lz' when compressing. A second '.lz' extension is + not added if 'FILE' already ends in '.lz' or '.tlz'. When + compressing and splitting the output in volumes, several files + named 'FILE00001.lz', 'FILE00002.lz', etc, are created. + +'-q' +'--quiet' + Quiet operation. Suppress all messages. + +'-s BYTES' +'--dictionary-size=BYTES' + When compressing, set the dictionary size limit in bytes. Minilzip + will use the smallest possible dictionary size for each file + without exceeding this limit. Valid values range from 4 KiB to + 512 MiB. Values 12 to 29 are interpreted as powers of two, meaning + 2^12 to 2^29 bytes. Note that dictionary sizes are quantized. If + the specified size does not match one of the valid sizes, it will + be rounded upwards by adding up to (BYTES / 8) to it. + + For maximum compression you should use a dictionary size limit as + large as possible, but keep in mind that the decompression memory + requirement is affected at compression time by the choice of + dictionary size limit. + +'-S BYTES' +'--volume-size=BYTES' + When compressing, split the compressed output into several volume + files with names 'original_name00001.lz', 'original_name00002.lz', + etc, and set the volume size limit to BYTES. Input files are kept + unchanged. Each volume is a complete, maybe multimember, lzip + file. 
A small volume size may degrade compression ratio, so use it + only when needed. Valid values range from 100 kB to 4 EiB. + +'-t' +'--test' + Check integrity of the specified files, but don't decompress them. + This really performs a trial decompression and throws away the + result. Use it together with '-v' to see information about the + files. If a file fails the test, does not exist, can't be opened, + or is a terminal, minilzip continues checking the rest of the + files. A final diagnostic is shown at verbosity level 1 or higher + if any file fails the test when testing multiple files. + +'-v' +'--verbose' + Verbose mode. + When compressing, show the compression ratio and size for each file + processed. + When decompressing or testing, further -v's (up to 4) increase the + verbosity level, showing status, compression ratio, dictionary + size, and trailer contents (CRC, data size, member size). + +'-0 .. -9' + Set the compression parameters (dictionary size and match length + limit) as shown in the table below. The default compression level + is '-6'. Note that '-9' can be much slower than '-0'. These + options have no effect when decompressing or testing. + + The bidimensional parameter space of LZMA can't be mapped to a + linear scale optimal for all files. If your files are large, very + repetitive, etc, you may need to use the '--dictionary-size' and + '--match-length' options directly to achieve optimal performance. + + Level Dictionary size Match length limit + -0 64 KiB 16 bytes + -1 1 MiB 5 bytes + -2 1.5 MiB 6 bytes + -3 2 MiB 8 bytes + -4 3 MiB 12 bytes + -5 4 MiB 20 bytes + -6 8 MiB 36 bytes + -7 16 MiB 68 bytes + -8 24 MiB 132 bytes + -9 32 MiB 273 bytes + +'--fast' +'--best' + Aliases for GNU gzip compatibility. + +'--loose-trailing' + When decompressing or testing, allow trailing data whose first + bytes are so similar to the magic bytes of a lzip header that they + can be confused with a corrupt header. 
Use this option if a file + triggers a "corrupt header" error and the cause is not indeed a + corrupt header. + + + Numbers given as arguments to options may be followed by a multiplier +and an optional 'B' for "byte". + + Table of SI and binary prefixes (unit multipliers): + +Prefix Value | Prefix Value +k kilobyte (10^3 = 1000) | Ki kibibyte (2^10 = 1024) +M megabyte (10^6) | Mi mebibyte (2^20) +G gigabyte (10^9) | Gi gibibyte (2^30) +T terabyte (10^12) | Ti tebibyte (2^40) +P petabyte (10^15) | Pi pebibyte (2^50) +E exabyte (10^18) | Ei exbibyte (2^60) +Z zettabyte (10^21) | Zi zebibyte (2^70) +Y yottabyte (10^24) | Yi yobibyte (2^80) + + + Exit status: 0 for a normal exit, 1 for environmental problems (file +not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or +invalid input file, 3 for an internal consistency error (eg, bug) which +caused minilzip to panic. + + +File: lzlib.info, Node: Data format, Next: Examples, Prev: Invoking minilzip, Up: Top + +10 Data format +************** Perfection is reached, not when there is no longer anything to add, but when there is no longer anything to take away. @@ -578,9 +774,9 @@ when there is no longer anything to take away. represents a variable number of bytes. - A lzip data stream consists of a series of "members" (compressed data -sets). The members simply appear one after another in the data stream, -with no additional information before, between, or after them. + A lzip data stream consists of a series of "members" (compressed +data sets). The members simply appear one after another in the data +stream, with no additional information before, between, or after them. Each member has the following structure: +--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -630,7 +826,7 @@ with no additional information before, between, or after them. 
File: lzlib.info, Node: Examples, Next: Problems, Prev: Data format, Up: Top -10 A small tutorial with examples +11 A small tutorial with examples ********************************* This chapter shows the order in which the library functions should be @@ -709,15 +905,15 @@ Example 5: Multimember compression (MEMBER_SIZE < total output). Example 6: Multimember compression (user-restarted members). - 1) LZ_compress_open + 1) LZ_compress_open (with MEMBER_SIZE > largest member). 2) LZ_compress_write 3) LZ_compress_read 4) go back to step 2 until member termination is desired 5) LZ_compress_finish 6) LZ_compress_read 7) go back to step 6 until LZ_compress_member_finished returns 1 - 8) verify that LZ_compress_finished returns 1 - 9) go to step 12 if all input data have been written + 9) go to step 12 if all input data have been written and + LZ_compress_finished returns 1 10) LZ_compress_restart_member 11) go back to step 2 12) LZ_compress_close @@ -750,7 +946,7 @@ next member in case of data error. File: lzlib.info, Node: Problems, Next: Concept index, Prev: Examples, Up: Top -11 Reporting bugs +12 Reporting bugs ***************** There are probably bugs in lzlib. There are certainly errors and @@ -783,25 +979,29 @@ Concept index * examples: Examples. (line 6) * getting help: Problems. (line 6) * introduction: Introduction. (line 6) +* invoking: Invoking minilzip. (line 6) * library version: Library version. (line 6) +* options: Invoking minilzip. (line 6) * parameter limits: Parameter limits. 
(line 6) Tag Table: Node: Top220 -Node: Introduction1303 -Node: Library version6115 -Node: Buffering6760 -Node: Parameter limits7980 -Node: Compression functions8939 -Node: Decompression functions15481 -Node: Error codes21651 -Node: Error messages23626 -Node: Data format24205 -Node: Examples26770 -Node: Problems30851 -Node: Concept index31423 +Node: Introduction1342 +Node: Library version6154 +Node: Buffering6799 +Node: Parameter limits8024 +Node: Compression functions8983 +Node: Decompression functions15592 +Node: Error codes21762 +Node: Error messages24064 +Node: Invoking minilzip24649 +Ref: --trailing-error25243 +Node: Data format31883 +Node: Examples34454 +Node: Problems38570 +Node: Concept index39142 End Tag Table diff --git a/doc/lzlib.texi b/doc/lzlib.texi index 8b4aaaf..34154cd 100644 --- a/doc/lzlib.texi +++ b/doc/lzlib.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 11 April 2017 -@set VERSION 1.9 +@set UPDATED 7 February 2018 +@set VERSION 1.10 @dircategory Data Compression @direntry @@ -35,22 +35,23 @@ This manual is for Lzlib (version @value{VERSION}, @value{UPDATED}). 
@menu -* Introduction:: Purpose and features of lzlib -* Library version:: Checking library version -* Buffering:: Sizes of lzlib's buffers -* Parameter limits:: Min / max values for some parameters -* Compression functions:: Descriptions of the compression functions -* Decompression functions:: Descriptions of the decompression functions -* Error codes:: Meaning of codes returned by functions -* Error messages:: Error messages corresponding to error codes -* Data format:: Detailed format of the compressed data -* Examples:: A small tutorial with examples -* Problems:: Reporting bugs -* Concept index:: Index of concepts +* Introduction:: Purpose and features of lzlib +* Library version:: Checking library version +* Buffering:: Sizes of lzlib's buffers +* Parameter limits:: Min / max values for some parameters +* Compression functions:: Descriptions of the compression functions +* Decompression functions:: Descriptions of the decompression functions +* Error codes:: Meaning of codes returned by functions +* Error messages:: Error messages corresponding to error codes +* Invoking minilzip:: Command line interface of the test program +* Data format:: Detailed format of the compressed data +* Examples:: A small tutorial with examples +* Problems:: Reporting bugs +* Concept index:: Index of concepts @end menu @sp 1 -Copyright @copyright{} 2009-2017 Antonio Diaz Diaz. +Copyright @copyright{} 2009-2018 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -74,7 +75,7 @@ availability: The lzip format provides very safe integrity checking and some data recovery means. 
The @uref{http://www.nongnu.org/lzip/manual/lziprecover_manual.html#Data-safety,,lziprecover} -program can repair bit-flip errors (one of the most common forms of data +program can repair bit flip errors (one of the most common forms of data corruption) in lzip files, and provides data recovery capabilities, including error-checked merging of damaged copies of a file. @ifnothtml @@ -201,18 +202,18 @@ sizes: @item Input compression buffer. Written to by the @samp{LZ_compress_write} function. For the normal variant of LZMA, its size is two times the dictionary size set with the -@samp{LZ_compress_open} function or 64 KiB, whichever is larger. For the -fast variant, its size is 1 MiB. +@samp{LZ_compress_open} function or @w{64 KiB}, whichever is larger. For +the fast variant, its size is @w{1 MiB}. @item Output compression buffer. Read from by the -@samp{LZ_compress_read} function. Its size is 64 KiB. +@samp{LZ_compress_read} function. Its size is @w{64 KiB}. @item Input decompression buffer. Written to by the -@samp{LZ_decompress_write} function. Its size is 64 KiB. +@samp{LZ_decompress_write} function. Its size is @w{64 KiB}. @item Output decompression buffer. Read from by the @samp{LZ_decompress_read} function. Its size is the dictionary size set -in the header of the member currently being decompressed or 64 KiB, +in the header of the member currently being decompressed or @w{64 KiB}, whichever is larger. @end itemize @@ -271,10 +272,10 @@ does not return @samp{LZ_ok}, the returned pointer must not be used and should be freed with @samp{LZ_compress_close} to avoid memory leaks. @var{dictionary_size} sets the dictionary size to be used, in bytes. -Valid values range from 4 KiB to 512 MiB. Note that dictionary sizes are -quantized. If the specified size does not match one of the valid sizes, -it will be rounded upwards by adding up to (@var{dictionary_size} / 8) -to it. +Valid values range from @w{4 KiB} to @w{512 MiB}. Note that dictionary +sizes are quantized. 
If the specified size does not match one of the +valid sizes, it will be rounded upwards by adding up to +@w{(@var{dictionary_size} / 8)} to it. @var{match_len_limit} sets the match length limit in bytes. Valid values range from 5 to 273. Larger values usually give better compression @@ -283,13 +284,13 @@ ratios but longer compression times. If @var{dictionary_size} is 65535 and @var{match_len_limit} is 16, the fast variant of LZMA is chosen, which produces identical compressed output as @code{lzip -0}. (The dictionary size used will be rounded -upwards to 64 KiB). +upwards to @w{64 KiB}). @var{member_size} sets the member size limit in bytes. Minimum member -size limit is 100 kB. Small member size may degrade compression ratio, so -use it only when needed. To produce a single-member data stream, give -@var{member_size} a value larger than the amount of data to be produced, -for example INT64_MAX. +size limit is @w{100 kB}. Small member size may degrade compression +ratio, so use it only when needed. To produce a single-member data +stream, give @var{member_size} a value larger than the amount of data to +be produced, for example INT64_MAX. @end deftypefun @@ -369,7 +370,8 @@ Returns the current error code for @var{encoder} (@pxref{Error codes}). @deftypefun int LZ_compress_finished ( struct LZ_Encoder * const @var{encoder} ) Returns 1 if all the data have been read and @samp{LZ_compress_close} -can be safely called. Otherwise it returns 0. +can be safely called. Otherwise it returns 0. @samp{LZ_compress_finished} +implies @samp{LZ_compress_member_finished}. @end deftypefun @@ -606,7 +608,11 @@ The end of the data stream was reached in the middle of a member. @end deftypevr @deftypevr Constant {enum LZ_Errno} LZ_data_error -The data stream is corrupt. +The data stream is corrupt. 
If @samp{LZ_decompress_member_position} is 6 +or less, it indicates either a format version not supported, an invalid +dictionary size, a corrupt header in a multimember data stream, or +trailing data too similar to a valid lzip header. Lziprecover can be +used to remove conflicting trailing data from a file. @end deftypevr @deftypevr Constant {enum LZ_Errno} LZ_library_error @@ -629,6 +635,199 @@ The value of @var{lz_errno} normally comes from a call to @end deftypefun +@node Invoking minilzip +@chapter Invoking minilzip +@cindex invoking +@cindex options + +The format for running minilzip is: + +@example +minilzip [@var{options}] [@var{files}] +@end example + +@noindent +@samp{-} used as a @var{file} argument means standard input. It can be +mixed with other @var{files} and is read just once, the first time it +appears in the command line. + +minilzip supports the following options: + +@table @code +@item -h +@itemx --help +Print an informative help message describing the options and exit. + +@item -V +@itemx --version +Print the version number of minilzip on the standard output and exit. + +@anchor{--trailing-error} +@item -a +@itemx --trailing-error +Exit with error status 2 if any remaining input is detected after +decompressing the last member. Such remaining input is usually trailing +garbage that can be safely ignored. + +@item -b @var{bytes} +@itemx --member-size=@var{bytes} +When compressing, set the member size limit to @var{bytes}. A small +member size may degrade compression ratio, so use it only when needed. +Valid values range from @w{100 kB} to @w{2 PiB}. Defaults to @w{2 PiB}. + +@item -c +@itemx --stdout +Compress or decompress to standard output; keep input files unchanged. +If compressing several files, each file is compressed independently. +This option is needed when reading from a named pipe (fifo) or from a +device. Use it also to recover as much of the decompressed data as +possible when decompressing a corrupt file. 
+ +@item -d +@itemx --decompress +Decompress the specified files. If a file does not exist or can't be +opened, minilzip continues decompressing the rest of the files. If a file +fails to decompress, or is a terminal, minilzip exits immediately without +decompressing the rest of the files. + +@item -f +@itemx --force +Force overwrite of output files. + +@item -F +@itemx --recompress +When compressing, force re-compression of files whose name already has +the @samp{.lz} or @samp{.tlz} suffix. + +@item -k +@itemx --keep +Keep (don't delete) input files during compression or decompression. + +@item -m @var{bytes} +@itemx --match-length=@var{bytes} +When compressing, set the match length limit in bytes. After a match +this long is found, the search is finished. Valid values range from 5 to +273. Larger values usually give better compression ratios but longer +compression times. + +@item -o @var{file} +@itemx --output=@var{file} +When reading from standard input and @samp{--stdout} has not been +specified, use @samp{@var{file}} as the virtual name of the uncompressed +file. This produces a file named @samp{@var{file}} when decompressing, +or a file named @samp{@var{file}.lz} when compressing. A second +@samp{.lz} extension is not added if @samp{@var{file}} already ends in +@samp{.lz} or @samp{.tlz}. When compressing and splitting the output in +volumes, several files named @samp{@var{file}00001.lz}, +@samp{@var{file}00002.lz}, etc, are created. + +@item -q +@itemx --quiet +Quiet operation. Suppress all messages. + +@item -s @var{bytes} +@itemx --dictionary-size=@var{bytes} +When compressing, set the dictionary size limit in bytes. Minilzip will use +the smallest possible dictionary size for each file without exceeding +this limit. Valid values range from @w{4 KiB} to @w{512 MiB}. Values 12 +to 29 are interpreted as powers of two, meaning 2^12 to 2^29 bytes. Note +that dictionary sizes are quantized. 
If the specified size does not +match one of the valid sizes, it will be rounded upwards by adding up to +@w{(@var{bytes} / 8)} to it. + +For maximum compression you should use a dictionary size limit as large +as possible, but keep in mind that the decompression memory requirement +is affected at compression time by the choice of dictionary size limit. + +@item -S @var{bytes} +@itemx --volume-size=@var{bytes} +When compressing, split the compressed output into several volume files +with names @samp{original_name00001.lz}, @samp{original_name00002.lz}, +etc, and set the volume size limit to @var{bytes}. Input files are kept +unchanged. Each volume is a complete, maybe multimember, lzip file. A +small volume size may degrade compression ratio, so use it only when +needed. Valid values range from @w{100 kB} to @w{4 EiB}. + +@item -t +@itemx --test +Check integrity of the specified files, but don't decompress them. This +really performs a trial decompression and throws away the result. Use it +together with @samp{-v} to see information about the files. If a file +fails the test, does not exist, can't be opened, or is a terminal, minilzip +continues checking the rest of the files. A final diagnostic is shown at +verbosity level 1 or higher if any file fails the test when testing +multiple files. + +@item -v +@itemx --verbose +Verbose mode.@* +When compressing, show the compression ratio and size for each file +processed.@* +When decompressing or testing, further -v's (up to 4) increase the +verbosity level, showing status, compression ratio, dictionary size, +and trailer contents (CRC, data size, member size). + +@item -0 .. -9 +Set the compression parameters (dictionary size and match length limit) +as shown in the table below. The default compression level is @samp{-6}. +Note that @samp{-9} can be much slower than @samp{-0}. These options +have no effect when decompressing or testing. 
+ +The bidimensional parameter space of LZMA can't be mapped to a linear +scale optimal for all files. If your files are large, very repetitive, +etc, you may need to use the @samp{--dictionary-size} and +@samp{--match-length} options directly to achieve optimal performance. + +@multitable {Level} {Dictionary size} {Match length limit} +@item Level @tab Dictionary size @tab Match length limit +@item -0 @tab 64 KiB @tab 16 bytes +@item -1 @tab 1 MiB @tab 5 bytes +@item -2 @tab 1.5 MiB @tab 6 bytes +@item -3 @tab 2 MiB @tab 8 bytes +@item -4 @tab 3 MiB @tab 12 bytes +@item -5 @tab 4 MiB @tab 20 bytes +@item -6 @tab 8 MiB @tab 36 bytes +@item -7 @tab 16 MiB @tab 68 bytes +@item -8 @tab 24 MiB @tab 132 bytes +@item -9 @tab 32 MiB @tab 273 bytes +@end multitable + +@item --fast +@itemx --best +Aliases for GNU gzip compatibility. + +@item --loose-trailing +When decompressing or testing, allow trailing data whose first bytes are +so similar to the magic bytes of a lzip header that they can be confused +with a corrupt header. Use this option if a file triggers a "corrupt +header" error and the cause is not indeed a corrupt header. + +@end table + +Numbers given as arguments to options may be followed by a multiplier +and an optional @samp{B} for "byte". 
+ +Table of SI and binary prefixes (unit multipliers): + +@multitable {Prefix} {kilobyte (10^3 = 1000)} {|} {Prefix} {kibibyte (2^10 = 1024)} +@item Prefix @tab Value @tab | @tab Prefix @tab Value +@item k @tab kilobyte (10^3 = 1000) @tab | @tab Ki @tab kibibyte (2^10 = 1024) +@item M @tab megabyte (10^6) @tab | @tab Mi @tab mebibyte (2^20) +@item G @tab gigabyte (10^9) @tab | @tab Gi @tab gibibyte (2^30) +@item T @tab terabyte (10^12) @tab | @tab Ti @tab tebibyte (2^40) +@item P @tab petabyte (10^15) @tab | @tab Pi @tab pebibyte (2^50) +@item E @tab exabyte (10^18) @tab | @tab Ei @tab exbibyte (2^60) +@item Z @tab zettabyte (10^21) @tab | @tab Zi @tab zebibyte (2^70) +@item Y @tab yottabyte (10^24) @tab | @tab Yi @tab yobibyte (2^80) +@end multitable + +@sp 1 +Exit status: 0 for a normal exit, 1 for environmental problems (file not +found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or +invalid input file, 3 for an internal consistency error (eg, bug) which +caused minilzip to panic. + + @node Data format @chapter Data format @cindex data format @@ -655,9 +854,9 @@ represents one byte; a box like this: represents a variable number of bytes. @sp 1 -A lzip data stream consists of a series of "members" (compressed data -sets). The members simply appear one after another in the data stream, -with no additional information before, between, or after them. +A lzip data stream consists of a series of "members" (compressed data sets). +The members simply appear one after another in the data stream, with no +additional information before, between, or after them. Each member has the following structure: @verbatim @@ -810,15 +1009,15 @@ Example 5: Multimember compression (@var{member_size} < total output). Example 6: Multimember compression (user-restarted members). @example - 1) LZ_compress_open + 1) LZ_compress_open (with @var{member_size} > largest member). 
2) LZ_compress_write 3) LZ_compress_read 4) go back to step 2 until member termination is desired 5) LZ_compress_finish 6) LZ_compress_read 7) go back to step 6 until LZ_compress_member_finished returns 1 - 8) verify that LZ_compress_finished returns 1 - 9) go to step 12 if all input data have been written + 9) go to step 12 if all input data have been written and + LZ_compress_finished returns 1 10) LZ_compress_restart_member 11) go back to step 2 12) LZ_compress_close diff --git a/doc/minilzip.1 b/doc/minilzip.1 index 3fefa02..a5f30c8 100644 --- a/doc/minilzip.1 +++ b/doc/minilzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH MINILZIP "1" "April 2017" "minilzip 1.9" "User Commands" +.TH MINILZIP "1" "February 2018" "minilzip 1.10" "User Commands" .SH NAME minilzip \- reduces the size of files .SH SYNOPSIS @@ -49,7 +49,7 @@ suppress all messages set dictionary size limit in bytes [8 MiB] .TP \fB\-S\fR, \fB\-\-volume\-size=\fR<bytes> -set volume size limit in bytes +set volume size limit in bytes, implies \fB\-k\fR .TP \fB\-t\fR, \fB\-\-test\fR test compressed file integrity @@ -65,6 +65,9 @@ alias for \fB\-0\fR .TP \fB\-\-best\fR alias for \fB\-9\fR +.TP +\fB\-\-loose\-trailing\fR +allow trailing data seeming corrupt header .PP If no file names are given, or if a file is '\-', minilzip compresses or decompresses from standard input to standard output. @@ -87,8 +90,8 @@ Report bugs to lzip\-bug@nongnu.org .br Lzlib home page: http://www.nongnu.org/lzip/lzlib.html .SH COPYRIGHT -Copyright \(co 2017 Antonio Diaz Diaz. -Using lzlib 1.9 +Copyright \(co 2018 Antonio Diaz Diaz. +Using lzlib 1.10 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -287,7 +287,7 @@ static inline bool LZe_init( struct LZ_encoder * const e, const int dict_size, const int len_limit, const unsigned long long member_size ) { - enum { before = max_num_trials, + enum { before_size = max_num_trials, /* bytes to keep in buffer after pos */ after_size = max_num_trials + ( 2 * max_match_len ) + 1, dict_factor = 2, @@ -295,7 +295,7 @@ static inline bool LZe_init( struct LZ_encoder * const e, pos_array_factor = 2, min_free_bytes = 2 * max_num_trials }; - if( !LZeb_init( &e->eb, before, dict_size, after_size, dict_factor, + if( !LZeb_init( &e->eb, before_size, dict_size, after_size, dict_factor, num_prev_positions23, pos_array_factor, min_free_bytes, member_size ) ) return false; e->cycles = ( len_limit < max_match_len ) ? 16 + ( len_limit / 2 ) : 256; diff --git a/encoder_base.c b/encoder_base.c index be9af5d..0dbc617 100644 --- a/encoder_base.c +++ b/encoder_base.c @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -39,39 +39,39 @@ static bool Mb_normalize_pos( struct Matchfinder_base * const mb ) } -static bool Mb_init( struct Matchfinder_base * const mb, - const int before, const int dict_size, - const int after_size, const int dict_factor, - const int num_prev_positions23, +static bool Mb_init( struct Matchfinder_base * const mb, const int before_size, + const int dict_size, const int after_size, + const int dict_factor, const int num_prev_positions23, const int pos_array_factor ) { const int buffer_size_limit = - ( dict_factor * dict_size ) + before + after_size; + ( dict_factor * dict_size ) + before_size + after_size; unsigned size; int i; mb->partial_data_pos = 0; - mb->before_size = before; + mb->before_size = before_size; mb->after_size = after_size; mb->pos = 0; mb->cyclic_pos = 0; mb->stream_pos = 0; + mb->num_prev_positions23 = num_prev_positions23; mb->at_stream_end = false; mb->flushing = false; mb->buffer_size = max( 65536, buffer_size_limit ); mb->buffer = (uint8_t *)malloc( mb->buffer_size ); if( !mb->buffer ) return false; + mb->saved_dictionary_size = dict_size; mb->dictionary_size = dict_size; mb->pos_limit = mb->buffer_size - after_size; size = 1 << max( 16, real_bits( mb->dictionary_size - 1 ) - 2 ); if( mb->dictionary_size > 1 << 26 ) /* 64 MiB */ size >>= 1; mb->key4_mask = size - 1; - mb->num_prev_positions23 = num_prev_positions23; size += num_prev_positions23; - mb->num_prev_positions = size; + mb->pos_array_size = pos_array_factor * ( mb->dictionary_size + 1 ); size += mb->pos_array_size; if( size * sizeof mb->prev_positions[0] <= size ) mb->prev_positions = 0; @@ -84,21 +84,25 @@ static bool Mb_init( struct Matchfinder_base * const mb, } +static void Mb_adjust_array( struct Matchfinder_base * const mb ) + { + int size = 1 << max( 16, real_bits( mb->dictionary_size - 1 ) - 2 ); + if( mb->dictionary_size > 1 << 26 ) /* 64 MiB */ + size >>= 1; + 
mb->key4_mask = size - 1; + size += mb->num_prev_positions23; + mb->num_prev_positions = size; + mb->pos_array = mb->prev_positions + mb->num_prev_positions; + } + + static void Mb_adjust_dictionary_size( struct Matchfinder_base * const mb ) { if( mb->stream_pos < mb->dictionary_size ) { - int size; - mb->buffer_size = - mb->dictionary_size = - mb->pos_limit = max( min_dictionary_size, mb->stream_pos ); - size = 1 << max( 16, real_bits( mb->dictionary_size - 1 ) - 2 ); - if( mb->dictionary_size > 1 << 26 ) /* 64 MiB */ - size >>= 1; - mb->key4_mask = size - 1; - size += mb->num_prev_positions23; - mb->num_prev_positions = size; - mb->pos_array = mb->prev_positions + mb->num_prev_positions; + mb->dictionary_size = max( min_dictionary_size, mb->stream_pos ); + Mb_adjust_array( mb ); + mb->pos_limit = mb->buffer_size; } } @@ -114,6 +118,9 @@ static void Mb_reset( struct Matchfinder_base * const mb ) mb->cyclic_pos = 0; mb->at_stream_end = false; mb->flushing = false; + mb->dictionary_size = mb->saved_dictionary_size; + Mb_adjust_array( mb ); + mb->pos_limit = mb->buffer_size - mb->after_size; for( i = 0; i < mb->num_prev_positions; ++i ) mb->prev_positions[i] = 0; } @@ -180,7 +187,7 @@ static void LZeb_reset( struct LZ_encoder_base * const eb, Bm_array_init( eb->bm_align, dis_align_size ); Lm_init( &eb->match_len_model ); Lm_init( &eb->rep_len_model ); - Re_reset( &eb->renc ); + Re_reset( &eb->renc, eb->mb.dictionary_size ); for( i = 0; i < num_rep_distances; ++i ) eb->reps[i] = 0; eb->state = 0; eb->member_finished = false; diff --git a/encoder_base.h b/encoder_base.h index c4cfcca..ddba7e6 100644 --- a/encoder_base.h +++ b/encoder_base.h @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -238,16 +238,16 @@ struct Matchfinder_base int num_prev_positions23; int num_prev_positions; /* size of prev_positions */ int pos_array_size; + int saved_dictionary_size; /* dictionary_size restored by Mb_reset */ bool at_stream_end; /* stream_pos shows real end of file */ bool flushing; }; static bool Mb_normalize_pos( struct Matchfinder_base * const mb ); -static bool Mb_init( struct Matchfinder_base * const mb, - const int before, const int dict_size, - const int after_size, const int dict_factor, - const int num_prev_positions23, +static bool Mb_init( struct Matchfinder_base * const mb, const int before_size, + const int dict_size, const int after_size, + const int dict_factor, const int num_prev_positions23, const int pos_array_factor ); static inline void Mb_free( struct Matchfinder_base * const mb ) @@ -339,7 +339,8 @@ static inline void Re_shift_low( struct Range_encoder * const renc ) renc->low = ( renc->low & 0x00FFFFFFU ) << 8; } -static inline void Re_reset( struct Range_encoder * const renc ) +static inline void Re_reset( struct Range_encoder * const renc, + const unsigned dictionary_size ) { int i; Cb_reset( &renc->cb ); @@ -348,6 +349,7 @@ static inline void Re_reset( struct Range_encoder * const renc ) renc->range = 0xFFFFFFFFU; renc->ff_count = 0; renc->cache = 0; + Fh_set_dictionary_size( renc->header, dictionary_size ); for( i = 0; i < Fh_size; ++i ) Cb_put_byte( &renc->cb, renc->header[i] ); } @@ -359,8 +361,7 @@ static inline bool Re_init( struct Range_encoder * const renc, if( !Cb_init( &renc->cb, 65536 + min_free_bytes ) ) return false; renc->min_free_bytes = min_free_bytes; Fh_set_magic( renc->header ); - Fh_set_dictionary_size( renc->header, dictionary_size ); - Re_reset( renc ); + Re_reset( renc, dictionary_size ); return true; } @@ -544,14 +545,14 @@ static void LZeb_reset( struct LZ_encoder_base * const eb, const unsigned long long 
member_size ); static inline bool LZeb_init( struct LZ_encoder_base * const eb, - const int before, const int dict_size, + const int before_size, const int dict_size, const int after_size, const int dict_factor, const int num_prev_positions23, const int pos_array_factor, const unsigned min_free_bytes, const unsigned long long member_size ) { - if( !Mb_init( &eb->mb, before, dict_size, after_size, dict_factor, + if( !Mb_init( &eb->mb, before_size, dict_size, after_size, dict_factor, num_prev_positions23, pos_array_factor ) ) return false; if( !Re_init( &eb->renc, eb->mb.dictionary_size, min_free_bytes ) ) return false; diff --git a/fast_encoder.c b/fast_encoder.c index af81137..1ba80e0 100644 --- a/fast_encoder.c +++ b/fast_encoder.c @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/fast_encoder.h b/fast_encoder.h index c02e9cb..47dfb3e 100644 --- a/fast_encoder.h +++ b/fast_encoder.h @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -53,7 +53,7 @@ static inline bool FLZe_update_and_move( struct FLZ_encoder * const fe, int n ) static inline bool FLZe_init( struct FLZ_encoder * const fe, const unsigned long long member_size ) { - enum { before = 0, + enum { before_size = 0, dict_size = 65536, /* bytes to keep in buffer after pos */ after_size = max_match_len, @@ -62,7 +62,7 @@ static inline bool FLZe_init( struct FLZ_encoder * const fe, pos_array_factor = 1, min_free_bytes = max_marker_size }; - return LZeb_init( &fe->eb, before, dict_size, after_size, dict_factor, + return LZeb_init( &fe->eb, before_size, dict_size, after_size, dict_factor, num_prev_positions23, pos_array_factor, min_free_bytes, member_size ); } @@ -1,19 +1,20 @@ /* Lzcheck - Test program for the lzlib library - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This program is free software: you have unlimited permission to copy, distribute and modify it. Usage is: - lzcheck filename.txt + lzcheck filename.txt... - This program reads the specified text file and then compresses it, + This program reads each specified text file and then compresses it, line by line, to test the flushing mechanism and the member restart/reset/sync functions. */ #define _FILE_OFFSET_BITS 64 +#include <ctype.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> @@ -34,6 +35,15 @@ uint8_t mid_buffer[buffer_size]; uint8_t out_buffer[buffer_size]; +void show_line( const uint8_t * const buffer, const int size ) + { + int i; + for( i = 0; i < size; ++i ) + fputc( isprint( buffer[i] ) ? 
buffer[i] : '.', stderr ); + fputc( '\n', stderr ); + } + + int lzcheck( FILE * const file, const int dictionary_size ) { const int match_len_limit = 16; @@ -66,7 +76,7 @@ int lzcheck( FILE * const file, const int dictionary_size ) while( retval <= 1 ) { - int i, l, r; + int l, r; const int read_size = fread( in_buffer, 1, buffer_size, file ); if( read_size <= 0 ) break; /* end of file */ @@ -95,15 +105,11 @@ int lzcheck( FILE * const file, const int dictionary_size ) if( out_size != in_size || memcmp( in_buffer + l, out_buffer, out_size ) ) { - fprintf( stderr, "lzcheck: Sync error at pos %d in_size = %d, out_size = %d\n", + fprintf( stderr, "lzcheck: Sync error at pos %d in_size = %d, " + "out_size = %d\n", l, in_size, out_size ); - for( i = 0; i < in_size; ++i ) - fputc( in_buffer[l+i], stderr ); - if( in_buffer[l+in_size-1] != '\n' ) - fputc( '\n', stderr ); - for( i = 0; i < out_size; ++i ) - fputc( out_buffer[i], stderr ); - fputc( '\n', stderr ); + show_line( in_buffer + l, in_size ); + show_line( out_buffer, out_size ); retval = 1; } } @@ -113,7 +119,8 @@ int lzcheck( FILE * const file, const int dictionary_size ) { rewind( file ); if( LZ_compress_finish( encoder ) < 0 || - LZ_decompress_write( decoder, mid_buffer, LZ_compress_read( encoder, mid_buffer, buffer_size ) ) < 0 || + LZ_decompress_write( decoder, mid_buffer, + LZ_compress_read( encoder, mid_buffer, buffer_size ) ) < 0 || LZ_decompress_read( decoder, out_buffer, buffer_size ) != 0 || LZ_compress_restart_member( encoder, member_size ) < 0 ) { @@ -125,7 +132,7 @@ int lzcheck( FILE * const file, const int dictionary_size ) while( retval <= 1 ) { - int i, l, r, size; + int l, r, size; const int read_size = fread( in_buffer, 1, buffer_size / 2, file ); if( read_size <= 0 ) break; /* end of file */ @@ -133,7 +140,7 @@ int lzcheck( FILE * const file, const int dictionary_size ) { int leading_garbage, in_size, mid_size, out_size; while( r < read_size && in_buffer[r-1] != '\n' ) ++r; - leading_garbage = (l 
== 0) ? min( r, read_size / 2 ) : 0; + leading_garbage = (l == 0) ? min( r, read_size ) / 2 : 0; in_size = LZ_compress_write( encoder, in_buffer + l, r - l ); if( in_size < r - l ) r = l + in_size; LZ_compress_sync_flush( encoder ); @@ -167,22 +174,19 @@ int lzcheck( FILE * const file, const int dictionary_size ) if( out_size != in_size || memcmp( in_buffer + l, out_buffer, out_size ) ) { - fprintf( stderr, "lzcheck: Sync error at pos %d in_size = %d, out_size = %d, leading garbage = %d\n", + fprintf( stderr, "lzcheck: Sync error at pos %d in_size = %d, " + "out_size = %d, leading garbage = %d\n", l, in_size, out_size, leading_garbage ); - for( i = 0; i < in_size; ++i ) - fputc( in_buffer[l+i], stderr ); - if( in_buffer[l+in_size-1] != '\n' ) - fputc( '\n', stderr ); - for( i = 0; i < out_size; ++i ) - fputc( out_buffer[i], stderr ); - fputc( '\n', stderr ); + show_line( in_buffer + l, in_size ); + show_line( out_buffer, out_size ); retval = 1; } } if( retval >= 3 ) break; if( LZ_compress_finish( encoder ) < 0 || - LZ_decompress_write( decoder, mid_buffer, LZ_compress_read( encoder, mid_buffer, buffer_size ) ) < 0 || + LZ_decompress_write( decoder, mid_buffer, + LZ_compress_read( encoder, mid_buffer, buffer_size ) ) < 0 || LZ_decompress_read( decoder, out_buffer, buffer_size ) != 0 || LZ_decompress_reset( decoder ) < 0 || LZ_compress_restart_member( encoder, member_size ) < 0 ) @@ -195,7 +199,8 @@ int lzcheck( FILE * const file, const int dictionary_size ) size = min( 100, read_size ); if( LZ_compress_write( encoder, in_buffer, size ) != size || LZ_compress_finish( encoder ) < 0 || - LZ_decompress_write( decoder, mid_buffer, LZ_compress_read( encoder, mid_buffer, buffer_size ) ) < 0 || + LZ_decompress_write( decoder, mid_buffer, + LZ_compress_read( encoder, mid_buffer, buffer_size ) ) < 0 || LZ_decompress_read( decoder, out_buffer, 0 ) != 0 || LZ_decompress_sync_to_member( decoder ) < 0 || LZ_compress_restart_member( encoder, member_size ) < 0 ) @@ -214,26 +219,34 
@@ int lzcheck( FILE * const file, const int dictionary_size ) int main( const int argc, const char * const argv[] ) { - FILE * file; - int retval; + int retval = 0, i; + int open_failures = 0; + const bool verbose = ( argc > 2 ); if( argc < 2 ) { - fputs( "Usage: lzcheck filename.txt\n", stderr ); + fputs( "Usage: lzcheck filename.txt...\n", stderr ); return 1; } - file = fopen( argv[1], "rb" ); - if( !file ) + for( i = 1; i < argc && retval == 0; ++ i ) { - fprintf( stderr, "lzcheck: Can't open file '%s' for reading.\n", argv[1] ); - return 1; - } -/* fprintf( stderr, "lzcheck: Testing file '%s'\n", argv[1] ); */ + FILE * file = fopen( argv[i], "rb" ); + if( !file ) + { + fprintf( stderr, "lzcheck: Can't open file '%s' for reading.\n", argv[i] ); + ++open_failures; continue; + } + if( verbose ) fprintf( stderr, " Testing file '%s'\n", argv[i] ); - retval = lzcheck( file, 65535 ); /* 65535,16 chooses fast encoder */ - if( retval == 0 ) - { rewind( file ); retval = lzcheck( file, 1 << 20 ); } - fclose( file ); + retval = lzcheck( file, 65535 ); /* 65535,16 chooses fast encoder */ + if( retval == 0 ) + { rewind( file ); retval = lzcheck( file, 1 << 20 ); } + fclose( file ); + } + if( open_failures > 0 && verbose ) + fprintf( stderr, "lzcheck: warning: %d %s failed to open.\n", + open_failures, ( open_failures == 1 ) ? "file" : "files" ); + if( retval == 0 && open_failures ) retval = 1; return retval; } @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -204,12 +204,21 @@ static inline void Fh_set_magic( File_header data ) static inline bool Fh_verify_magic( const File_header data ) { return ( memcmp( data, magic_string, 4 ) == 0 ); } -/* detect truncated header */ -static inline bool Fh_verify_prefix( const File_header data, const int size ) +/* detect (truncated) header */ +static inline bool Fh_verify_prefix( const File_header data, const int sz ) { - int i; for( i = 0; i < size && i < 4; ++i ) + int i; for( i = 0; i < sz && i < 4; ++i ) if( data[i] != magic_string[i] ) return false; - return ( size > 0 ); + return ( sz > 0 ); + } + +/* detect corrupt header */ +static inline bool Fh_verify_corrupt( const File_header data ) + { + int matches = 0; + int i; for( i = 0; i < 4; ++i ) + if( data[i] == magic_string[i] ) ++matches; + return ( matches > 1 && matches < 4 ); } static inline uint8_t Fh_version( const File_header data ) @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -193,7 +193,7 @@ int LZ_compress_finish( struct LZ_Encoder * const e ) /* if (open --> write --> finish) use same dictionary size as lzip. */ /* this does not save any memory. 
*/ if( Mb_data_position( &e->lz_encoder_base->mb ) == 0 && - LZ_compress_total_out_size( e ) == Fh_size ) + Re_member_position( &e->lz_encoder_base->renc ) == Fh_size ) { Mb_adjust_dictionary_size( &e->lz_encoder_base->mb ); Fh_set_dictionary_size( e->lz_encoder_base->renc.header, @@ -417,7 +417,7 @@ int LZ_decompress_read( struct LZ_Decoder * const d, !d->rdec->at_stream_end ) return 0; if( Rd_finished( d->rdec ) && !d->first_header ) return 0; rd = Rd_read_data( d->rdec, d->member_header, Fh_size ); - if( Rd_finished( d->rdec ) ) + if( Rd_finished( d->rdec ) ) /* End Of File */ { if( rd <= 0 || Fh_verify_prefix( d->member_header, rd ) ) d->lz_errno = LZ_unexpected_eof; @@ -431,7 +431,12 @@ int LZ_decompress_read( struct LZ_Decoder * const d, /* unreading the header prevents sync_to_member from skipping a member if leading garbage is shorter than a full header; "lgLZIP\x01\x0C" */ if( Rd_unread_data( d->rdec, rd ) ) - d->lz_errno = LZ_header_error; + { + if( d->first_header || !Fh_verify_corrupt( d->member_header ) ) + d->lz_errno = LZ_header_error; + else + d->lz_errno = LZ_data_error; /* corrupt header */ + } else d->lz_errno = LZ_library_error; d->fatal = true; @@ -440,7 +445,10 @@ int LZ_decompress_read( struct LZ_Decoder * const d, if( !Fh_verify_version( d->member_header ) || !isvalid_ds( Fh_get_dictionary_size( d->member_header ) ) ) { - d->lz_errno = LZ_data_error; /* bad version or bad dict size */ + if( Rd_unread_data( d->rdec, 1 + !Fh_verify_version( d->member_header ) ) ) + d->lz_errno = LZ_data_error; /* bad version or bad dict size */ + else + d->lz_errno = LZ_library_error; d->fatal = true; return -1; } @@ -469,10 +477,10 @@ int LZ_decompress_read( struct LZ_Decoder * const d, result = LZd_decode_member( d->lz_decoder ); if( result != 0 ) { - if( result == 2 ) - { d->lz_errno = LZ_unexpected_eof; - d->rdec->member_position += Cb_used_bytes( &d->rdec->cb ); - Cb_reset( &d->rdec->cb ); } + if( result == 2 ) /* set position at EOF */ + { 
d->rdec->member_position += Cb_used_bytes( &d->rdec->cb ); + Cb_reset( &d->rdec->cb ); + d->lz_errno = LZ_unexpected_eof; } else if( result == 5 ) d->lz_errno = LZ_library_error; else d->lz_errno = LZ_data_error; d->fatal = true; @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -23,7 +23,7 @@ extern "C" { #define LZ_API_VERSION 1 -static const char * const LZ_version_string = "1.9"; +static const char * const LZ_version_string = "1.10"; enum LZ_Errno { LZ_ok = 0, LZ_bad_argument, LZ_mem_error, LZ_sequence_error, LZ_header_error, LZ_unexpected_eof, @@ -1,5 +1,5 @@ /* Minilzip - Test program for the lzlib library - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -79,7 +79,7 @@ int verbosity = 0; const char * const Program_name = "Minilzip"; const char * const program_name = "minilzip"; -const char * const program_year = "2017"; +const char * const program_year = "2018"; const char * invocation_name = 0; const struct { const char * from; const char * to; } known_extensions[] = { @@ -100,9 +100,79 @@ int outfd = -1; bool delete_output_on_interrupt = false; +static void show_help( void ) + { + printf( "%s - Test program for the lzlib library.\n", Program_name ); + printf( "\nUsage: %s [options] [files]\n", invocation_name ); + printf( "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -a, --trailing-error exit with error status if trailing data\n" + " -b, --member-size=<bytes> set member size limit in bytes\n" + " -c, --stdout write to standard output, keep input files\n" + " -d, 
--decompress decompress\n" + " -f, --force overwrite existing output files\n" + " -F, --recompress force re-compression of compressed files\n" + " -k, --keep keep (don't delete) input files\n" + " -m, --match-length=<bytes> set match length limit in bytes [36]\n" + " -o, --output=<file> if reading standard input, write to <file>\n" + " -q, --quiet suppress all messages\n" + " -s, --dictionary-size=<bytes> set dictionary size limit in bytes [8 MiB]\n" + " -S, --volume-size=<bytes> set volume size limit in bytes, implies -k\n" + " -t, --test test compressed file integrity\n" + " -v, --verbose be verbose (a 2nd -v gives more)\n" + " -0 .. -9 set compression level [default 6]\n" + " --fast alias for -0\n" + " --best alias for -9\n" + " --loose-trailing allow trailing data seeming corrupt header\n" + "If no file names are given, or if a file is '-', minilzip compresses or\n" + "decompresses from standard input to standard output.\n" + "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" + "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" + "Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12\n" + "to 2^29 bytes.\n" + "\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n" + "scale optimal for all files. 
If your files are large, very repetitive,\n" + "etc, you may need to use the --dictionary-size and --match-length\n" + "options directly to achieve optimal performance.\n" + "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" + "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" + "invalid input file, 3 for an internal consistency error (eg, bug) which\n" + "caused minilzip to panic.\n" + "\nReport bugs to lzip-bug@nongnu.org\n" + "Lzlib home page: http://www.nongnu.org/lzip/lzlib.html\n" ); + } + + +static void show_version( void ) + { + printf( "%s %s\n", program_name, PROGVERSION ); + printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); + printf( "Using lzlib %s\n", LZ_version() ); + printf( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n" + "This is free software: you are free to change and redistribute it.\n" + "There is NO WARRANTY, to the extent permitted by law.\n" ); + } + + +/* assure at least a minimum size for buffer 'buf' */ +static void * resize_buffer( void * buf, const unsigned min_size ) + { + if( buf ) buf = realloc( buf, min_size ); + else buf = malloc( min_size ); + if( !buf ) + { + show_error( "Not enough memory.", 0, false ); + cleanup_and_fail( 1 ); + } + return buf; + } + + struct Pretty_print { const char * name; + char * padded_name; const char * stdin_name; unsigned longest_name; bool first_post; @@ -110,11 +180,12 @@ struct Pretty_print static void Pp_init( struct Pretty_print * const pp, const char * const filenames[], - const int num_filenames, const int verbosity ) + const int num_filenames ) { unsigned stdin_name_len; int i; pp->name = 0; + pp->padded_name = 0; pp->stdin_name = "(stdin)"; pp->longest_name = 0; pp->first_post = false; @@ -133,9 +204,19 @@ static void Pp_init( struct Pretty_print * const pp, static inline void Pp_set_name( struct Pretty_print * const pp, const char * const filename ) { + unsigned name_len, padded_name_len, i = 0; 
+ if( filename && filename[0] && strcmp( filename, "-" ) != 0 ) pp->name = filename; else pp->name = pp->stdin_name; + name_len = strlen( pp->name ); + padded_name_len = max( name_len, pp->longest_name ) + 4; + pp->padded_name = resize_buffer( pp->padded_name, padded_name_len + 1 ); + while( i < 2 ) pp->padded_name[i++] = ' '; + while( i < name_len + 2 ) { pp->padded_name[i] = pp->name[i-2]; ++i; } + pp->padded_name[i++] = ':'; + while( i < padded_name_len ) pp->padded_name[i++] = ' '; + pp->padded_name[i] = 0; pp->first_post = true; } @@ -148,11 +229,8 @@ static void Pp_show_msg( struct Pretty_print * const pp, const char * const msg { if( pp->first_post ) { - unsigned i; pp->first_post = false; - fprintf( stderr, " %s: ", pp->name ); - for( i = strlen( pp->name ); i < pp->longest_name; ++i ) - fputc( ' ', stderr ); + fputs( pp->padded_name, stderr ); if( !msg ) fflush( stderr ); } if( msg ) fprintf( stderr, "%s\n", msg ); @@ -160,77 +238,20 @@ static void Pp_show_msg( struct Pretty_print * const pp, const char * const msg } -static void show_help( void ) - { - printf( "%s - Test program for the lzlib library.\n", Program_name ); - printf( "\nUsage: %s [options] [files]\n", invocation_name ); - printf( "\nOptions:\n" - " -h, --help display this help and exit\n" - " -V, --version output version information and exit\n" - " -a, --trailing-error exit with error status if trailing data\n" - " -b, --member-size=<bytes> set member size limit in bytes\n" - " -c, --stdout write to standard output, keep input files\n" - " -d, --decompress decompress\n" - " -f, --force overwrite existing output files\n" - " -F, --recompress force re-compression of compressed files\n" - " -k, --keep keep (don't delete) input files\n" - " -m, --match-length=<bytes> set match length limit in bytes [36]\n" - " -o, --output=<file> if reading standard input, write to <file>\n" - " -q, --quiet suppress all messages\n" - " -s, --dictionary-size=<bytes> set dictionary size limit in bytes [8 MiB]\n" - 
" -S, --volume-size=<bytes> set volume size limit in bytes\n" - " -t, --test test compressed file integrity\n" - " -v, --verbose be verbose (a 2nd -v gives more)\n" - " -0 .. -9 set compression level [default 6]\n" - " --fast alias for -0\n" - " --best alias for -9\n" - "If no file names are given, or if a file is '-', minilzip compresses or\n" - "decompresses from standard input to standard output.\n" - "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" - "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" - "Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12\n" - "to 2^29 bytes.\n" - "\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n" - "scale optimal for all files. If your files are large, very repetitive,\n" - "etc, you may need to use the --dictionary-size and --match-length\n" - "options directly to achieve optimal performance.\n" - "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" - "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" - "invalid input file, 3 for an internal consistency error (eg, bug) which\n" - "caused minilzip to panic.\n" - "\nReport bugs to lzip-bug@nongnu.org\n" - "Lzlib home page: http://www.nongnu.org/lzip/lzlib.html\n" ); - } - - -static void show_version( void ) - { - printf( "%s %s\n", program_name, PROGVERSION ); - printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); - printf( "Using lzlib %s\n", LZ_version() ); - printf( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n" - "This is free software: you are free to change and redistribute it.\n" - "There is NO WARRANTY, to the extent permitted by law.\n" ); - } - - static void show_header( const unsigned dictionary_size ) { - if( verbosity >= 3 ) - { - enum { factor = 1024 }; - const char * const prefix[8] = - { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; - const char * p = ""; - const char * np = " "; 
- unsigned num = dictionary_size, i; - bool exact = ( num % factor == 0 ); - - for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) - { num /= factor; if( num % factor != 0 ) exact = false; - p = prefix[i]; np = ""; } - fprintf( stderr, "dictionary %s%4u %sB. ", np, num, p ); - } + enum { factor = 1024 }; + const char * const prefix[8] = + { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + const char * p = ""; + const char * np = " "; + unsigned num = dictionary_size; + bool exact = ( num % factor == 0 ); + + int i; for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; + p = prefix[i]; np = ""; } + fprintf( stderr, "dictionary %s%4u %sB, ", np, num, p ); } @@ -317,6 +338,43 @@ static int extension_index( const char * const name ) } +static void set_c_outname( const char * const name, const bool force_ext, + const bool multifile ) + { + output_filename = resize_buffer( output_filename, strlen( name ) + 5 + + strlen( known_extensions[0].from ) + 1 ); + strcpy( output_filename, name ); + if( multifile ) strcat( output_filename, "00001" ); + if( force_ext || multifile || extension_index( output_filename ) < 0 ) + strcat( output_filename, known_extensions[0].from ); + } + + +static void set_d_outname( const char * const name, const int eindex ) + { + const unsigned name_len = strlen( name ); + if( eindex >= 0 ) + { + const char * const from = known_extensions[eindex].from; + const unsigned from_len = strlen( from ); + if( name_len > from_len ) + { + output_filename = resize_buffer( output_filename, name_len + + strlen( known_extensions[eindex].to ) + 1 ); + strcpy( output_filename, name ); + strcpy( output_filename + name_len - from_len, known_extensions[eindex].to ); + return; + } + } + output_filename = resize_buffer( output_filename, name_len + 4 + 1 ); + strcpy( output_filename, name ); + strcat( output_filename, ".out" ); + if( verbosity >= 1 ) + fprintf( stderr, 
"%s: Can't guess original name for '%s' -- using '%s'\n", + program_name, name, output_filename ); + } + + static int open_instream( const char * const name, struct stat * const in_statsp, const enum Mode program_mode, const int eindex, const bool recompress, const bool to_stdout ) @@ -357,55 +415,6 @@ static int open_instream( const char * const name, struct stat * const in_statsp } -/* assure at least a minimum size for buffer 'buf' */ -static void * resize_buffer( void * buf, const unsigned min_size ) - { - if( buf ) buf = realloc( buf, min_size ); - else buf = malloc( min_size ); - if( !buf ) - { - show_error( "Not enough memory.", 0, false ); - cleanup_and_fail( 1 ); - } - return buf; - } - - -static void set_c_outname( const char * const name, const bool multifile ) - { - output_filename = resize_buffer( output_filename, strlen( name ) + 5 + - strlen( known_extensions[0].from ) + 1 ); - strcpy( output_filename, name ); - if( multifile ) strcat( output_filename, "00001" ); - strcat( output_filename, known_extensions[0].from ); - } - - -static void set_d_outname( const char * const name, const int eindex ) - { - const unsigned name_len = strlen( name ); - if( eindex >= 0 ) - { - const char * const from = known_extensions[eindex].from; - const unsigned from_len = strlen( from ); - if( name_len > from_len ) - { - output_filename = resize_buffer( output_filename, name_len + - strlen( known_extensions[eindex].to ) + 1 ); - strcpy( output_filename, name ); - strcpy( output_filename + name_len - from_len, known_extensions[eindex].to ); - return; - } - } - output_filename = resize_buffer( output_filename, name_len + 4 + 1 ); - strcpy( output_filename, name ); - strcat( output_filename, ".out" ); - if( verbosity >= 1 ) - fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n", - program_name, name, output_filename ); - } - - static bool open_outstream( const bool force, const bool from_stdin ) { const mode_t usr_rw = S_IRUSR | S_IWUSR; @@ -552,8 
+561,8 @@ static bool next_filename( void ) static int do_compress( struct LZ_Encoder * const encoder, const unsigned long long member_size, - const unsigned long long volume_size, - const int infd, struct Pretty_print * const pp, + const unsigned long long volume_size, const int infd, + struct Pretty_print * const pp, const struct stat * const in_statsp ) { unsigned long long partial_volume_size = 0; @@ -597,7 +606,8 @@ static int do_compress( struct LZ_Encoder * const encoder, return 1; } } - else if( in_size == 0 ) internal_error( "library error (LZ_compress_read)." ); + else if( in_size == 0 ) + internal_error( "library error (LZ_compress_read)." ); if( LZ_compress_member_finished( encoder ) ) { unsigned long long size; @@ -638,11 +648,11 @@ static int do_compress( struct LZ_Encoder * const encoder, if( in_size == 0 || out_size == 0 ) fputs( " no data compressed.\n", stderr ); else - fprintf( stderr, "%6.3f:1, %6.3f bits/byte, " - "%5.2f%% saved, %llu in, %llu out.\n", + fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved, " + "%llu in, %llu out.\n", (double)in_size / out_size, - ( 8.0 * out_size ) / in_size, - 100.0 * ( 1.0 - ( (double)out_size / in_size ) ), + ( 100.0 * out_size ) / in_size, + 100.0 - ( ( 100.0 * out_size ) / in_size ), in_size, out_size ); } return 0; @@ -677,8 +687,8 @@ static int compress( const unsigned long long member_size, static int do_decompress( struct LZ_Decoder * const decoder, const int infd, - struct Pretty_print * const pp, - const bool ignore_trailing, const bool testing ) + struct Pretty_print * const pp, const bool ignore_trailing, + const bool loose_trailing, const bool testing ) { enum { buffer_size = 65536 }; uint8_t buffer[buffer_size]; @@ -723,73 +733,107 @@ static int do_decompress( struct LZ_Decoder * const decoder, const int infd, { const unsigned long long data_size = LZ_decompress_data_position( decoder ); const unsigned long long member_size = LZ_decompress_member_position( decoder ); - Pp_show_msg( pp, 0 ); - 
show_header( LZ_decompress_dictionary_size( decoder ) ); - if( verbosity >= 2 && data_size > 0 && member_size > 0 ) - fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", + if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) + Pp_show_msg( pp, 0 ); + if( verbosity >= 2 ) + { + if( verbosity >= 4 ) + show_header( LZ_decompress_dictionary_size( decoder ) ); + if( data_size == 0 || member_size == 0 ) + fputs( "no data compressed. ", stderr ); + else + fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved. ", (double)data_size / member_size, - ( 8.0 * member_size ) / data_size, - 100.0 * ( 1.0 - ( (double)member_size / data_size ) ) ); - if( verbosity >= 4 ) - fprintf( stderr, "CRC %08X, decompressed %9llu, compressed %8llu. ", - LZ_decompress_data_crc( decoder ), data_size, member_size ); - fputs( testing ? "ok\n" : "done\n", stderr ); + ( 100.0 * member_size ) / data_size, + 100.0 - ( ( 100.0 * member_size ) / data_size ) ); + if( verbosity >= 4 ) + fprintf( stderr, "CRC %08X, ", LZ_decompress_data_crc( decoder ) ); + if( verbosity >= 3 ) + fprintf( stderr, "decompressed %9llu, compressed %8llu. ", + data_size, member_size ); + fputs( testing ? "ok\n" : "done\n", stderr ); Pp_reset( pp ); + } } - first_member = false; Pp_reset( pp ); + first_member = false; } if( rd <= 0 ) break; } if( out_size < 0 || ( first_member && out_size == 0 ) ) { + const unsigned long long member_pos = LZ_decompress_member_position( decoder ); const enum LZ_Errno lz_errno = LZ_decompress_errno( decoder ); - if( lz_errno == LZ_unexpected_eof && - LZ_decompress_member_position( decoder ) <= 6 ) - { Pp_show_msg( pp, "File ends unexpectedly at member header." ); - return 2; } + if( lz_errno == LZ_library_error ) + internal_error( "library error (LZ_decompress_read)." 
); + if( member_pos <= 6 ) + { + if( lz_errno == LZ_unexpected_eof ) + { + if( first_member ) + show_file_error( pp->name, "File ends unexpectedly at member header.", 0 ); + else + Pp_show_msg( pp, "Truncated header in multimember file." ); + return 2; + } + else if( lz_errno == LZ_data_error ) + { + if( member_pos == 4 ) + { if( verbosity >= 0 ) + { Pp_show_msg( pp, 0 ); + fprintf( stderr, "Version %d member format not supported.\n", + LZ_decompress_member_version( decoder ) ); } } + else if( member_pos == 5 ) + Pp_show_msg( pp, "Invalid dictionary size in member header." ); + else if( first_member ) /* for lzlib older than 1.10 */ + Pp_show_msg( pp, "Bad version or dictionary size in member header." ); + else if( !loose_trailing ) + Pp_show_msg( pp, "Corrupt header in multimember file." ); + else if( !ignore_trailing ) + Pp_show_msg( pp, "Trailing data not allowed." ); + else break; /* trailing data */ + return 2; + } + } if( lz_errno == LZ_header_error ) { if( first_member ) - { show_file_error( pp->name, - "Bad magic number (file not in lzip format).", 0 ); - return 2; } + show_file_error( pp->name, + "Bad magic number (file not in lzip format).", 0 ); else if( !ignore_trailing ) - { show_file_error( pp->name, "Trailing data not allowed.", 0 ); - return 2; } - break; + Pp_show_msg( pp, "Trailing data not allowed." ); + else break; /* trailing data */ + return 2; } if( lz_errno == LZ_mem_error ) { Pp_show_msg( pp, "Not enough memory." ); return 1; } if( verbosity >= 0 ) { Pp_show_msg( pp, 0 ); - if( lz_errno == LZ_unexpected_eof ) - fprintf( stderr, "File ends unexpectedly at pos %llu\n", - LZ_decompress_total_in_size( decoder ) ); - else - fprintf( stderr, "Decoder error at pos %llu: %s\n", - LZ_decompress_total_in_size( decoder ), - LZ_strerror( LZ_decompress_errno( decoder ) ) ); + fprintf( stderr, "%s at pos %llu\n", ( lz_errno == LZ_unexpected_eof ) ? 
+ "File ends unexpectedly" : "Decoder error", + LZ_decompress_total_in_size( decoder ) ); } return 2; } if( LZ_decompress_finished( decoder ) == 1 ) break; if( in_size == 0 && out_size == 0 ) - internal_error( "library error (LZ_decompress_read)." ); + internal_error( "library error (stalled)." ); } + if( verbosity == 1 ) fputs( testing ? "ok\n" : "done\n", stderr ); return 0; } static int decompress( const int infd, struct Pretty_print * const pp, - const bool ignore_trailing, const bool testing ) + const bool ignore_trailing, + const bool loose_trailing, const bool testing ) { struct LZ_Decoder * const decoder = LZ_decompress_open(); int retval; if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) { Pp_show_msg( pp, "Not enough memory." ); retval = 1; } - else retval = do_decompress( decoder, infd, pp, ignore_trailing, testing ); - + else retval = do_decompress( decoder, infd, pp, ignore_trailing, + loose_trailing, testing ); LZ_decompress_close( decoder ); return retval; } @@ -868,50 +912,53 @@ int main( const int argc, const char * const argv[] ) const char * default_output_filename = ""; const char ** filenames = 0; int num_filenames = 0; - int infd = -1; enum Mode program_mode = m_compress; int argind = 0; + int failed_tests = 0; int retval = 0; int i; bool filenames_given = false; bool force = false; bool ignore_trailing = true; bool keep_input_files = false; + bool loose_trailing = false; bool recompress = false; bool stdin_used = false; bool to_stdout = false; struct Pretty_print pp; + enum { opt_lt = 256 }; const struct ap_Option options[] = { - { '0', "fast", ap_no }, - { '1', 0, ap_no }, - { '2', 0, ap_no }, - { '3', 0, ap_no }, - { '4', 0, ap_no }, - { '5', 0, ap_no }, - { '6', 0, ap_no }, - { '7', 0, ap_no }, - { '8', 0, ap_no }, - { '9', "best", ap_no }, - { 'a', "trailing-error", ap_no }, - { 'b', "member-size", ap_yes }, - { 'c', "stdout", ap_no }, - { 'd', "decompress", ap_no }, - { 'f', "force", ap_no }, - { 'F', "recompress", ap_no }, - { 
'h', "help", ap_no }, - { 'k', "keep", ap_no }, - { 'm', "match-length", ap_yes }, - { 'n', "threads", ap_yes }, - { 'o', "output", ap_yes }, - { 'q', "quiet", ap_no }, - { 's', "dictionary-size", ap_yes }, - { 'S', "volume-size", ap_yes }, - { 't', "test", ap_no }, - { 'v', "verbose", ap_no }, - { 'V', "version", ap_no }, - { 0 , 0, ap_no } }; + { '0', "fast", ap_no }, + { '1', 0, ap_no }, + { '2', 0, ap_no }, + { '3', 0, ap_no }, + { '4', 0, ap_no }, + { '5', 0, ap_no }, + { '6', 0, ap_no }, + { '7', 0, ap_no }, + { '8', 0, ap_no }, + { '9', "best", ap_no }, + { 'a', "trailing-error", ap_no }, + { 'b', "member-size", ap_yes }, + { 'c', "stdout", ap_no }, + { 'd', "decompress", ap_no }, + { 'f', "force", ap_no }, + { 'F', "recompress", ap_no }, + { 'h', "help", ap_no }, + { 'k', "keep", ap_no }, + { 'm', "match-length", ap_yes }, + { 'n', "threads", ap_yes }, + { 'o', "output", ap_yes }, + { 'q', "quiet", ap_no }, + { 's', "dictionary-size", ap_yes }, + { 'S', "volume-size", ap_yes }, + { 't', "test", ap_no }, + { 'v', "verbose", ap_no }, + { 'V', "version", ap_no }, + { opt_lt, "loose-trailing", ap_no }, + { 0 , 0, ap_no } }; struct Arg_parser parser; @@ -957,6 +1004,7 @@ int main( const int argc, const char * const argv[] ) case 't': program_mode = m_test; break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; + case opt_lt: loose_trailing = true; break; default : internal_error( "uncaught option." 
); } } /* end process options */ @@ -983,12 +1031,13 @@ int main( const int argc, const char * const argv[] ) ( filenames_given || default_output_filename[0] ) ) set_signals(); - Pp_init( &pp, filenames, num_filenames, verbosity ); + Pp_init( &pp, filenames, num_filenames ); output_filename = resize_buffer( output_filename, 1 ); for( i = 0; i < num_filenames; ++i ) { const char * input_filename = ""; + int infd; int tmp; struct stat in_stats; const struct stat * in_statsp; @@ -1005,17 +1054,17 @@ int main( const int argc, const char * const argv[] ) else { if( program_mode == m_compress ) - set_c_outname( default_output_filename, volume_size > 0 ); + set_c_outname( default_output_filename, false, volume_size > 0 ); else { output_filename = resize_buffer( output_filename, - strlen( default_output_filename ) + 1 ); + strlen( default_output_filename ) + 1 ); strcpy( output_filename, default_output_filename ); } if( !open_outstream( force, true ) ) { if( retval < 1 ) retval = 1; - close( infd ); infd = -1; + close( infd ); continue; } } @@ -1033,12 +1082,12 @@ int main( const int argc, const char * const argv[] ) else { if( program_mode == m_compress ) - set_c_outname( input_filename, volume_size > 0 ); + set_c_outname( input_filename, true, volume_size > 0 ); else set_d_outname( input_filename, eindex ); if( !open_outstream( force, false ) ) { if( retval < 1 ) retval = 1; - close( infd ); infd = -1; + close( infd ); continue; } } @@ -1049,7 +1098,7 @@ int main( const int argc, const char * const argv[] ) if( !check_tty( pp.name, infd, program_mode ) ) { if( retval < 1 ) retval = 1; - if( program_mode == m_test ) { close( infd ); infd = -1; continue; } + if( program_mode == m_test ) { close( infd ); continue; } cleanup_and_fail( retval ); } @@ -1058,24 +1107,32 @@ int main( const int argc, const char * const argv[] ) tmp = compress( member_size, volume_size, infd, &encoder_options, &pp, in_statsp ); else - tmp = decompress( infd, &pp, ignore_trailing, program_mode == 
m_test ); + tmp = decompress( infd, &pp, ignore_trailing, + loose_trailing, program_mode == m_test ); if( tmp > retval ) retval = tmp; - if( tmp && program_mode != m_test ) cleanup_and_fail( retval ); + if( tmp ) + { if( program_mode != m_test ) cleanup_and_fail( retval ); + else ++failed_tests; } if( delete_output_on_interrupt ) close_and_set_permissions( in_statsp ); if( input_filename[0] ) { - close( infd ); infd = -1; - if( !keep_input_files && !to_stdout && program_mode != m_test ) + close( infd ); + if( !keep_input_files && !to_stdout && program_mode != m_test && + ( program_mode != m_compress || volume_size == 0 ) ) remove( input_filename ); } } if( outfd >= 0 && close( outfd ) != 0 ) { - show_error( "Can't close stdout", errno, false ); + show_error( "Error closing stdout", errno, false ); if( retval < 1 ) retval = 1; } + if( failed_tests > 0 && verbosity >= 1 && num_filenames > 1 ) + fprintf( stderr, "%s: warning: %d %s failed the test.\n", + program_name, failed_tests, + ( failed_tests == 1 ) ? "file" : "files" ); free( output_filename ); free( filenames ); ap_free( &parser ); diff --git a/testsuite/check.sh b/testsuite/check.sh index 037e2c9..929a690 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh -# check script for Lzlib - A compression library for lzip files -# Copyright (C) 2009-2017 Antonio Diaz Diaz. +# check script for Lzlib - Compression library for the lzip format +# Copyright (C) 2009-2018 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -54,6 +54,8 @@ done [ $? = 2 ] || test_failed $LINENO "${LZIP}" -cdq < in [ $? = 2 ] || test_failed $LINENO +"${LZIP}" -dq -o in < "${in_lz}" +[ $? = 1 ] || test_failed $LINENO # these are for code coverage "${LZIP}" -t -- nx_file 2> /dev/null [ $? = 1 ] || test_failed $LINENO @@ -97,6 +99,11 @@ printf "to be overwritten" > copy || framework_failure "${LZIP}" -df copy.lz { [ $? = 0 ] && [ ! 
-e copy.lz ] && cmp in copy ; } || test_failed $LINENO +rm -f copy +cat "${in_lz}" > copy.lz || framework_failure +"${LZIP}" -d -S100k copy.lz +{ [ $? = 0 ] && [ ! -e copy.lz ] && cmp in copy ; } || test_failed $LINENO + printf "to be overwritten" > copy || framework_failure "${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO cmp in copy || test_failed $LINENO @@ -136,7 +143,7 @@ cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure "${LZIP}" -cd in2.lz > copy2 || test_failed $LINENO cmp in2 copy2 || test_failed $LINENO -"${LZIP}" -s16 --output=copy2 < in2 || test_failed $LINENO +"${LZIP}" -s16 --output=copy2.lz < in2 || test_failed $LINENO "${LZIP}" -t copy2.lz || test_failed $LINENO "${LZIP}" -cd copy2.lz > copy2 || test_failed $LINENO cmp in2 copy2 || test_failed $LINENO @@ -192,10 +199,14 @@ for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do done cat in in in in in in in in > in8 || framework_failure -"${LZIP}" -1s12 -S100k -o out < in8 || test_failed $LINENO -"${LZIP}" -t out00001.lz out00002.lz || test_failed $LINENO -"${LZIP}" -cd out00001.lz out00002.lz | cmp in8 - || test_failed $LINENO -rm -f out00001.lz +"${LZIP}" -1s12 -S100k in8 || test_failed $LINENO +"${LZIP}" -t in800001.lz in800002.lz || test_failed $LINENO +"${LZIP}" -cd in800001.lz in800002.lz | cmp in8 - || test_failed $LINENO +rm -f in800001.lz in800002.lz +"${LZIP}" -1s12 -S100k -o out.lz < in8 || test_failed $LINENO +"${LZIP}" -t out.lz00001.lz out.lz00002.lz || test_failed $LINENO +"${LZIP}" -cd out.lz00001.lz out.lz00002.lz | cmp in8 - || test_failed $LINENO +rm -f out.lz00001.lz out.lz00002.lz "${LZIP}" -1ks4Ki -b100000 in8 || test_failed $LINENO "${LZIP}" -t in8.lz || test_failed $LINENO "${LZIP}" -cd in8.lz | cmp in8 - || test_failed $LINENO @@ -203,7 +214,15 @@ rm -f in8 "${LZIP}" -0 -S100k -o out < in8.lz || test_failed $LINENO "${LZIP}" -t out00001.lz out00002.lz || test_failed $LINENO "${LZIP}" -cd out00001.lz out00002.lz | cmp in8.lz - || test_failed $LINENO +rm -f out00001.lz 
+"${LZIP}" -1 -S100k -o out < in8.lz || test_failed $LINENO +"${LZIP}" -t out00001.lz out00002.lz || test_failed $LINENO +"${LZIP}" -cd out00001.lz out00002.lz | cmp in8.lz - || test_failed $LINENO rm -f out00001.lz out00002.lz +"${LZIP}" -0 -F -S100k in8.lz || test_failed $LINENO +"${LZIP}" -t in8.lz00001.lz in8.lz00002.lz || test_failed $LINENO +"${LZIP}" -cd in8.lz00001.lz in8.lz00002.lz | cmp in8.lz - || test_failed $LINENO +rm -f in8.lz00001.lz in8.lz00002.lz "${LZIP}" -0kF -b100k in8.lz || test_failed $LINENO "${LZIP}" -t in8.lz.lz || test_failed $LINENO "${LZIP}" -cd in8.lz.lz | cmp in8.lz - || test_failed $LINENO @@ -219,12 +238,37 @@ rm -f in8.lz in8.lz.lz printf "\ntesting bad input..." +headers='LZIp LZiP LZip LzIP LzIp LziP lZIP lZIp lZiP lzIP' +body='\001\014\000\203\377\373\377\377\300\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000$\000\000\000\000\000\000\000' +cat "${in_lz}" > in0.lz +printf "LZIP${body}" >> in0.lz +if "${LZIP}" -tq in0.lz ; then + for header in ${headers} ; do + printf "${header}${body}" > in0.lz # first member + "${LZIP}" -tq in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + cat "${in_lz}" > in0.lz + printf "${header}${body}" >> in0.lz # trailing data + "${LZIP}" -tq in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -t --loose-trailing in0.lz + [ $? = 0 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing --trailing-error in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + done +else + printf "\nwarning: skipping header test: 'printf' does not work on your system." 
+fi +rm -f in0.lz + cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && [ -e trunc.lz ] && cmp in2.lz trunc.lz > /dev/null 2>&1 ; then for i in 6 20 14734 14753 14754 14755 14756 14757 14758 ; do dd if=in3.lz of=trunc.lz bs=$i count=1 2> /dev/null - "${LZIP}" -t trunc.lz 2> /dev/null + "${LZIP}" -tq trunc.lz [ $? = 2 ] || test_failed $LINENO $i "${LZIP}" -tq < trunc.lz [ $? = 2 ] || test_failed $LINENO $i @@ -236,6 +280,7 @@ if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && else printf "\nwarning: skipping truncation test: 'dd' does not work on your system." fi +rm -f in3.lz trunc.lz cat "${in_lz}" > ingin.lz || framework_failure printf "g" >> ingin.lz || framework_failure @@ -246,6 +291,7 @@ cmp in copy || test_failed $LINENO "${LZIP}" -t < ingin.lz || test_failed $LINENO "${LZIP}" -d < ingin.lz > copy || test_failed $LINENO cmp in copy || test_failed $LINENO +rm -f ingin.lz echo if [ ${fail} = 0 ] ; then |