diff options
-rw-r--r-- | AUTHORS | 2 | ||||
-rw-r--r-- | ChangeLog | 23 | ||||
-rw-r--r-- | INSTALL | 17 | ||||
-rw-r--r-- | Makefile.in | 7 | ||||
-rw-r--r-- | NEWS | 24 | ||||
-rw-r--r-- | README | 41 | ||||
-rw-r--r-- | bbexample.c | 12 | ||||
-rw-r--r-- | carg_parser.c | 4 | ||||
-rw-r--r-- | carg_parser.h | 4 | ||||
-rw-r--r-- | cbuffer.c | 4 | ||||
-rwxr-xr-x | configure | 27 | ||||
-rw-r--r-- | decoder.c | 14 | ||||
-rw-r--r-- | decoder.h | 18 | ||||
-rw-r--r-- | doc/lzlib.info | 251 | ||||
-rw-r--r-- | doc/lzlib.texi | 274 | ||||
-rw-r--r-- | doc/minilzip.1 | 54 | ||||
-rw-r--r-- | encoder.c | 7 | ||||
-rw-r--r-- | encoder.h | 2 | ||||
-rw-r--r-- | encoder_base.c | 6 | ||||
-rw-r--r-- | encoder_base.h | 17 | ||||
-rw-r--r-- | fast_encoder.c | 2 | ||||
-rw-r--r-- | fast_encoder.h | 2 | ||||
-rw-r--r-- | ffexample.c | 10 | ||||
-rw-r--r-- | lzcheck.c | 55 | ||||
-rw-r--r-- | lzip.h | 48 | ||||
-rw-r--r-- | lzlib.c | 80 | ||||
-rw-r--r-- | lzlib.h | 6 | ||||
-rw-r--r-- | minilzip.c | 202 | ||||
-rwxr-xr-x | testsuite/check.sh | 187 |
29 files changed, 746 insertions, 654 deletions
@@ -1,7 +1,7 @@ Lzlib was written by Antonio Diaz Diaz. The ideas embodied in lzlib are due to (at least) the following people: -Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the +Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrei Markov (for the definition of Markov chains), G.N.N. Martin (for the definition of range encoding), Igor Pavlov (for putting all the above together in LZMA), and Julian Seward (for bzip2's CLI). @@ -1,8 +1,19 @@ +2024-01-20 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.14 released. + * minilzip.c: Reformat file diagnostics as 'PROGRAM: FILE: MESSAGE'. + (show_option_error): New function showing argument and option name. + (main): Make -o preserve date/mode/owner if 1 input file. + * lzip.h: Rename verify_* to check_*. + * lzlib.texi: Document the need to declare uint8_t before lzlib.h. + (Reported by Michal Górny). + * configure, Makefile.in: New variable 'MAKEINFO'. + * INSTALL: Document use of CFLAGS+='--std=c99 -D_XOPEN_SOURCE=500'. + 2022-01-23 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.13 released. - * Set variables AR and ARFLAGS from configure. - (Reported by Hoël Bézier). + * configure: Set variables AR and ARFLAGS. (Reported by Hoël Bézier). * main.c: Rename to minilzip.c. * minilzip.c (getnum): Show option name and valid range if error. (check_lib): Check that LZ_API_VERSION and LZ_version_string match. @@ -29,7 +40,7 @@ Do not open output if input is a terminal. Replace 'decompressed', 'compressed' with 'out', 'in' in output. Set a valid invocation_name even if argc == 0. - * lzlib.texi: Document the new way of verifying the library version. + * lzlib.texi: Document the new way of checking the library version. Document that 'LZ_(de)compress_close' and 'LZ_(de)compress_errno' can be called with a null argument. Document that sync flush marker is not allowed in lzip files. @@ -158,8 +169,8 @@ * encoder.h, encoder.cc: Optimize pair price calculations, reducing compression time for large values of '--match-length' by up to 6%. * main.cc: New option '-F, --recompress'. - * Makefile.in: 'make install' no longer tries to run - '/sbin/ldconfig' on systems lacking it. + * Makefile.in: 'make install' no longer tries to run '/sbin/ldconfig' + on systems lacking it. 2011-01-03 Antonio Diaz Diaz <ant_diaz@teleline.es> @@ -248,7 +259,7 @@ * Version 0.1 released. -Copyright (C) 2009-2022 Antonio Diaz Diaz. +Copyright (C) 2009-2024 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute, and @@ -18,8 +18,8 @@ Procedure or lzip -cd lzlib[version].tar.lz | tar -xf - -This creates the directory ./lzlib[version] containing the source from -the main archive. +This creates the directory ./lzlib[version] containing the source code +extracted from the archive. 2. Change to lzlib directory and run configure. (Try 'configure --help' for usage instructions). @@ -27,6 +27,10 @@ the main archive. cd lzlib[version] ./configure + If you choose a C standard, enable the POSIX features explicitly: + + ./configure CFLAGS+='--std=c99 -D_XOPEN_SOURCE=500' + If you are compiling on MinGW, use: ./configure CFLAGS+='-D __USE_MINGW_ANSI_STDIO' @@ -38,7 +42,8 @@ the main archive. 4. Optionally, type 'make check' to run the tests that come with lzlib. 5. Type 'make install' to install the library and any data files and - documentation. (You may need to run ldconfig also). + documentation. You need root privileges to install into a prefix owned + by root. (You may need to run ldconfig also). Or type 'make install-compress', which additionally compresses the info manual after installation. @@ -66,15 +71,15 @@ object files and executables to go and run the 'configure' script. 'configure' automatically checks for the source code in '.', in '..', and in the directory that 'configure' is in. -'configure' recognizes the option '--srcdir=DIR' to control where to -look for the sources. Usually 'configure' can determine that directory +'configure' recognizes the option '--srcdir=DIR' to control where to look +for the source code. Usually 'configure' can determine that directory automatically. After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2009-2022 Antonio Diaz Diaz. +Copyright (C) 2009-2024 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. diff --git a/Makefile.in b/Makefile.in index 81b404b..de54626 100644 --- a/Makefile.in +++ b/Makefile.in @@ -52,6 +52,10 @@ lzlib_sh.o : lzlib.c %.o : %.c $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $< +# prevent 'make' from trying to remake source files +$(VPATH)/configure $(VPATH)/Makefile.in $(VPATH)/doc/$(pkgname).texi : ; +%.h %.c : ; + lzdeps = lzlib.h lzip.h cbuffer.c decoder.h decoder.c encoder_base.h \ encoder_base.c encoder.h encoder.c fast_encoder.h fast_encoder.c @@ -64,13 +68,12 @@ bbexample.o : Makefile lzlib.h ffexample.o : Makefile lzlib.h lzcheck.o : Makefile lzlib.h - doc : info man info : $(VPATH)/doc/$(pkgname).info $(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi - cd $(VPATH)/doc && makeinfo $(pkgname).texi + cd $(VPATH)/doc && $(MAKEINFO) $(pkgname).texi man : $(VPATH)/doc/$(progname).1 @@ -1,15 +1,19 @@ -Changes in version 1.13: +Changes in version 1.14: -The variables AR and ARFLAGS can now be set from configure. (Before you -needed to run 'make AR=<ar_command>'. (Reported by Hoël Bézier). +In minilzip, file diagnostics have been reformatted as 'PROGRAM: FILE: MESSAGE'. -In case of error in a numerical argument to a command line option, minilzip -now shows the name of the option and the range of valid values. +In minilzip, diagnostics caused by invalid arguments to command-line options +now show the argument and the name of the option. -'minilzip --check-lib' now checks that LZ_API_VERSION and LZ_version_string -match. +The option '-o, --output' of minilzip now preserves dates, permissions, and +ownership of the file, when (de)compressing exactly one file. -Several descriptions have been improved in manual, '--help', and man page. +It has been documented in the manual that it is the responsibility of the +program using lzlib to include before 'lzlib.h' some header that declares +the type 'uint8_t'. (Reported by Michal Górny). -The texinfo category of the manual has been changed from 'Data Compression' -to 'Compression' to match that of gzip. (Reported by Alfred M. Szmidt). +The variable MAKEINFO has been added to configure and Makefile.in. + +It has been documented in INSTALL that when choosing a C standard, the POSIX +features need to be enabled explicitly: + ./configure CFLAGS+='--std=c99 -D_XOPEN_SOURCE=500' @@ -34,6 +34,12 @@ are declared in the file 'lzlib.h'. Usage examples of the library are given in the files 'bbexample.c', 'ffexample.c', and 'minilzip.c' from the source distribution. +As 'lzlib.h' can be used by C and C++ programs, it must not impose a choice +of system headers on the program by including one of them. Therefore it is +the responsibility of the program using lzlib to include before 'lzlib.h' +some header that declares the type 'uint8_t'. There are at least four such +headers in C and C++: 'stdint.h', 'cstdint', 'inttypes.h', and 'cinttypes'. + All the library functions are thread safe. The library does not install any signal handler. The decoder checks the consistency of the compressed data, so the library should never crash even in case of corrupted input. @@ -44,18 +50,18 @@ This interface is safer and less error prone than the traditional zlib interface. Compression/decompression is done when the read function is called. This -means the value returned by the position functions will not be updated until -a read call, even if a lot of data are written. If you want the data to be +means the value returned by the position functions is not updated until a +read call, even if a lot of data are written. If you want the data to be compressed in advance, just call the read function with a size equal to 0. -If all the data to be compressed are written in advance, lzlib will -automatically adjust the header of the compressed data to use the largest -dictionary size that does not exceed neither the data size nor the limit -given to 'LZ_compress_open'. This feature reduces the amount of memory -needed for decompression and allows minilzip to produce identical compressed -output as lzip. +If all the data to be compressed are written in advance, lzlib automatically +adjusts the header of the compressed data to use the largest dictionary size +that does not exceed neither the data size nor the limit given to +'LZ_compress_open'. This feature reduces the amount of memory needed for +decompression and allows minilzip to produce identical compressed output as +lzip. -Lzlib will correctly decompress a data stream which is the concatenation of +Lzlib correctly decompresses a data stream which is the concatenation of two or more compressed data streams. The result is the concatenation of the corresponding decompressed data streams. Integrity testing of concatenated compressed data streams is also supported. @@ -77,13 +83,13 @@ Lzlib currently implements two variants of the LZMA algorithm: fast (used by option '-0' of minilzip) and normal (used by all other compression levels). The high compression of LZMA comes from combining two basic, well-proven -compression ideas: sliding dictionaries (LZ77/78) and markov models (the -thing used by every compression algorithm that uses a range encoder or -similar order-0 entropy coder as its last stage) with segregation of -contexts according to what the bits are used for. +compression ideas: sliding dictionaries (LZ77) and markov models (the thing +used by every compression algorithm that uses a range encoder or similar +order-0 entropy coder as its last stage) with segregation of contexts +according to what the bits are used for. The ideas embodied in lzlib are due to (at least) the following people: -Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the +Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrei Markov (for the definition of Markov chains), G.N.N. Martin (for the definition of range encoding), Igor Pavlov (for putting all the above together in LZMA), and Julian Seward (for bzip2's CLI). @@ -93,11 +99,10 @@ been compressed. Decompressed is used to refer to data which have undergone the process of decompression. -Copyright (C) 2009-2022 Antonio Diaz Diaz. +Copyright (C) 2009-2024 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. -The file Makefile.in is a data file used by configure to produce the -Makefile. It has the same copyright owner and permissions that configure -itself. +The file Makefile.in is a data file used by configure to produce the Makefile. +It has the same copyright owner and permissions that configure itself. diff --git a/bbexample.c b/bbexample.c index 074f7ae..50ccf33 100644 --- a/bbexample.c +++ b/bbexample.c @@ -1,5 +1,5 @@ /* Buffer to buffer example - Test program for the library lzlib - Copyright (C) 2010-2022 Antonio Diaz Diaz. + Copyright (C) 2010-2024 Antonio Diaz Diaz. This program is free software: you have unlimited permission to copy, distribute, and modify it. @@ -38,7 +38,7 @@ uint8_t * read_file( const char * const name, long * const file_sizep ) uint8_t * buffer, * tmp; FILE * const f = fopen( name, "rb" ); if( !f ) - { fprintf( stderr, "bbexample: Can't open input file '%s': %s\n", + { fprintf( stderr, "bbexample: %s: Can't open input file: %s\n", name, strerror( errno ) ); return 0; } buffer = (uint8_t *)malloc( buffer_size ); @@ -50,7 +50,7 @@ uint8_t * read_file( const char * const name, long * const file_sizep ) { if( buffer_size >= LONG_MAX ) { - fprintf( stderr, "bbexample: Input file '%s' is too large.\n", name ); + fprintf( stderr, "bbexample: %s: Input file is too large.\n", name ); free( buffer ); fclose( f ); return 0; } buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX; @@ -63,7 +63,7 @@ uint8_t * read_file( const char * const name, long * const file_sizep ) } if( ferror( f ) || !feof( f ) ) { - fprintf( stderr, "bbexample: Error reading file '%s': %s\n", + fprintf( stderr, "bbexample: %s: Error reading file: %s\n", name, strerror( errno ) ); free( buffer ); fclose( f ); return 0; } @@ -86,8 +86,8 @@ uint8_t * bbcompressl( const uint8_t * const inbuf, const long insize, int dictionary_size; /* 4 KiB .. 512 MiB */ int match_len_limit; /* 5 .. 273 */ }; - /* Mapping from gzip/bzip2 style 1..9 compression modes - to the corresponding LZMA compression modes. */ + /* Mapping from gzip/bzip2 style 0..9 compression levels to the + corresponding LZMA compression parameters. */ const struct Lzma_options option_mapping[] = { { 65535, 16 }, /* -0 (65535,16 chooses fast encoder) */ diff --git a/carg_parser.c b/carg_parser.c index 181ba23..edb4eb9 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,5 +1,5 @@ -/* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2022 Antonio Diaz Diaz. +/* Arg_parser - POSIX/GNU command-line argument parser. (C version) + Copyright (C) 2006-2024 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/carg_parser.h b/carg_parser.h index 0c64861..69ce271 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,5 +1,5 @@ -/* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2022 Antonio Diaz Diaz. +/* Arg_parser - POSIX/GNU command-line argument parser. (C version) + Copyright (C) 2006-2024 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2024 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -33,7 +33,7 @@ static inline bool Cb_init( struct Circular_buffer * const cb, cb->put = 0; cb->buffer = ( cb->buffer_size > 1 ) ? (uint8_t *)malloc( cb->buffer_size ) : 0; - return ( cb->buffer != 0 ); + return cb->buffer != 0; } static inline void Cb_free( struct Circular_buffer * const cb ) @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Lzlib - Compression library for the lzip format -# Copyright (C) 2009-2022 Antonio Diaz Diaz. +# Copyright (C) 2009-2024 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute, and modify it. pkgname=lzlib -pkgversion=1.13 +pkgversion=1.14 soversion=1 progname=minilzip progname_static=${progname} @@ -34,6 +34,7 @@ CPPFLAGS= CFLAGS='-Wall -W -O2' LDFLAGS= ARFLAGS=-rcs +MAKEINFO=makeinfo # checking whether we are using GNU C. /bin/sh -c "${CC} --version" > /dev/null 2>&1 || { CC=cc ; CFLAGS=-O2 ; } @@ -67,7 +68,7 @@ while [ $# != 0 ] ; do echo "Options and variables: [defaults in brackets]" echo " -h, --help display this help and exit" echo " -V, --version output version information and exit" - echo " --srcdir=DIR find the sources in DIR [. or ..]" + echo " --srcdir=DIR find the source code in DIR [. or ..]" echo " --prefix=DIR install into DIR [${prefix}]" echo " --exec-prefix=DIR base directory for arch-dependent files [${exec_prefix}]" echo " --bindir=DIR user executables directory [${bindir}]" @@ -82,11 +83,12 @@ while [ $# != 0 ] ; do echo " --disable-ldconfig don't run ldconfig after install" echo " CC=COMPILER C compiler to use [${CC}]" echo " AR=ARCHIVER library archiver to use [${AR}]" - echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]" - echo " CFLAGS=OPTIONS command line options for the C compiler [${CFLAGS}]" + echo " CPPFLAGS=OPTIONS command-line options for the preprocessor [${CPPFLAGS}]" + echo " CFLAGS=OPTIONS command-line options for the C compiler [${CFLAGS}]" echo " CFLAGS+=OPTIONS append options to the current value of CFLAGS" - echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]" - echo " ARFLAGS=OPTIONS command line options for the library archiver [${ARFLAGS}]" + echo " LDFLAGS=OPTIONS command-line options for the linker [${LDFLAGS}]" + echo " ARFLAGS=OPTIONS command-line options for the library archiver [${ARFLAGS}]" + echo " MAKEINFO=NAME makeinfo program to use [${MAKEINFO}]" echo exit 0 ;; --version | -V) @@ -128,6 +130,7 @@ while [ $# != 0 ] ; do CFLAGS+=*) CFLAGS="${CFLAGS} ${optarg}" ;; LDFLAGS=*) LDFLAGS=${optarg} ;; ARFLAGS=*) ARFLAGS=${optarg} ;; + MAKEINFO=*) MAKEINFO=${optarg} ;; --*) echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;; @@ -147,7 +150,7 @@ while [ $# != 0 ] ; do fi done -# Find the source files, if location was not specified. +# Find the source code, if location was not specified. srcdirtext= if [ -z "${srcdir}" ] ; then srcdirtext="or . or .." ; srcdir=. @@ -159,7 +162,7 @@ if [ -z "${srcdir}" ] ; then fi if [ ! -r "${srcdir}/${srctrigger}" ] ; then - echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2 + echo "configure: Can't find source code in ${srcdir} ${srcdirtext}" 1>&2 echo "configure: (At least ${srctrigger} is missing)." 1>&2 exit 1 fi @@ -179,7 +182,7 @@ if [ -z "${no_create}" ] ; then # This script is free software: you have unlimited permission # to copy, distribute, and modify it. -exec /bin/sh $0 ${args} --no-create +exec /bin/sh "$0" ${args} --no-create EOF chmod +x config.status fi @@ -200,10 +203,11 @@ echo "CPPFLAGS = ${CPPFLAGS}" echo "CFLAGS = ${CFLAGS}" echo "LDFLAGS = ${LDFLAGS}" echo "ARFLAGS = ${ARFLAGS}" +echo "MAKEINFO = ${MAKEINFO}" rm -f Makefile cat > Makefile << EOF # Makefile for Lzlib - Compression library for the lzip format -# Copyright (C) 2009-2022 Antonio Diaz Diaz. +# Copyright (C) 2009-2024 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission @@ -233,6 +237,7 @@ CPPFLAGS = ${CPPFLAGS} CFLAGS = ${CFLAGS} LDFLAGS = ${LDFLAGS} ARFLAGS = ${ARFLAGS} +MAKEINFO = ${MAKEINFO} EOF cat "${srcdir}/Makefile.in" >> Makefile @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2024 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -17,12 +17,12 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ -static int LZd_try_verify_trailer( struct LZ_decoder * const d ) +static int LZd_try_check_trailer( struct LZ_decoder * const d ) { Lzip_trailer trailer; if( Rd_available_bytes( d->rdec ) < Lt_size ) { if( !d->rdec->at_stream_end ) return 0; else return 2; } - d->verify_trailer_pending = false; + d->check_trailer_pending = false; d->member_finished = true; if( Rd_read_data( d->rdec, trailer, Lt_size ) == Lt_size && @@ -45,7 +45,7 @@ static int LZd_decode_member( struct LZ_decoder * const d ) if( d->member_finished ) return 0; if( !Rd_try_reload( rdec ) ) { if( !rdec->at_stream_end ) return 0; else return 2; } - if( d->verify_trailer_pending ) return LZd_try_verify_trailer( d ); + if( d->check_trailer_pending ) return LZd_try_check_trailer( d ); while( !Rd_finished( rdec ) ) { @@ -121,14 +121,14 @@ static int LZd_decode_member( struct LZ_decoder * const d ) old_mpos = mpos; */ if( len == min_match_len ) /* End Of Stream marker */ { - d->verify_trailer_pending = true; - return LZd_try_verify_trailer( d ); + d->check_trailer_pending = true; + return LZd_try_check_trailer( d ); } if( len == min_match_len + 1 ) /* Sync Flush marker */ { rdec->reload_pending = true; if( Rd_try_reload( rdec ) ) continue; - else { if( !rdec->at_stream_end ) return 0; else break; } + if( !rdec->at_stream_end ) return 0; else break; } return 4; } @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2024 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -50,7 +50,7 @@ static inline void Rd_finish( struct Range_decoder * const rdec ) { rdec->at_stream_end = true; } static inline bool Rd_enough_available_bytes( const struct Range_decoder * const rdec ) - { return ( Cb_used_bytes( &rdec->cb ) >= rd_min_available_bytes ); } + { return Cb_used_bytes( &rdec->cb ) >= rd_min_available_bytes; } static inline unsigned Rd_available_bytes( const struct Range_decoder * const rdec ) { return Cb_used_bytes( &rdec->cb ); } @@ -92,7 +92,7 @@ static bool Rd_find_header( struct Range_decoder * const rdec, header[i] = rdec->cb.buffer[get]; if( ++get >= rdec->cb.buffer_size ) get = 0; } - if( Lh_verify( header ) ) return true; + if( Lh_check( header ) ) return true; } if( ++rdec->cb.get >= rdec->cb.buffer_size ) rdec->cb.get = 0; ++*skippedp; @@ -137,12 +137,12 @@ static bool Rd_try_reload( struct Range_decoder * const rdec ) { if( rdec->reload_pending && Rd_available_bytes( rdec ) >= 5 ) { - int i; rdec->reload_pending = false; rdec->code = 0; - for( i = 0; i < 5; ++i ) rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); rdec->range = 0xFFFFFFFFU; - rdec->code &= rdec->range; /* make sure that first byte is discarded */ + Rd_get_byte( rdec ); /* discard first byte of the LZMA stream */ + int i; for( i = 0; i < 4; ++i ) + rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); } return !rdec->reload_pending; } @@ -334,8 +334,8 @@ struct LZ_decoder struct Range_decoder * rdec; unsigned dictionary_size; uint32_t crc; + bool check_trailer_pending; bool member_finished; - bool verify_trailer_pending; bool pos_wrapped; unsigned rep0; /* rep[0-3] latest four distances */ unsigned rep1; /* used for efficient coding of */ @@ -423,8 +423,8 @@ static inline bool LZd_init( struct LZ_decoder * const d, d->rdec = rde; d->dictionary_size = dict_size; d->crc = 0xFFFFFFFFU; + d->check_trailer_pending = false; d->member_finished = false; - d->verify_trailer_pending = false; d->pos_wrapped = false; /* prev_byte of first byte; also for LZd_peek( 0 ) on corrupt file */ d->cb.buffer[d->cb.buffer_size-1] = 0; @@ -453,7 +453,7 @@ static inline void LZd_free( struct LZ_decoder * const d ) { Cb_free( &d->cb ); } static inline bool LZd_member_finished( const struct LZ_decoder * const d ) - { return ( d->member_finished && Cb_empty( &d->cb ) ); } + { return d->member_finished && Cb_empty( &d->cb ); } static inline unsigned LZd_crc( const struct LZ_decoder * const d ) { return d->crc ^ 0xFFFFFFFFU; } diff --git a/doc/lzlib.info b/doc/lzlib.info index d81bc88..979c477 100644 --- a/doc/lzlib.info +++ b/doc/lzlib.info @@ -11,7 +11,7 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir) Lzlib Manual ************ -This manual is for Lzlib (version 1.13, 23 January 2022). +This manual is for Lzlib (version 1.14, 20 January 2024). * Menu: @@ -23,14 +23,14 @@ This manual is for Lzlib (version 1.13, 23 January 2022). * Decompression functions:: Descriptions of the decompression functions * Error codes:: Meaning of codes returned by functions * Error messages:: Error messages corresponding to error codes -* Invoking minilzip:: Command line interface of the test program +* Invoking minilzip:: Command-line interface of the test program * Data format:: Detailed format of the compressed data * Examples:: A small tutorial with examples * Problems:: Reporting bugs * Concept index:: Index of concepts - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2024 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute, and modify it. @@ -76,6 +76,13 @@ library are declared in the file 'lzlib.h'. Usage examples of the library are given in the files 'bbexample.c', 'ffexample.c', and 'minilzip.c' from the source distribution. + As 'lzlib.h' can be used by C and C++ programs, it must not impose a +choice of system headers on the program by including one of them. Therefore +it is the responsibility of the program using lzlib to include before +'lzlib.h' some header that declares the type 'uint8_t'. There are at least +four such headers in C and C++: 'stdint.h', 'cstdint', 'inttypes.h', and +'cinttypes'. + All the library functions are thread safe. The library does not install any signal handler. The decoder checks the consistency of the compressed data, so the library should never crash even in case of corrupted input. @@ -86,21 +93,21 @@ This interface is safer and less error prone than the traditional zlib interface. Compression/decompression is done when the read function is called. This -means the value returned by the position functions will not be updated until -a read call, even if a lot of data are written. If you want the data to be +means the value returned by the position functions is not updated until a +read call, even if a lot of data are written. If you want the data to be compressed in advance, just call the read function with a SIZE equal to 0. - If all the data to be compressed are written in advance, lzlib will -automatically adjust the header of the compressed data to use the largest + If all the data to be compressed are written in advance, lzlib +automatically adjusts the header of the compressed data to use the largest dictionary size that does not exceed neither the data size nor the limit given to 'LZ_compress_open'. This feature reduces the amount of memory -needed for decompression and allows minilzip to produce identical compressed -output as lzip. +needed for decompression and allows minilzip to produce identical +compressed output as lzip. - Lzlib will correctly decompress a data stream which is the concatenation -of two or more compressed data streams. The result is the concatenation of -the corresponding decompressed data streams. Integrity testing of -concatenated compressed data streams is also supported. + Lzlib correctly decompresses a data stream which is the concatenation of +two or more compressed data streams. The result is the concatenation of the +corresponding decompressed data streams. Integrity testing of concatenated +compressed data streams is also supported. Lzlib is able to compress and decompress streams of unlimited size by automatically creating multimember output. The members so created are large, @@ -111,22 +118,22 @@ concrete algorithm; it is more like "any algorithm using the LZMA coding scheme". For example, the option '-0' of lzip uses the scheme in almost the simplest way possible; issuing the longest match it can find, or a literal byte if it can't find a match. Inversely, a much more elaborated way of -finding coding sequences of minimum size than the one currently used by lzip -could be developed, and the resulting sequence could also be coded using the -LZMA coding scheme. +finding coding sequences of minimum size than the one currently used by +lzip could be developed, and the resulting sequence could also be coded +using the LZMA coding scheme. Lzlib currently implements two variants of the LZMA algorithm: fast (used by option '-0' of minilzip) and normal (used by all other compression levels). The high compression of LZMA comes from combining two basic, well-proven -compression ideas: sliding dictionaries (LZ77/78) and markov models (the -thing used by every compression algorithm that uses a range encoder or -similar order-0 entropy coder as its last stage) with segregation of -contexts according to what the bits are used for. +compression ideas: sliding dictionaries (LZ77) and markov models (the thing +used by every compression algorithm that uses a range encoder or similar +order-0 entropy coder as its last stage) with segregation of contexts +according to what the bits are used for. The ideas embodied in lzlib are due to (at least) the following people: -Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the +Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrei Markov (for the definition of Markov chains), G.N.N. Martin (for the definition of range encoding), Igor Pavlov (for putting all the above together in LZMA), and Julian Seward (for bzip2's CLI). @@ -150,7 +157,7 @@ of them are declared in 'lzlib.h'. -- Constant: LZ_API_VERSION This constant is defined in 'lzlib.h' and works as a version test - macro. The application should verify at compile time that + macro. The application should check at compile time that LZ_API_VERSION is greater than or equal to the version required by the application: @@ -170,12 +177,13 @@ desire to have certain symbols and prototypes exposed. -- Function: int LZ_api_version ( void ) If LZ_API_VERSION >= 1012, this function is declared in 'lzlib.h' (else it doesn't exist). It returns the LZ_API_VERSION of the library object - code being used. The application should verify at run time that the + code being used. The application should check at run time that the value returned by 'LZ_api_version' is greater than or equal to the - version required by the application. An application may be dinamically + version required by the application. An application may be dynamically linked at run time with a different version of lzlib than the one it - was compiled for, and this should not break the program as long as the - library used provides the functionality required by the application. + was compiled for, and this should not break the application as long as + the library used provides the functionality required by the + application. #if defined LZ_API_VERSION && LZ_API_VERSION >= 1012 if( LZ_api_version() < 1012 ) @@ -258,7 +266,7 @@ File: lzlib.info, Node: Compression functions, Next: Decompression functions, These are the functions used to compress data. In case of error, all of them return -1 or 0, for signed and unsigned return values respectively, -except 'LZ_compress_open' whose return value must be verified by calling +except 'LZ_compress_open' whose return value must be checked by calling 'LZ_compress_errno' before using it. -- Function: struct LZ_Encoder * LZ_compress_open ( const int @@ -269,7 +277,7 @@ except 'LZ_compress_open' whose return value must be verified by calling LZ_compress functions, or a null pointer if the encoder could not be allocated. - The returned pointer must be verified by calling 'LZ_compress_errno' + The returned pointer must be checked by calling 'LZ_compress_errno' before using it. If 'LZ_compress_errno' does not return 'LZ_ok', the returned pointer must not be used and should be freed with 'LZ_compress_close' to avoid memory leaks. @@ -277,8 +285,8 @@ except 'LZ_compress_open' whose return value must be verified by calling DICTIONARY_SIZE sets the dictionary size to be used, in bytes. Valid values range from 4 KiB to 512 MiB. Note that dictionary sizes are quantized. If the size specified does not match one of the valid - sizes, it will be rounded upwards by adding up to - (DICTIONARY_SIZE / 8) to it. + sizes, it is rounded upwards by adding up to (DICTIONARY_SIZE / 8) to + it. MATCH_LEN_LIMIT sets the match length limit in bytes. Valid values range from 5 to 273. Larger values usually give better compression @@ -286,15 +294,14 @@ except 'LZ_compress_open' whose return value must be verified by calling If DICTIONARY_SIZE is 65535 and MATCH_LEN_LIMIT is 16, the fast variant of LZMA is chosen, which produces identical compressed output - as 'lzip -0'. (The dictionary size used will be rounded upwards to - 64 KiB). + as 'lzip -0'. (The dictionary size used is rounded upwards to 64 KiB). MEMBER_SIZE sets the member size limit in bytes. Valid values range from 4 KiB to 2 PiB. A small member size may degrade compression ratio, so use it only when needed. To produce a single-member data stream, give MEMBER_SIZE a value larger than the amount of data to be - produced. Values larger than 2 PiB will be reduced to 2 PiB to prevent - the uncompressed size of the member from overflowing. + produced. Values larger than 2 PiB are reduced to 2 PiB to prevent the + uncompressed size of the member from overflowing. -- Function: int LZ_compress_close ( struct LZ_Encoder * const ENCODER ) Frees all dynamically allocated data structures for this stream. This @@ -420,7 +427,7 @@ File: lzlib.info, Node: Decompression functions, Next: Error codes, Prev: Com These are the functions used to decompress data. In case of error, all of them return -1 or 0, for signed and unsigned return values respectively, -except 'LZ_decompress_open' whose return value must be verified by calling +except 'LZ_decompress_open' whose return value must be checked by calling 'LZ_decompress_errno' before using it. -- Function: struct LZ_Decoder * LZ_decompress_open ( void ) @@ -429,7 +436,7 @@ except 'LZ_decompress_open' whose return value must be verified by calling LZ_decompress functions, or a null pointer if the decoder could not be allocated. - The returned pointer must be verified by calling 'LZ_decompress_errno' + The returned pointer must be checked by calling 'LZ_decompress_errno' before using it. If 'LZ_decompress_errno' does not return 'LZ_ok', the returned pointer must not be used and should be freed with 'LZ_decompress_close' to avoid memory leaks. @@ -459,13 +466,13 @@ except 'LZ_decompress_open' whose return value must be verified by calling Resets the error state of DECODER and enters a search state that lasts until a new member header (or the end of the stream) is found. After a successful call to 'LZ_decompress_sync_to_member', data written with - 'LZ_decompress_write' will be consumed and 'LZ_decompress_read' will - return 0 until a header is found. + 'LZ_decompress_write' is consumed and 'LZ_decompress_read' returns 0 + until a header is found. - This function is useful to discard any data preceding the first member, - or to discard the rest of the current member, for example in case of a - data error. If the decoder is already at the beginning of a member, - this function does nothing. + This function is useful to discard any data preceding the first + member, or to discard the rest of the current member, for example in + case of a data error. If the decoder is already at the beginning of a + member, this function does nothing. -- Function: int LZ_decompress_read ( struct LZ_Decoder * const DECODER, uint8_t * const BUFFER, const int SIZE ) @@ -571,7 +578,7 @@ File: lzlib.info, Node: Error codes, Next: Error messages, Prev: Decompressio Most library functions return -1 to indicate that they have failed. But this return value only tells you that an error has occurred. To find out -what kind of error it was, you need to verify the error code by calling +what kind of error it was, you need to check the error code by calling 'LZ_(de)compress_errno'. Library functions don't change the value returned by @@ -639,19 +646,20 @@ File: lzlib.info, Node: Invoking minilzip, Next: Data format, Prev: Error mes 9 Invoking minilzip ******************* -Minilzip is a test program for the compression library lzlib, fully -compatible with lzip 1.4 or newer. +Minilzip is a test program for the compression library lzlib, compatible +with lzip 1.4 or newer. Lzip is a lossless data compressor with a user interface similar to the one of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov -chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity -checking to maximize interoperability and optimize safety. Lzip can compress -about as fast as gzip (lzip -0) or compress most files more than bzip2 -(lzip -9). Decompression speed is intermediate between gzip and bzip2. Lzip -is better than gzip and bzip2 from a data recovery perspective. Lzip has -been designed, written, and tested with great care to replace gzip and -bzip2 as the standard general-purpose compressed format for unix-like -systems. +chain-Algorithm' (LZMA) stream format to maximize interoperability. The +maximum dictionary size is 512 MiB so that any lzip file can be decompressed +on 32-bit machines. Lzip provides accurate and robust 3-factor integrity +checking. Lzip can compress about as fast as gzip (lzip -0) or compress most +files more than bzip2 (lzip -9). Decompression speed is intermediate between +gzip and bzip2. Lzip is better than gzip and bzip2 from a data recovery +perspective. Lzip has been designed, written, and tested with great care to +replace gzip and bzip2 as the standard general-purpose compressed format for +Unix-like systems. The format for running minilzip is: @@ -660,7 +668,8 @@ The format for running minilzip is: If no file names are specified, minilzip compresses (or decompresses) from standard input to standard output. A hyphen '-' used as a FILE argument means standard input. It can be mixed with other FILES and is read just -once, the first time it appears in the command line. +once, the first time it appears in the command line. Remember to prepend +'./' to any file name beginning with a hyphen, or use '--'. minilzip supports the following options: *Note Argument syntax: (arg_parser)Argument syntax. @@ -696,17 +705,18 @@ once, the first time it appears in the command line. members). This option (or '-o') is needed when reading from a named pipe (fifo) or from a device. Use it also to recover as much of the decompressed data as possible when decompressing a corrupt file. '-c' - overrides '-o' and '-S'. '-c' has no effect when testing or listing. + overrides '-o' and '-S'. '-c' has no effect when testing. '-d' '--decompress' - Decompress the files specified. If a file does not exist, can't be - opened, or the destination file already exists and '--force' has not - been specified, minilzip continues decompressing the rest of the files - and exits with error status 1. If a file fails to decompress, or is a - terminal, minilzip exits immediately with error status 2 without - decompressing the rest of the files. A terminal is considered an - uncompressed file, and therefore invalid. + Decompress the files specified. The integrity of the files specified is + checked. If a file does not exist, can't be opened, or the destination + file already exists and '--force' has not been specified, minilzip + continues decompressing the rest of the files and exits with error + status 1. If a file fails to decompress, or is a terminal, minilzip + exits immediately with error status 2 without decompressing the rest + of the files. A terminal is considered an uncompressed file, and + therefore invalid. '-f' '--force' @@ -725,17 +735,17 @@ once, the first time it appears in the command line. '--match-length=BYTES' When compressing, set the match length limit in bytes. After a match this long is found, the search is finished. Valid values range from 5 - to 273. Larger values usually give better compression ratios but longer - compression times. + to 273. Larger values usually give better compression ratios but + longer compression times. '-o FILE' '--output=FILE' - If '-c' has not been also specified, write the (de)compressed output to - FILE; keep input files unchanged. If compressing several files, each - file is compressed independently. (The output consists of a sequence of - independently compressed members). This option (or '-c') is needed when - reading from a named pipe (fifo) or from a device. '-o -' is - equivalent to '-c'. '-o' has no effect when testing or listing. + If '-c' has not been also specified, write the (de)compressed output + to FILE; keep input files unchanged. If compressing several files, + each file is compressed independently. (The output consists of a + sequence of independently compressed members). This option (or '-c') + is needed when reading from a named pipe (fifo) or from a device. + '-o -' is equivalent to '-c'. '-o' has no effect when testing. When compressing and splitting the output in volumes, FILE is used as a prefix, and several files named 'FILE00001.lz', 'FILE00002.lz', etc, @@ -748,13 +758,13 @@ once, the first time it appears in the command line. '-s BYTES' '--dictionary-size=BYTES' When compressing, set the dictionary size limit in bytes. Minilzip - will use for each file the largest dictionary size that does not - exceed neither the file size nor this limit. Valid values range from - 4 KiB to 512 MiB. Values 12 to 29 are interpreted as powers of two, - meaning 2^12 to 2^29 bytes. Dictionary sizes are quantized so that - they can be coded in just one byte (*note coded-dict-size::). If the - size specified does not match one of the valid sizes, it will be - rounded upwards by adding up to (BYTES / 8) to it. + uses for each file the largest dictionary size that does not exceed + neither the file size nor this limit. Valid values range from 4 KiB to + 512 MiB. Values 12 to 29 are interpreted as powers of two, meaning + 2^12 to 2^29 bytes. Dictionary sizes are quantized so that they can be + coded in just one byte (*note coded-dict-size::). If the size + specified does not match one of the valid sizes, it is rounded upwards + by adding up to (BYTES / 8) to it. For maximum compression you should use a dictionary size limit as large as possible, but keep in mind that the decompression memory requirement @@ -776,7 +786,7 @@ once, the first time it appears in the command line. really performs a trial decompression and throws away the result. Use it together with '-v' to see information about the files. If a file fails the test, does not exist, can't be opened, or is a terminal, - minilzip continues checking the rest of the files. A final diagnostic + minilzip continues testing the rest of the files. A final diagnostic is shown at verbosity level 1 or higher if any file fails the test when testing multiple files. @@ -839,26 +849,29 @@ once, the first time it appears in the command line. defined). *Note Library version::. - Numbers given as arguments to options may be followed by a multiplier -and an optional 'B' for "byte". + Numbers given as arguments to options may be expressed in decimal, +hexadecimal, or octal (using the same syntax as integer constants in C++), +and may be followed by a multiplier and an optional 'B' for "byte". Table of SI and binary prefixes (unit multipliers): -Prefix Value | Prefix Value -k kilobyte (10^3 = 1000) | Ki kibibyte (2^10 = 1024) -M megabyte (10^6) | Mi mebibyte (2^20) -G gigabyte (10^9) | Gi gibibyte (2^30) -T terabyte (10^12) | Ti tebibyte (2^40) -P petabyte (10^15) | Pi pebibyte (2^50) -E exabyte (10^18) | Ei exbibyte (2^60) -Z zettabyte (10^21) | Zi zebibyte (2^70) -Y yottabyte (10^24) | Yi yobibyte (2^80) +Prefix Value | Prefix Value +k kilobyte (10^3 = 1000) | Ki kibibyte (2^10 = 1024) +M megabyte (10^6) | Mi mebibyte (2^20) +G gigabyte (10^9) | Gi gibibyte (2^30) +T terabyte (10^12) | Ti tebibyte (2^40) +P petabyte (10^15) | Pi pebibyte (2^50) +E exabyte (10^18) | Ei exbibyte (2^60) +Z zettabyte (10^21) | Zi zebibyte (2^70) +Y yottabyte (10^24) | Yi yobibyte (2^80) +R ronnabyte (10^27) | Ri robibyte (2^90) +Q quettabyte (10^30) | Qi quebibyte (2^100) Exit status: 0 for a normal exit, 1 for environmental problems (file not -found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid -input file, 3 for an internal consistency error (e.g., bug) which caused -minilzip to panic. +found, invalid command-line options, I/O errors, etc), 2 to indicate a +corrupt or invalid input file, 3 for an internal consistency error (e.g., +bug) which caused minilzip to panic. File: lzlib.info, Node: Data format, Next: Examples, Prev: Invoking minilzip, Up: Top @@ -886,7 +899,7 @@ when there is no longer anything to take away. represents a variable number of bytes. - Lzip data consist of a series of independent "members" (compressed data + Lzip data consist of one or more independent "members" (compressed data sets). The members simply appear one after another in the data stream, with no additional information before, between, or after them. Each member can encode in compressed form up to 16 EiB - 1 byte of uncompressed data. The @@ -933,10 +946,10 @@ size of a multimember data stream is unlimited. 'Member size (8 bytes)' Total size of the member, including header and trailer. This field acts - as a distributed index, allows the verification of stream integrity, - and facilitates the safe recovery of undamaged members from - multimember files. Member size should be limited to 2 PiB to prevent - the data size field from overflowing. + as a distributed index, improves the checking of stream integrity, and + facilitates the safe recovery of undamaged members from multimember + files. Lzip limits the member size to 2 PiB to prevent the data size + field from overflowing. @@ -1234,7 +1247,7 @@ int ffrsdecompress( struct LZ_Decoder * const decoder, if( LZ_decompress_errno( decoder ) == LZ_header_error || LZ_decompress_errno( decoder ) == LZ_data_error ) { LZ_decompress_sync_to_member( decoder ); continue; } - else break; + break; } len = fwrite( buffer, 1, ret, outfile ); if( len < ret ) break; @@ -1293,27 +1306,27 @@ Concept index Tag Table: Node: Top215 Node: Introduction1338 -Node: Library version6413 -Node: Buffering8957 -Node: Parameter limits10182 -Node: Compression functions11136 -Ref: member_size12946 -Ref: sync_flush14712 -Node: Decompression functions19400 -Node: Error codes26968 -Node: Error messages29259 -Node: Invoking minilzip29838 -Node: Data format39786 -Ref: coded-dict-size41232 -Node: Examples42641 -Node: Buffer compression43602 -Node: Buffer decompression45122 -Node: File compression46536 -Node: File decompression47519 -Node: File compression mm48523 -Node: Skipping data errors51552 -Node: Problems52862 -Node: Concept index53423 +Node: Library version6778 +Node: Buffering9329 +Node: Parameter limits10554 +Node: Compression functions11508 +Ref: member_size13301 +Ref: sync_flush15063 +Node: Decompression functions19751 +Node: Error codes27308 +Node: Error messages29598 +Node: Invoking minilzip30177 +Node: Data format40595 +Ref: coded-dict-size42041 +Node: Examples43446 +Node: Buffer compression44407 +Node: Buffer decompression45927 +Node: File compression47341 +Node: File decompression48324 +Node: File compression mm49328 +Node: Skipping data errors52357 +Node: Problems53662 +Node: Concept index54223 End Tag Table diff --git a/doc/lzlib.texi b/doc/lzlib.texi index 3caf9dd..75cb7ba 100644 --- a/doc/lzlib.texi +++ b/doc/lzlib.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 23 January 2022 -@set VERSION 1.13 +@set UPDATED 20 January 2024 +@set VERSION 1.14 @dircategory Compression @direntry @@ -44,7 +44,7 @@ This manual is for Lzlib (version @value{VERSION}, @value{UPDATED}). * Decompression functions:: Descriptions of the decompression functions * Error codes:: Meaning of codes returned by functions * Error messages:: Error messages corresponding to error codes -* Invoking minilzip:: Command line interface of the test program +* Invoking minilzip:: Command-line interface of the test program * Data format:: Detailed format of the compressed data * Examples:: A small tutorial with examples * Problems:: Reporting bugs @@ -52,7 +52,7 @@ This manual is for Lzlib (version @value{VERSION}, @value{UPDATED}). @end menu @sp 1 -Copyright @copyright{} 2009-2022 Antonio Diaz Diaz. +Copyright @copyright{} 2009-2024 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute, and modify it. @@ -107,6 +107,13 @@ are declared in the file @samp{lzlib.h}. Usage examples of the library are given in the files @samp{bbexample.c}, @samp{ffexample.c}, and @samp{minilzip.c} from the source distribution. +As @samp{lzlib.h} can be used by C and C++ programs, it must not impose a +choice of system headers on the program by including one of them. Therefore +it is the responsibility of the program using lzlib to include before +@samp{lzlib.h} some header that declares the type @samp{uint8_t}. There are +at least four such headers in C and C++: @samp{stdint.h}, @samp{cstdint}, +@samp{inttypes.h}, and @samp{cinttypes}. + All the library functions are thread safe. The library does not install any signal handler. The decoder checks the consistency of the compressed data, so the library should never crash even in case of corrupted input. @@ -117,19 +124,19 @@ This interface is safer and less error prone than the traditional zlib interface. Compression/decompression is done when the read function is called. This -means the value returned by the position functions will not be updated until -a read call, even if a lot of data are written. If you want the data to be +means the value returned by the position functions is not updated until a +read call, even if a lot of data are written. If you want the data to be compressed in advance, just call the read function with a @var{size} equal to 0. -If all the data to be compressed are written in advance, lzlib will -automatically adjust the header of the compressed data to use the largest -dictionary size that does not exceed neither the data size nor the limit -given to @samp{LZ_compress_open}. This feature reduces the amount of memory -needed for decompression and allows minilzip to produce identical compressed -output as lzip. +If all the data to be compressed are written in advance, lzlib automatically +adjusts the header of the compressed data to use the largest dictionary size +that does not exceed neither the data size nor the limit given to +@samp{LZ_compress_open}. This feature reduces the amount of memory needed for +decompression and allows minilzip to produce identical compressed output as +lzip. -Lzlib will correctly decompress a data stream which is the concatenation of +Lzlib correctly decompresses a data stream which is the concatenation of two or more compressed data streams. The result is the concatenation of the corresponding decompressed data streams. Integrity testing of concatenated compressed data streams is also supported. @@ -140,24 +147,24 @@ about @w{2 PiB} each. In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a concrete algorithm; it is more like "any algorithm using the LZMA coding -scheme". For example, the option @samp{-0} of lzip uses the scheme in almost the -simplest way possible; issuing the longest match it can find, or a literal -byte if it can't find a match. Inversely, a much more elaborated way of -finding coding sequences of minimum size than the one currently used by lzip -could be developed, and the resulting sequence could also be coded using the -LZMA coding scheme. +scheme". For example, the option @option{-0} of lzip uses the scheme in +almost the simplest way possible; issuing the longest match it can find, or +a literal byte if it can't find a match. Inversely, a much more elaborated +way of finding coding sequences of minimum size than the one currently used +by lzip could be developed, and the resulting sequence could also be coded +using the LZMA coding scheme. Lzlib currently implements two variants of the LZMA algorithm: fast (used by -option @samp{-0} of minilzip) and normal (used by all other compression levels). +option @option{-0} of minilzip) and normal (used by all other compression levels). The high compression of LZMA comes from combining two basic, well-proven -compression ideas: sliding dictionaries (LZ77/78) and markov models (the -thing used by every compression algorithm that uses a range encoder or -similar order-0 entropy coder as its last stage) with segregation of -contexts according to what the bits are used for. +compression ideas: sliding dictionaries (LZ77) and markov models (the thing +used by every compression algorithm that uses a range encoder or similar +order-0 entropy coder as its last stage) with segregation of contexts +according to what the bits are used for. The ideas embodied in lzlib are due to (at least) the following people: -Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the +Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrei Markov (for the definition of Markov chains), G.N.N. Martin (for the definition of range encoding), Igor Pavlov (for putting all the above together in LZMA), and Julian Seward (for bzip2's CLI). @@ -180,7 +187,7 @@ of them are declared in @samp{lzlib.h}. @defvr Constant LZ_API_VERSION This constant is defined in @samp{lzlib.h} and works as a version test -macro. The application should verify at compile time that LZ_API_VERSION is +macro. The application should check at compile time that LZ_API_VERSION is greater than or equal to the version required by the application: @example @@ -202,12 +209,12 @@ certain symbols and prototypes exposed. @deftypefun int LZ_api_version ( void ) If LZ_API_VERSION >= 1012, this function is declared in @samp{lzlib.h} (else it doesn't exist). It returns the LZ_API_VERSION of the library object code -being used. The application should verify at run time that the value +being used. The application should check at run time that the value returned by @code{LZ_api_version} is greater than or equal to the version -required by the application. An application may be dinamically linked at run +required by the application. An application may be dynamically linked at run time with a different version of lzlib than the one it was compiled for, and -this should not break the program as long as the library used provides the -functionality required by the application. +this should not break the application as long as the library used provides +the functionality required by the application. @example #if defined LZ_API_VERSION && LZ_API_VERSION >= 1012 @@ -297,7 +304,7 @@ Returns the largest valid match length limit [273]. These are the functions used to compress data. In case of error, all of them return -1 or 0, for signed and unsigned return values respectively, -except @samp{LZ_compress_open} whose return value must be verified by +except @samp{LZ_compress_open} whose return value must be checked by calling @samp{LZ_compress_errno} before using it. @@ -307,15 +314,15 @@ pointer that can only be used as the @var{encoder} argument for the other LZ_compress functions, or a null pointer if the encoder could not be allocated. -The returned pointer must be verified by calling -@samp{LZ_compress_errno} before using it. If @samp{LZ_compress_errno} -does not return @samp{LZ_ok}, the returned pointer must not be used and -should be freed with @samp{LZ_compress_close} to avoid memory leaks. +The returned pointer must be checked by calling @samp{LZ_compress_errno} +before using it. If @samp{LZ_compress_errno} does not return @samp{LZ_ok}, +the returned pointer must not be used and should be freed with +@samp{LZ_compress_close} to avoid memory leaks. @var{dictionary_size} sets the dictionary size to be used, in bytes. Valid values range from @w{4 KiB} to @w{512 MiB}. Note that dictionary sizes are quantized. If the size specified does not match one of the -valid sizes, it will be rounded upwards by adding up to +valid sizes, it is rounded upwards by adding up to @w{(@var{dictionary_size} / 8)} to it. @var{match_len_limit} sets the match length limit in bytes. Valid values @@ -324,7 +331,7 @@ but longer compression times. If @var{dictionary_size} is 65535 and @var{match_len_limit} is 16, the fast variant of LZMA is chosen, which produces identical compressed output as -@w{@samp{lzip -0}}. (The dictionary size used will be rounded upwards to +@w{@samp{lzip -0}}. (The dictionary size used is rounded upwards to @w{64 KiB}). @anchor{member_size} @@ -332,8 +339,8 @@ variant of LZMA is chosen, which produces identical compressed output as from @w{4 KiB} to @w{2 PiB}. A small member size may degrade compression ratio, so use it only when needed. To produce a single-member data stream, give @var{member_size} a value larger than the amount of data to be -produced. Values larger than @w{2 PiB} will be reduced to @w{2 PiB} to -prevent the uncompressed size of the member from overflowing. +produced. Values larger than @w{2 PiB} are reduced to @w{2 PiB} to prevent +the uncompressed size of the member from overflowing. @end deftypefun @@ -478,20 +485,19 @@ perhaps not yet read. These are the functions used to decompress data. In case of error, all of them return -1 or 0, for signed and unsigned return values respectively, -except @samp{LZ_decompress_open} whose return value must be verified by +except @samp{LZ_decompress_open} whose return value must be checked by calling @samp{LZ_decompress_errno} before using it. @deftypefun {struct LZ_Decoder *} LZ_decompress_open ( void ) Initializes the internal stream state for decompression and returns a -pointer that can only be used as the @var{decoder} argument for the -other LZ_decompress functions, or a null pointer if the decoder could -not be allocated. - -The returned pointer must be verified by calling -@samp{LZ_decompress_errno} before using it. If -@samp{LZ_decompress_errno} does not return @samp{LZ_ok}, the returned -pointer must not be used and should be freed with +pointer that can only be used as the @var{decoder} argument for the other +LZ_decompress functions, or a null pointer if the decoder could not be +allocated. + +The returned pointer must be checked by calling @samp{LZ_decompress_errno} +before using it. If @samp{LZ_decompress_errno} does not return @samp{LZ_ok}, +the returned pointer must not be used and should be freed with @samp{LZ_decompress_close} to avoid memory leaks. @end deftypefun @@ -523,16 +529,16 @@ internal buffers is discarded. Position counters are set to 0. @deftypefun int LZ_decompress_sync_to_member ( struct LZ_Decoder * const @var{decoder} ) -Resets the error state of @var{decoder} and enters a search state that -lasts until a new member header (or the end of the stream) is found. -After a successful call to @samp{LZ_decompress_sync_to_member}, data -written with @samp{LZ_decompress_write} will be consumed and -@samp{LZ_decompress_read} will return 0 until a header is found. - -This function is useful to discard any data preceding the first member, -or to discard the rest of the current member, for example in case of a -data error. If the decoder is already at the beginning of a member, this -function does nothing. +Resets the error state of @var{decoder} and enters a search state that lasts +until a new member header (or the end of the stream) is found. After a +successful call to @samp{LZ_decompress_sync_to_member}, data written with +@samp{LZ_decompress_write} is consumed and @samp{LZ_decompress_read} returns +0 until a header is found. + +This function is useful to discard any data preceding the first member, or +to discard the rest of the current member, for example in case of a data +error. If the decoder is already at the beginning of a member, this function +does nothing. @end deftypefun @@ -654,7 +660,7 @@ perhaps not yet read. Most library functions return -1 to indicate that they have failed. But this return value only tells you that an error has occurred. To find out -what kind of error it was, you need to verify the error code by calling +what kind of error it was, you need to check the error code by calling @samp{LZ_(de)compress_errno}. Library functions don't change the value returned by @@ -728,20 +734,21 @@ The value of @var{lz_errno} normally comes from a call to @cindex invoking @cindex options -Minilzip is a test program for the compression library lzlib, fully -compatible with lzip 1.4 or newer. +Minilzip is a test program for the compression library lzlib, compatible +with lzip 1.4 or newer. @uref{http://www.nongnu.org/lzip/lzip.html,,Lzip} is a lossless data compressor with a user interface similar to the one of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov -chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity -checking to maximize interoperability and optimize safety. Lzip can compress -about as fast as gzip @w{(lzip -0)} or compress most files more than bzip2 -@w{(lzip -9)}. Decompression speed is intermediate between gzip and bzip2. -Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip -has been designed, written, and tested with great care to replace gzip and -bzip2 as the standard general-purpose compressed format for unix-like -systems. +chain-Algorithm' (LZMA) stream format to maximize interoperability. The +maximum dictionary size is 512 MiB so that any lzip file can be decompressed +on 32-bit machines. Lzip provides accurate and robust 3-factor integrity +checking. Lzip can compress about as fast as gzip @w{(lzip -0)} or compress most +files more than bzip2 @w{(lzip -9)}. Decompression speed is intermediate between +gzip and bzip2. Lzip is better than gzip and bzip2 from a data recovery +perspective. Lzip has been designed, written, and tested with great care to +replace gzip and bzip2 as the standard general-purpose compressed format for +Unix-like systems. @noindent The format for running minilzip is: @@ -754,7 +761,8 @@ minilzip [@var{options}] [@var{files}] If no file names are specified, minilzip compresses (or decompresses) from standard input to standard output. A hyphen @samp{-} used as a @var{file} argument means standard input. It can be mixed with other @var{files} and is -read just once, the first time it appears in the command line. +read just once, the first time it appears in the command line. Remember to +prepend @file{./} to any file name beginning with a hyphen, or use @samp{--}. minilzip supports the following @uref{http://www.nongnu.org/arg-parser/manual/arg_parser_manual.html#Argument-syntax,,options}: @@ -791,19 +799,20 @@ compression ratio, so use it only when needed. Valid values range from Compress or decompress to standard output; keep input files unchanged. If compressing several files, each file is compressed independently. (The output consists of a sequence of independently compressed members). This -option (or @samp{-o}) is needed when reading from a named pipe (fifo) or +option (or @option{-o}) is needed when reading from a named pipe (fifo) or from a device. Use it also to recover as much of the decompressed data as -possible when decompressing a corrupt file. @samp{-c} overrides @samp{-o} -and @samp{-S}. @samp{-c} has no effect when testing or listing. +possible when decompressing a corrupt file. @option{-c} overrides @option{-o} +and @option{-S}. @option{-c} has no effect when testing. @item -d @itemx --decompress -Decompress the files specified. If a file does not exist, can't be opened, -or the destination file already exists and @samp{--force} has not been -specified, minilzip continues decompressing the rest of the files and exits with -error status 1. If a file fails to decompress, or is a terminal, minilzip exits -immediately with error status 2 without decompressing the rest of the files. -A terminal is considered an uncompressed file, and therefore invalid. +Decompress the files specified. The integrity of the files specified is +checked. If a file does not exist, can't be opened, or the destination file +already exists and @option{--force} has not been specified, minilzip continues +decompressing the rest of the files and exits with error status 1. If a file +fails to decompress, or is a terminal, minilzip exits immediately with error +status 2 without decompressing the rest of the files. A terminal is +considered an uncompressed file, and therefore invalid. @item -f @itemx --force @@ -820,19 +829,19 @@ Keep (don't delete) input files during compression or decompression. @item -m @var{bytes} @itemx --match-length=@var{bytes} -When compressing, set the match length limit in bytes. After a match -this long is found, the search is finished. Valid values range from 5 to -273. Larger values usually give better compression ratios but longer -compression times. +When compressing, set the match length limit in bytes. After a match this +long is found, the search is finished. Valid values range from 5 to 273. +Larger values usually give better compression ratios but longer compression +times. @item -o @var{file} @itemx --output=@var{file} -If @samp{-c} has not been also specified, write the (de)compressed output to -@var{file}; keep input files unchanged. If compressing several files, each -file is compressed independently. (The output consists of a sequence of -independently compressed members). This option (or @samp{-c}) is needed when -reading from a named pipe (fifo) or from a device. @w{@samp{-o -}} is -equivalent to @samp{-c}. @samp{-o} has no effect when testing or listing. +If @option{-c} has not been also specified, write the (de)compressed output +to @var{file}; keep input files unchanged. If compressing several files, +each file is compressed independently. (The output consists of a sequence of +independently compressed members). This option (or @option{-c}) is needed +when reading from a named pipe (fifo) or from a device. @w{@option{-o -}} is +equivalent to @option{-c}. @option{-o} has no effect when testing. When compressing and splitting the output in volumes, @var{file} is used as a prefix, and several files named @samp{@var{file}00001.lz}, @@ -845,14 +854,14 @@ Quiet operation. Suppress all messages. @item -s @var{bytes} @itemx --dictionary-size=@var{bytes} -When compressing, set the dictionary size limit in bytes. Minilzip will use -for each file the largest dictionary size that does not exceed neither -the file size nor this limit. Valid values range from @w{4 KiB} to -@w{512 MiB}. Values 12 to 29 are interpreted as powers of two, meaning -2^12 to 2^29 bytes. Dictionary sizes are quantized so that they can be -coded in just one byte (@pxref{coded-dict-size}). If the size specified -does not match one of the valid sizes, it will be rounded upwards by -adding up to @w{(@var{bytes} / 8)} to it. +When compressing, set the dictionary size limit in bytes. Minilzip uses for +each file the largest dictionary size that does not exceed neither the file +size nor this limit. Valid values range from @w{4 KiB} to @w{512 MiB}. +Values 12 to 29 are interpreted as powers of two, meaning 2^12 to 2^29 +bytes. Dictionary sizes are quantized so that they can be coded in just one +byte (@pxref{coded-dict-size}). If the size specified does not match one of +the valid sizes, it is rounded upwards by adding up to @w{(@var{bytes} / 8)} +to it. For maximum compression you should use a dictionary size limit as large as possible, but keep in mind that the decompression memory requirement @@ -860,7 +869,7 @@ is affected at compression time by the choice of dictionary size limit. @item -S @var{bytes} @itemx --volume-size=@var{bytes} -When compressing, and @samp{-c} has not been also specified, split the +When compressing, and @option{-c} has not been also specified, split the compressed output into several volume files with names @samp{original_name00001.lz}, @samp{original_name00002.lz}, etc, and set the volume size limit to @var{bytes}. Input files are kept unchanged. Each @@ -872,11 +881,11 @@ from @w{100 kB} to @w{4 EiB}. @itemx --test Check integrity of the files specified, but don't decompress them. This really performs a trial decompression and throws away the result. Use it -together with @samp{-v} to see information about the files. If a file +together with @option{-v} to see information about the files. If a file fails the test, does not exist, can't be opened, or is a terminal, minilzip -continues checking the rest of the files. A final diagnostic is shown at -verbosity level 1 or higher if any file fails the test when testing -multiple files. +continues testing the rest of the files. A final diagnostic is shown at +verbosity level 1 or higher if any file fails the test when testing multiple +files. @item -v @itemx --verbose @@ -890,18 +899,18 @@ and trailer contents (CRC, data size, member size). @item -0 .. -9 Compression level. Set the compression parameters (dictionary size and match length limit) as shown in the table below. The default compression -level is @samp{-6}, equivalent to @w{@samp{-s8MiB -m36}}. Note that -@samp{-9} can be much slower than @samp{-0}. These options have no +level is @option{-6}, equivalent to @w{@option{-s8MiB -m36}}. Note that +@option{-9} can be much slower than @option{-0}. These options have no effect when decompressing or testing. -The bidimensional parameter space of LZMA can't be mapped to a linear -scale optimal for all files. If your files are large, very repetitive, -etc, you may need to use the options @samp{--dictionary-size} and -@samp{--match-length} directly to achieve optimal performance. +The bidimensional parameter space of LZMA can't be mapped to a linear scale +optimal for all files. If your files are large, very repetitive, etc, you +may need to use the options @option{--dictionary-size} and +@option{--match-length} directly to achieve optimal performance. -If several compression levels or @samp{-s} or @samp{-m} options are -given, the last setting is used. For example @w{@samp{-9 -s64MiB}} is -equivalent to @w{@samp{-s64MiB -m273}} +If several compression levels or @option{-s} or @option{-m} options are +given, the last setting is used. For example @w{@option{-9 -s64MiB}} is +equivalent to @w{@option{-s64MiB -m273}} @multitable {Level} {Dictionary size (-s)} {Match length limit (-m)} @item Level @tab Dictionary size (-s) @tab Match length limit (-m) @@ -942,28 +951,31 @@ version of lzlib being used and the value of LZ_API_VERSION (if defined). @end table -Numbers given as arguments to options may be followed by a multiplier -and an optional @samp{B} for "byte". +Numbers given as arguments to options may be expressed in decimal, +hexadecimal, or octal (using the same syntax as integer constants in C++), +and may be followed by a multiplier and an optional @samp{B} for "byte". Table of SI and binary prefixes (unit multipliers): -@multitable {Prefix} {kilobyte (10^3 = 1000)} {|} {Prefix} {kibibyte (2^10 = 1024)} +@multitable {Prefix} {kilobyte (10^3 = 1000)} {|} {Prefix} {kibibyte (2^10 = 1024)} @item Prefix @tab Value @tab | @tab Prefix @tab Value -@item k @tab kilobyte (10^3 = 1000) @tab | @tab Ki @tab kibibyte (2^10 = 1024) -@item M @tab megabyte (10^6) @tab | @tab Mi @tab mebibyte (2^20) -@item G @tab gigabyte (10^9) @tab | @tab Gi @tab gibibyte (2^30) -@item T @tab terabyte (10^12) @tab | @tab Ti @tab tebibyte (2^40) -@item P @tab petabyte (10^15) @tab | @tab Pi @tab pebibyte (2^50) -@item E @tab exabyte (10^18) @tab | @tab Ei @tab exbibyte (2^60) -@item Z @tab zettabyte (10^21) @tab | @tab Zi @tab zebibyte (2^70) -@item Y @tab yottabyte (10^24) @tab | @tab Yi @tab yobibyte (2^80) +@item k @tab kilobyte (10^3 = 1000) @tab | @tab Ki @tab kibibyte (2^10 = 1024) +@item M @tab megabyte (10^6) @tab | @tab Mi @tab mebibyte (2^20) +@item G @tab gigabyte (10^9) @tab | @tab Gi @tab gibibyte (2^30) +@item T @tab terabyte (10^12) @tab | @tab Ti @tab tebibyte (2^40) +@item P @tab petabyte (10^15) @tab | @tab Pi @tab pebibyte (2^50) +@item E @tab exabyte (10^18) @tab | @tab Ei @tab exbibyte (2^60) +@item Z @tab zettabyte (10^21) @tab | @tab Zi @tab zebibyte (2^70) +@item Y @tab yottabyte (10^24) @tab | @tab Yi @tab yobibyte (2^80) +@item R @tab ronnabyte (10^27) @tab | @tab Ri @tab robibyte (2^90) +@item Q @tab quettabyte (10^30) @tab | @tab Qi @tab quebibyte (2^100) @end multitable @sp 1 -Exit status: 0 for a normal exit, 1 for environmental problems (file not -found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid -input file, 3 for an internal consistency error (e.g., bug) which caused -minilzip to panic. +Exit status: 0 for a normal exit, 1 for environmental problems +(file not found, invalid command-line options, I/O errors, etc), 2 to +indicate a corrupt or invalid input file, 3 for an internal consistency +error (e.g., bug) which caused minilzip to panic. @node Data format @@ -994,7 +1006,7 @@ represents one byte; a box like this: represents a variable number of bytes. @sp 1 -Lzip data consist of a series of independent "members" (compressed data +Lzip data consist of one or more independent "members" (compressed data sets). The members simply appear one after another in the data stream, with no additional information before, between, or after them. Each member can encode in compressed form up to @w{16 EiB - 1 byte} of uncompressed data. @@ -1050,10 +1062,10 @@ Size of the original uncompressed data. @item Member size (8 bytes) Total size of the member, including header and trailer. This field acts -as a distributed index, allows the verification of stream integrity, and +as a distributed index, improves the checking of stream integrity, and facilitates the safe recovery of undamaged members from multimember files. -Member size should be limited to @w{2 PiB} to prevent the data size field -from overflowing. +Lzip limits the member size to @w{2 PiB} to prevent the data size field from +overflowing. @end table @@ -1360,7 +1372,7 @@ int ffrsdecompress( struct LZ_Decoder * const decoder, if( LZ_decompress_errno( decoder ) == LZ_header_error || LZ_decompress_errno( decoder ) == LZ_data_error ) { LZ_decompress_sync_to_member( decoder ); continue; } - else break; + break; } len = fwrite( buffer, 1, ret, outfile ); if( len < ret ) break; diff --git a/doc/minilzip.1 b/doc/minilzip.1 index 0c4c06d..3532520 100644 --- a/doc/minilzip.1 +++ b/doc/minilzip.1 @@ -1,24 +1,25 @@ -.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. -.TH MINILZIP "1" "January 2022" "minilzip 1.13" "User Commands" +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2. +.TH MINILZIP "1" "January 2024" "minilzip 1.14" "User Commands" .SH NAME minilzip \- reduces the size of files .SH SYNOPSIS .B minilzip [\fI\,options\/\fR] [\fI\,files\/\fR] .SH DESCRIPTION -Minilzip is a test program for the compression library lzlib, fully -compatible with lzip 1.4 or newer. +Minilzip is a test program for the compression library lzlib, compatible +with lzip 1.4 or newer. .PP Lzip is a lossless data compressor with a user interface similar to the one of gzip or bzip2. Lzip uses a simplified form of the 'Lempel\-Ziv\-Markov -chain\-Algorithm' (LZMA) stream format and provides a 3 factor integrity -checking to maximize interoperability and optimize safety. Lzip can compress -about as fast as gzip (lzip \fB\-0\fR) or compress most files more than bzip2 -(lzip \fB\-9\fR). Decompression speed is intermediate between gzip and bzip2. -Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip -has been designed, written, and tested with great care to replace gzip and -bzip2 as the standard general\-purpose compressed format for unix\-like -systems. +chain\-Algorithm' (LZMA) stream format to maximize interoperability. The +maximum dictionary size is 512 MiB so that any lzip file can be decompressed +on 32\-bit machines. Lzip provides accurate and robust 3\-factor integrity +checking. Lzip can compress about as fast as gzip (lzip \fB\-0\fR) or compress most +files more than bzip2 (lzip \fB\-9\fR). Decompression speed is intermediate between +gzip and bzip2. Lzip is better than gzip and bzip2 from a data recovery +perspective. Lzip has been designed, written, and tested with great care to +replace gzip and bzip2 as the standard general\-purpose compressed format for +Unix\-like systems. .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR @@ -37,7 +38,7 @@ set member size limit in bytes write to standard output, keep input files .TP \fB\-d\fR, \fB\-\-decompress\fR -decompress +decompress, test compressed file integrity .TP \fB\-f\fR, \fB\-\-force\fR overwrite existing output files @@ -88,24 +89,24 @@ If no file names are given, or if a file is '\-', minilzip compresses or decompresses from standard input to standard output. Numbers may be followed by a multiplier: k = kB = 10^3 = 1000, Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... -Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12 -to 2^29 bytes. +Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12 to +2^29 bytes. .PP -The bidimensional parameter space of LZMA can't be mapped to a linear -scale optimal for all files. If your files are large, very repetitive, -etc, you may need to use the options \fB\-\-dictionary\-size\fR and \fB\-\-match\-length\fR -directly to achieve optimal performance. +The bidimensional parameter space of LZMA can't be mapped to a linear scale +optimal for all files. If your files are large, very repetitive, etc, you +may need to use the options \fB\-\-dictionary\-size\fR and \fB\-\-match\-length\fR directly +to achieve optimal performance. .PP To extract all the files from archive 'foo.tar.lz', use the commands \&'tar \fB\-xf\fR foo.tar.lz' or 'minilzip \fB\-cd\fR foo.tar.lz | tar \fB\-xf\fR \-'. .PP -Exit status: 0 for a normal exit, 1 for environmental problems (file -not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or -invalid input file, 3 for an internal consistency error (e.g., bug) which -caused minilzip to panic. +Exit status: 0 for a normal exit, 1 for environmental problems +(file not found, invalid command\-line options, I/O errors, etc), 2 to +indicate a corrupt or invalid input file, 3 for an internal consistency +error (e.g., bug) which caused minilzip to panic. .PP The ideas embodied in lzlib are due to (at least) the following people: -Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the +Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrei Markov (for the definition of Markov chains), G.N.N. Martin (for the definition of range encoding), Igor Pavlov (for putting all the above together in LZMA), and Julian Seward (for bzip2's CLI). @@ -114,8 +115,9 @@ Report bugs to lzip\-bug@nongnu.org .br Lzlib home page: http://www.nongnu.org/lzip/lzlib.html .SH COPYRIGHT -Copyright \(co 2022 Antonio Diaz Diaz. -Using lzlib 1.13 +Copyright \(co 2024 Antonio Diaz Diaz. +Using lzlib 1.14 +Using LZ_API_VERSION = 1014 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2024 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -200,8 +200,6 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, } const int pos_state = Mb_data_position( &e->eb.mb ) & pos_state_mask; - const int match_price = price1( e->eb.bm_match[state][pos_state] ); - const int rep_match_price = match_price + price1( e->eb.bm_rep[state] ); const uint8_t prev_byte = Mb_peek( &e->eb.mb, 1 ); const uint8_t cur_byte = Mb_peek( &e->eb.mb, 0 ); const uint8_t match_byte = Mb_peek( &e->eb.mb, reps[0] + 1 ); @@ -213,6 +211,9 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, e->trials[1].price += LZeb_price_matched( &e->eb, prev_byte, cur_byte, match_byte ); e->trials[1].dis4 = -1; /* literal */ + const int match_price = price1( e->eb.bm_match[state][pos_state] ); + const int rep_match_price = match_price + price1( e->eb.bm_rep[state] ); + if( match_byte == cur_byte ) Tr_update( &e->trials[1], rep_match_price + LZeb_price_shortrep( &e->eb, state, pos_state ), 0, 0 ); @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2024 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/encoder_base.c b/encoder_base.c index 4535352..047f372 100644 --- a/encoder_base.c +++ b/encoder_base.c @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2024 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -140,9 +140,7 @@ static void LZeb_try_full_flush( struct LZ_encoder_base * const eb ) Lt_set_data_crc( trailer, LZeb_crc( eb ) ); Lt_set_data_size( trailer, Mb_data_position( &eb->mb ) ); Lt_set_member_size( trailer, Re_member_position( &eb->renc ) + Lt_size ); - int i; - for( i = 0; i < Lt_size; ++i ) - Cb_put_byte( &eb->renc.cb, trailer[i] ); + int i; for( i = 0; i < Lt_size; ++i ) Cb_put_byte( &eb->renc.cb, trailer[i] ); } diff --git a/encoder_base.h b/encoder_base.h index 17ffc93..094f679 100644 --- a/encoder_base.h +++ b/encoder_base.h @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2024 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -142,7 +142,7 @@ static inline int price1( const Bit_model probability ) { return get_price( bit_model_total - probability ); } static inline int price_bit( const Bit_model bm, const bool bit ) - { return ( bit ? price1( bm ) : price0( bm ) ); } + { return bit ? price1( bm ) : price0( bm ); } static inline int price_symbol3( const Bit_model bm[], int symbol ) @@ -276,8 +276,8 @@ static inline int Mb_free_bytes( const struct Matchfinder_base * const mb ) static inline bool Mb_enough_available_bytes( const struct Matchfinder_base * const mb ) - { return ( mb->pos + mb->after_size <= mb->stream_pos || - ( Mb_flushing_or_end( mb ) && mb->pos < mb->stream_pos ) ); } + { return mb->pos + mb->after_size <= mb->stream_pos || + ( Mb_flushing_or_end( mb ) && mb->pos < mb->stream_pos ); } static inline const uint8_t * Mb_ptr_to_current_pos( const struct Matchfinder_base * const mb ) @@ -340,7 +340,6 @@ static inline void Re_shift_low( struct Range_encoder * const renc ) static inline void Re_reset( struct Range_encoder * const renc, const unsigned dictionary_size ) { - int i; Cb_reset( &renc->cb ); renc->low = 0; renc->partial_member_pos = 0; @@ -348,8 +347,7 @@ static inline void Re_reset( struct Range_encoder * const renc, renc->ff_count = 0; renc->cache = 0; Lh_set_dictionary_size( renc->header, dictionary_size ); - for( i = 0; i < Lh_size; ++i ) - Cb_put_byte( &renc->cb, renc->header[i] ); + int i; for( i = 0; i < Lh_size; ++i ) Cb_put_byte( &renc->cb, renc->header[i] ); } static inline bool Re_init( struct Range_encoder * const renc, @@ -398,8 +396,7 @@ static inline void Re_encode( struct Range_encoder * const renc, { renc->range >>= 1; if( symbol & mask ) renc->low += renc->range; - if( renc->range <= 0x00FFFFFFU ) - { renc->range <<= 8; Re_shift_low( renc ); } + if( renc->range <= 0x00FFFFFFU ) { renc->range <<= 8; Re_shift_low( renc ); } } } @@ -559,7 +556,7 @@ static inline bool LZeb_init( struct LZ_encoder_base * const eb, } static inline bool LZeb_member_finished( const struct LZ_encoder_base * const eb ) - { return ( eb->member_finished && Cb_empty( &eb->renc.cb ) ); } + { return eb->member_finished && Cb_empty( &eb->renc.cb ); } static inline void LZeb_free( struct LZ_encoder_base * const eb ) { Re_free( &eb->renc ); Mb_free( &eb->mb ); } diff --git a/fast_encoder.c b/fast_encoder.c index 618c3d6..bb6363a 100644 --- a/fast_encoder.c +++ b/fast_encoder.c @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2024 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/fast_encoder.h b/fast_encoder.h index 54756bd..b9421f4 100644 --- a/fast_encoder.h +++ b/fast_encoder.h @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2024 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/ffexample.c b/ffexample.c index 59345ee..826abcd 100644 --- a/ffexample.c +++ b/ffexample.c @@ -1,5 +1,5 @@ /* File to file example - Test program for the library lzlib - Copyright (C) 2010-2022 Antonio Diaz Diaz. + Copyright (C) 2010-2024 Antonio Diaz Diaz. This program is free software: you have unlimited permission to copy, distribute, and modify it. @@ -245,7 +245,7 @@ int ffrsdecompress( struct LZ_Decoder * const decoder, if( LZ_decompress_errno( decoder ) == LZ_header_error || LZ_decompress_errno( decoder ) == LZ_data_error ) { LZ_decompress_sync_to_member( decoder ); continue; } - else break; + break; } len = fwrite( buffer, 1, ret, outfile ); if( len < ret ) break; @@ -277,10 +277,10 @@ int main( const int argc, const char * const argv[] ) { fputs( "ffexample: Not enough memory.\n", stderr ); LZ_compress_close( encoder ); LZ_decompress_close( decoder ); return 1; } if( !infile ) - { fprintf( stderr, "ffexample: Can't open input file '%s': %s\n", + { fprintf( stderr, "ffexample: %s: Can't open input file: %s\n", argv[2], strerror( errno ) ); return 1; } if( !outfile ) - { fprintf( stderr, "ffexample: Can't open output file '%s': %s\n", + { fprintf( stderr, "ffexample: %s: Can't open output file: %s\n", argv[3], strerror( errno ) ); return 1; } switch( argv[1][1] ) @@ -291,7 +291,7 @@ int main( const int argc, const char * const argv[] ) case 'm': retval = ffmmcompress( infile, outfile ); break; case 'l': retval = fflfcompress( encoder, infile, outfile ); break; case 'r': retval = ffrsdecompress( decoder, infile, outfile ); break; - default: show_help(); return ( argv[1][1] != 'h' ); + default: show_help(); return argv[1][1] != 'h'; } if( LZ_decompress_close( decoder ) < 0 || LZ_compress_close( encoder ) < 0 || @@ -1,5 +1,5 @@ /* Lzcheck - Test program for the library lzlib - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2024 Antonio Diaz Diaz. This program is free software: you have unlimited permission to copy, distribute, and modify it. @@ -26,7 +26,7 @@ const unsigned long long member_size = INT64_MAX; -enum { buffer_size = 32768 }; +enum { buffer_size = 32749 }; /* largest prime < 32768 */ uint8_t in_buffer[buffer_size]; uint8_t mid_buffer[buffer_size]; uint8_t out_buffer[buffer_size]; @@ -62,6 +62,7 @@ static struct LZ_Encoder * xopen_encoder( const int dictionary_size ) return encoder; } + static struct LZ_Decoder * xopen_decoder( void ) { struct LZ_Decoder * const decoder = LZ_decompress_open(); @@ -171,17 +172,49 @@ static int check_sync_flush( FILE * const file, const int dictionary_size ) if( line_size <= 0 ) break; /* end of file */ in_size = LZ_compress_write( encoder, line_buf, line_size ); + if( in_size < 0 ) + { + fprintf( stderr, "lzcheck: LZ_compress_write error: %s\n", + LZ_strerror( LZ_compress_errno( encoder ) ) ); + retval = 3; break; + } if( in_size < line_size ) - fprintf( stderr, "lzcheck: sync: LZ_compress_write only accepted %d of %d bytes\n", - in_size, line_size ); - LZ_compress_sync_flush( encoder ); - if( line_buf[0] & 1 ) /* read all data at once or byte by byte */ - mid_size = LZ_compress_read( encoder, mid_buffer, buffer_size ); - else for( mid_size = 0; mid_size < buffer_size; ) { - const int rd = LZ_compress_read( encoder, mid_buffer + mid_size, 1 ); + fprintf( stderr, "lzcheck: sync: LZ_compress_write only accepted %d " + "of %d bytes\n", in_size, line_size ); + mid_size = LZ_compress_read( encoder, mid_buffer, buffer_size ); + const int wr = + LZ_compress_write( encoder, line_buf + in_size, line_size - in_size ); + if( wr < 0 ) + { + fprintf( stderr, "lzcheck: LZ_compress_write error: %s\n", + LZ_strerror( LZ_compress_errno( encoder ) ) ); + retval = 3; break; + } + if( wr + in_size != line_size ) + { + fprintf( stderr, "lzcheck: sync: LZ_compress_write only accepted %d " + "of %d remaining bytes\n", wr, line_size - in_size ); + retval = 3; break; + } + in_size += wr; + LZ_compress_sync_flush( encoder ); + const int rd = LZ_compress_read( encoder, mid_buffer + mid_size, + buffer_size - mid_size ); if( rd > 0 ) mid_size += rd; - else { if( rd < 0 ) { mid_size = -1; } break; } + else if( rd < 0 ) mid_size = -1; + } + else + { + LZ_compress_sync_flush( encoder ); + if( line_buf[0] & 1 ) /* read all data at once or byte by byte */ + mid_size = LZ_compress_read( encoder, mid_buffer, buffer_size ); + else for( mid_size = 0; mid_size < buffer_size; ) + { + const int rd = LZ_compress_read( encoder, mid_buffer + mid_size, 1 ); + if( rd > 0 ) mid_size += rd; + else { if( rd < 0 ) { mid_size = -1; } break; } + } } if( mid_size < 0 ) { @@ -344,7 +377,7 @@ int main( const int argc, const char * const argv[] ) FILE * file = fopen( argv[i], "rb" ); if( !file ) { - fprintf( stderr, "lzcheck: Can't open file '%s' for reading.\n", argv[i] ); + fprintf( stderr, "lzcheck: %s: Can't open file for reading.\n", argv[i] ); ++open_failures; continue; } if( verbose ) fprintf( stderr, " Testing file '%s'\n", argv[i] ); @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2024 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -35,17 +35,13 @@ static inline State St_set_char( const State st ) static const State next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; return next[st]; } - static inline State St_set_char_rep() { return 8; } - static inline State St_set_match( const State st ) - { return ( ( st < 7 ) ? 7 : 10 ); } - + { return ( st < 7 ) ? 7 : 10; } static inline State St_set_rep( const State st ) - { return ( ( st < 7 ) ? 8 : 11 ); } - + { return ( st < 7 ) ? 8 : 11; } static inline State St_set_short_rep( const State st ) - { return ( ( st < 7 ) ? 9 : 11 ); } + { return ( st < 7 ) ? 9 : 11; } enum { @@ -182,8 +178,8 @@ static inline void CRC32_update_buf( uint32_t * const crc, static inline bool isvalid_ds( const unsigned dictionary_size ) - { return ( dictionary_size >= min_dictionary_size && - dictionary_size <= max_dictionary_size ); } + { return dictionary_size >= min_dictionary_size && + dictionary_size <= max_dictionary_size; } static inline int real_bits( unsigned value ) @@ -196,43 +192,43 @@ static inline int real_bits( unsigned value ) static const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; /* "LZIP" */ -typedef uint8_t Lzip_header[6]; /* 0-3 magic bytes */ +enum { Lh_size = 6 }; +typedef uint8_t Lzip_header[Lh_size]; /* 0-3 magic bytes */ /* 4 version */ /* 5 coded dictionary size */ -enum { Lh_size = 6 }; static inline void Lh_set_magic( Lzip_header data ) { memcpy( data, lzip_magic, 4 ); data[4] = 1; } -static inline bool Lh_verify_magic( const Lzip_header data ) - { return ( memcmp( data, lzip_magic, 4 ) == 0 ); } +static inline bool Lh_check_magic( const Lzip_header data ) + { return memcmp( data, lzip_magic, 4 ) == 0; } /* detect (truncated) header */ -static inline bool Lh_verify_prefix( const Lzip_header data, const int sz ) +static inline bool Lh_check_prefix( const Lzip_header data, const int sz ) { int i; for( i = 0; i < sz && i < 4; ++i ) if( data[i] != lzip_magic[i] ) return false; - return ( sz > 0 ); + return sz > 0; } /* detect corrupt header */ -static inline bool Lh_verify_corrupt( const Lzip_header data ) +static inline bool Lh_check_corrupt( const Lzip_header data ) { int matches = 0; int i; for( i = 0; i < 4; ++i ) if( data[i] == lzip_magic[i] ) ++matches; - return ( matches > 1 && matches < 4 ); + return matches > 1 && matches < 4; } static inline uint8_t Lh_version( const Lzip_header data ) { return data[4]; } -static inline bool Lh_verify_version( const Lzip_header data ) - { return ( data[4] == 1 ); } +static inline bool Lh_check_version( const Lzip_header data ) + { return data[4] == 1; } static inline unsigned Lh_get_dictionary_size( const Lzip_header data ) { - unsigned sz = ( 1 << ( data[5] & 0x1F ) ); + unsigned sz = 1 << ( data[5] & 0x1F ); if( sz > min_dictionary_size ) sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 ); return sz; @@ -249,23 +245,23 @@ static inline bool Lh_set_dictionary_size( Lzip_header data, const unsigned sz ) unsigned i; for( i = 7; i >= 1; --i ) if( base_size - ( i * fraction ) >= sz ) - { data[5] |= ( i << 5 ); break; } + { data[5] |= i << 5; break; } } return true; } -static inline bool Lh_verify( const Lzip_header data ) +static inline bool Lh_check( const Lzip_header data ) { - return Lh_verify_magic( data ) && Lh_verify_version( data ) && + return Lh_check_magic( data ) && Lh_check_version( data ) && isvalid_ds( Lh_get_dictionary_size( data ) ); } -typedef uint8_t Lzip_trailer[20]; +enum { Lt_size = 20 }; +typedef uint8_t Lzip_trailer[Lt_size]; /* 0-3 CRC32 of the uncompressed data */ /* 4-11 size of the uncompressed data */ /* 12-19 member size including header and trailer */ -enum { Lt_size = 20 }; static inline unsigned Lt_get_data_crc( const Lzip_trailer data ) { @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2024 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -86,7 +86,7 @@ static void LZ_Decoder_init( struct LZ_Decoder * const d ) } -static bool verify_encoder( struct LZ_Encoder * const e ) +static bool check_encoder( struct LZ_Encoder * const e ) { if( !e ) return false; if( !e->lz_encoder_base || ( !e->lz_encoder && !e->flz_encoder ) || @@ -96,7 +96,7 @@ static bool verify_encoder( struct LZ_Encoder * const e ) } -static bool verify_decoder( struct LZ_Decoder * const d ) +static bool check_decoder( struct LZ_Decoder * const d ) { if( !d ) return false; if( !d->rdec ) @@ -189,7 +189,7 @@ int LZ_compress_close( struct LZ_Encoder * const e ) int LZ_compress_finish( struct LZ_Encoder * const e ) { - if( !verify_encoder( e ) || e->fatal ) return -1; + if( !check_encoder( e ) || e->fatal ) return -1; Mb_finish( &e->lz_encoder_base->mb ); /* if (open --> write --> finish) use same dictionary size as lzip. */ /* this does not save any memory. */ @@ -208,7 +208,7 @@ int LZ_compress_finish( struct LZ_Encoder * const e ) int LZ_compress_restart_member( struct LZ_Encoder * const e, const unsigned long long member_size ) { - if( !verify_encoder( e ) || e->fatal ) return -1; + if( !check_encoder( e ) || e->fatal ) return -1; if( !LZeb_member_finished( e->lz_encoder_base ) ) { e->lz_errno = LZ_sequence_error; return -1; } if( member_size < min_dictionary_size ) @@ -226,7 +226,7 @@ int LZ_compress_restart_member( struct LZ_Encoder * const e, int LZ_compress_sync_flush( struct LZ_Encoder * const e ) { - if( !verify_encoder( e ) || e->fatal ) return -1; + if( !check_encoder( e ) || e->fatal ) return -1; if( !e->lz_encoder_base->mb.at_stream_end ) e->lz_encoder_base->mb.sync_flush_pending = true; return 0; @@ -236,7 +236,7 @@ int LZ_compress_sync_flush( struct LZ_Encoder * const e ) int LZ_compress_read( struct LZ_Encoder * const e, uint8_t * const buffer, const int size ) { - if( !verify_encoder( e ) || e->fatal ) return -1; + if( !check_encoder( e ) || e->fatal ) return -1; if( size < 0 ) return 0; { struct LZ_encoder_base * const eb = e->lz_encoder_base; @@ -258,14 +258,14 @@ int LZ_compress_read( struct LZ_Encoder * const e, int LZ_compress_write( struct LZ_Encoder * const e, const uint8_t * const buffer, const int size ) { - if( !verify_encoder( e ) || e->fatal ) return -1; + if( !check_encoder( e ) || e->fatal ) return -1; return Mb_write_data( &e->lz_encoder_base->mb, buffer, size ); } int LZ_compress_write_size( struct LZ_Encoder * const e ) { - if( !verify_encoder( e ) || e->fatal ) return -1; + if( !check_encoder( e ) || e->fatal ) return -1; return Mb_free_bytes( &e->lz_encoder_base->mb ); } @@ -279,43 +279,43 @@ enum LZ_Errno LZ_compress_errno( struct LZ_Encoder * const e ) int LZ_compress_finished( struct LZ_Encoder * const e ) { - if( !verify_encoder( e ) ) return -1; - return ( Mb_data_finished( &e->lz_encoder_base->mb ) && - LZeb_member_finished( e->lz_encoder_base ) ); + if( !check_encoder( e ) ) return -1; + return Mb_data_finished( &e->lz_encoder_base->mb ) && + LZeb_member_finished( e->lz_encoder_base ); } int LZ_compress_member_finished( struct LZ_Encoder * const e ) { - if( !verify_encoder( e ) ) return -1; + if( !check_encoder( e ) ) return -1; return LZeb_member_finished( e->lz_encoder_base ); } unsigned long long LZ_compress_data_position( struct LZ_Encoder * const e ) { - if( !verify_encoder( e ) ) return 0; + if( !check_encoder( e ) ) return 0; return Mb_data_position( &e->lz_encoder_base->mb ); } unsigned long long LZ_compress_member_position( struct LZ_Encoder * const e ) { - if( !verify_encoder( e ) ) return 0; + if( !check_encoder( e ) ) return 0; return Re_member_position( &e->lz_encoder_base->renc ); } unsigned long long LZ_compress_total_in_size( struct LZ_Encoder * const e ) { - if( !verify_encoder( e ) ) return 0; + if( !check_encoder( e ) ) return 0; return e->partial_in_size + Mb_data_position( &e->lz_encoder_base->mb ); } unsigned long long LZ_compress_total_out_size( struct LZ_Encoder * const e ) { - if( !verify_encoder( e ) ) return 0; + if( !check_encoder( e ) ) return 0; return e->partial_out_size + Re_member_position( &e->lz_encoder_base->renc ); } @@ -352,7 +352,7 @@ int LZ_decompress_close( struct LZ_Decoder * const d ) int LZ_decompress_finish( struct LZ_Decoder * const d ) { - if( !verify_decoder( d ) || d->fatal ) return -1; + if( !check_decoder( d ) || d->fatal ) return -1; if( d->seeking ) { d->seeking = false; d->partial_in_size += Rd_purge( d->rdec ); } else Rd_finish( d->rdec ); @@ -362,7 +362,7 @@ int LZ_decompress_finish( struct LZ_Decoder * const d ) int LZ_decompress_reset( struct LZ_Decoder * const d ) { - if( !verify_decoder( d ) ) return -1; + if( !check_decoder( d ) ) return -1; if( d->lz_decoder ) { LZd_free( d->lz_decoder ); free( d->lz_decoder ); d->lz_decoder = 0; } d->partial_in_size = 0; @@ -379,7 +379,7 @@ int LZ_decompress_reset( struct LZ_Decoder * const d ) int LZ_decompress_sync_to_member( struct LZ_Decoder * const d ) { unsigned skipped = 0; - if( !verify_decoder( d ) ) return -1; + if( !check_decoder( d ) ) return -1; if( d->lz_decoder ) { LZd_free( d->lz_decoder ); free( d->lz_decoder ); d->lz_decoder = 0; } if( Rd_find_header( d->rdec, &skipped ) ) d->seeking = false; @@ -399,7 +399,7 @@ int LZ_decompress_read( struct LZ_Decoder * const d, uint8_t * const buffer, const int size ) { int result; - if( !verify_decoder( d ) ) return -1; + if( !check_decoder( d ) ) return -1; if( size < 0 ) return 0; if( d->fatal ) /* don't return error until pending bytes are read */ { if( d->lz_decoder && !Cb_empty( &d->lz_decoder->cb ) ) goto get_data; @@ -422,20 +422,20 @@ int LZ_decompress_read( struct LZ_Decoder * const d, rd = Rd_read_data( d->rdec, d->member_header, Lh_size ); if( rd < Lh_size || Rd_finished( d->rdec ) ) /* End Of File */ { - if( rd <= 0 || Lh_verify_prefix( d->member_header, rd ) ) + if( rd <= 0 || Lh_check_prefix( d->member_header, rd ) ) d->lz_errno = LZ_unexpected_eof; else d->lz_errno = LZ_header_error; d->fatal = true; return -1; } - if( !Lh_verify_magic( d->member_header ) ) + if( !Lh_check_magic( d->member_header ) ) { /* unreading the header prevents sync_to_member from skipping a member if leading garbage is shorter than a full header; "lgLZIP\x01\x0C" */ if( Rd_unread_data( d->rdec, rd ) ) { - if( d->first_header || !Lh_verify_corrupt( d->member_header ) ) + if( d->first_header || !Lh_check_corrupt( d->member_header ) ) d->lz_errno = LZ_header_error; else d->lz_errno = LZ_data_error; /* corrupt header */ @@ -445,12 +445,12 @@ int LZ_decompress_read( struct LZ_Decoder * const d, d->fatal = true; return -1; } - if( !Lh_verify_version( d->member_header ) || + if( !Lh_check_version( d->member_header ) || !isvalid_ds( Lh_get_dictionary_size( d->member_header ) ) ) { /* Skip a possible "LZIP" leading garbage; "LZIPLZIP\x01\x0C". Leave member_pos pointing to the first error. */ - if( Rd_unread_data( d->rdec, 1 + !Lh_verify_version( d->member_header ) ) ) + if( Rd_unread_data( d->rdec, 1 + !Lh_check_version( d->member_header ) ) ) d->lz_errno = LZ_data_error; /* bad version or bad dict size */ else d->lz_errno = LZ_library_error; @@ -500,7 +500,7 @@ int LZ_decompress_write( struct LZ_Decoder * const d, const uint8_t * const buffer, const int size ) { int result; - if( !verify_decoder( d ) || d->fatal ) return -1; + if( !check_decoder( d ) || d->fatal ) return -1; if( size < 0 ) return 0; result = Rd_write_data( d->rdec, buffer, size ); @@ -521,7 +521,7 @@ int LZ_decompress_write( struct LZ_Decoder * const d, int LZ_decompress_write_size( struct LZ_Decoder * const d ) { - if( !verify_decoder( d ) || d->fatal ) return -1; + if( !check_decoder( d ) || d->fatal ) return -1; return Rd_free_bytes( d->rdec ); } @@ -535,36 +535,36 @@ enum LZ_Errno LZ_decompress_errno( struct LZ_Decoder * const d ) int LZ_decompress_finished( struct LZ_Decoder * const d ) { - if( !verify_decoder( d ) || d->fatal ) return -1; - return ( Rd_finished( d->rdec ) && - ( !d->lz_decoder || LZd_member_finished( d->lz_decoder ) ) ); + if( !check_decoder( d ) || d->fatal ) return -1; + return Rd_finished( d->rdec ) && + ( !d->lz_decoder || LZd_member_finished( d->lz_decoder ) ); } int LZ_decompress_member_finished( struct LZ_Decoder * const d ) { - if( !verify_decoder( d ) || d->fatal ) return -1; - return ( d->lz_decoder && LZd_member_finished( d->lz_decoder ) ); + if( !check_decoder( d ) || d->fatal ) return -1; + return d->lz_decoder && LZd_member_finished( d->lz_decoder ); } int LZ_decompress_member_version( struct LZ_Decoder * const d ) { - if( !verify_decoder( d ) ) return -1; + if( !check_decoder( d ) ) return -1; return Lh_version( d->member_header ); } int LZ_decompress_dictionary_size( struct LZ_Decoder * const d ) { - if( !verify_decoder( d ) ) return -1; + if( !check_decoder( d ) ) return -1; return Lh_get_dictionary_size( d->member_header ); } unsigned LZ_decompress_data_crc( struct LZ_Decoder * const d ) { - if( verify_decoder( d ) && d->lz_decoder ) + if( check_decoder( d ) && d->lz_decoder ) return LZd_crc( d->lz_decoder ); return 0; } @@ -572,7 +572,7 @@ unsigned LZ_decompress_data_crc( struct LZ_Decoder * const d ) unsigned long long LZ_decompress_data_position( struct LZ_Decoder * const d ) { - if( verify_decoder( d ) && d->lz_decoder ) + if( check_decoder( d ) && d->lz_decoder ) return LZd_data_position( d->lz_decoder ); return 0; } @@ -580,21 +580,21 @@ unsigned long long LZ_decompress_data_position( struct LZ_Decoder * const d ) unsigned long long LZ_decompress_member_position( struct LZ_Decoder * const d ) { - if( !verify_decoder( d ) ) return 0; + if( !check_decoder( d ) ) return 0; return d->rdec->member_position; } unsigned long long LZ_decompress_total_in_size( struct LZ_Decoder * const d ) { - if( !verify_decoder( d ) ) return 0; + if( !check_decoder( d ) ) return 0; return d->partial_in_size + d->rdec->member_position; } unsigned long long LZ_decompress_total_out_size( struct LZ_Decoder * const d ) { - if( !verify_decoder( d ) ) return 0; + if( !check_decoder( d ) ) return 0; if( d->lz_decoder ) return d->partial_out_size + LZd_data_position( d->lz_decoder ); return d->partial_out_size; @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2024 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -24,9 +24,9 @@ extern "C" { /* LZ_API_VERSION was first defined in lzlib 1.8 to 1. Since lzlib 1.12, LZ_API_VERSION is defined as (major * 1000 + minor). */ -#define LZ_API_VERSION 1013 +#define LZ_API_VERSION 1014 -static const char * const LZ_version_string = "1.13"; +static const char * const LZ_version_string = "1.14"; enum LZ_Errno { LZ_ok = 0, LZ_bad_argument, LZ_mem_error, LZ_sequence_error, LZ_header_error, LZ_unexpected_eof, @@ -1,5 +1,5 @@ /* Minilzip - Test program for the library lzlib - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2024 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,9 +16,9 @@ */ /* Exit status: 0 for a normal exit, 1 for environmental problems - (file not found, invalid flags, I/O errors, etc), 2 to indicate a - corrupt or invalid input file, 3 for an internal consistency error - (e.g., bug) which caused minilzip to panic. + (file not found, invalid command-line options, I/O errors, etc), 2 to + indicate a corrupt or invalid input file, 3 for an internal consistency + error (e.g., bug) which caused minilzip to panic. */ #define _FILE_OFFSET_BITS 64 @@ -26,10 +26,10 @@ #include <ctype.h> #include <errno.h> #include <fcntl.h> -#include <limits.h> +#include <limits.h> /* SSIZE_MAX */ #include <signal.h> #include <stdbool.h> -#include <stdint.h> +#include <stdint.h> /* SIZE_MAX */ #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -91,7 +91,7 @@ static const char * const mem_msg = "Not enough memory."; int verbosity = 0; static const char * const program_name = "minilzip"; -static const char * const program_year = "2022"; +static const char * const program_year = "2024"; static const char * invocation_name = "minilzip"; /* default value */ static const struct { const char * from; const char * to; } known_extensions[] = { @@ -116,18 +116,19 @@ static bool delete_output_on_interrupt = false; static void show_help( void ) { - printf( "Minilzip is a test program for the compression library lzlib, fully\n" - "compatible with lzip 1.4 or newer.\n" + printf( "Minilzip is a test program for the compression library lzlib, compatible\n" + "with lzip 1.4 or newer.\n" "\nLzip is a lossless data compressor with a user interface similar to the one\n" "of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov\n" - "chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity\n" - "checking to maximize interoperability and optimize safety. Lzip can compress\n" - "about as fast as gzip (lzip -0) or compress most files more than bzip2\n" - "(lzip -9). Decompression speed is intermediate between gzip and bzip2.\n" - "Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip\n" - "has been designed, written, and tested with great care to replace gzip and\n" - "bzip2 as the standard general-purpose compressed format for unix-like\n" - "systems.\n" + "chain-Algorithm' (LZMA) stream format to maximize interoperability. The\n" + "maximum dictionary size is 512 MiB so that any lzip file can be decompressed\n" + "on 32-bit machines. Lzip provides accurate and robust 3-factor integrity\n" + "checking. Lzip can compress about as fast as gzip (lzip -0) or compress most\n" + "files more than bzip2 (lzip -9). Decompression speed is intermediate between\n" + "gzip and bzip2. Lzip is better than gzip and bzip2 from a data recovery\n" + "perspective. Lzip has been designed, written, and tested with great care to\n" + "replace gzip and bzip2 as the standard general-purpose compressed format for\n" + "Unix-like systems.\n" "\nUsage: %s [options] [files]\n", invocation_name ); printf( "\nOptions:\n" " -h, --help display this help and exit\n" @@ -135,7 +136,7 @@ static void show_help( void ) " -a, --trailing-error exit with error status if trailing data\n" " -b, --member-size=<bytes> set member size limit in bytes\n" " -c, --stdout write to standard output, keep input files\n" - " -d, --decompress decompress\n" + " -d, --decompress decompress, test compressed file integrity\n" " -f, --force overwrite existing output files\n" " -F, --recompress force re-compression of compressed files\n" " -k, --keep keep (don't delete) input files\n" @@ -155,20 +156,20 @@ static void show_help( void ) "decompresses from standard input to standard output.\n" "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" - "Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12\n" - "to 2^29 bytes.\n" - "\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n" - "scale optimal for all files. If your files are large, very repetitive,\n" - "etc, you may need to use the options --dictionary-size and --match-length\n" - "directly to achieve optimal performance.\n" + "Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12 to\n" + "2^29 bytes.\n" + "\nThe bidimensional parameter space of LZMA can't be mapped to a linear scale\n" + "optimal for all files. If your files are large, very repetitive, etc, you\n" + "may need to use the options --dictionary-size and --match-length directly\n" + "to achieve optimal performance.\n" "\nTo extract all the files from archive 'foo.tar.lz', use the commands\n" "'tar -xf foo.tar.lz' or 'minilzip -cd foo.tar.lz | tar -xf -'.\n" - "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" - "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" - "invalid input file, 3 for an internal consistency error (e.g., bug) which\n" - "caused minilzip to panic.\n" + "\nExit status: 0 for a normal exit, 1 for environmental problems\n" + "(file not found, invalid command-line options, I/O errors, etc), 2 to\n" + "indicate a corrupt or invalid input file, 3 for an internal consistency\n" + "error (e.g., bug) which caused minilzip to panic.\n" "\nThe ideas embodied in lzlib are due to (at least) the following people:\n" - "Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the\n" + "Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrei Markov (for the\n" "definition of Markov chains), G.N.N. Martin (for the definition of range\n" "encoding), Igor Pavlov (for putting all the above together in LZMA), and\n" "Julian Seward (for bzip2's CLI).\n" @@ -177,11 +178,25 @@ static void show_help( void ) } +static void show_lzlib_version( void ) + { + printf( "Using lzlib %s\n", LZ_version() ); +#if !defined LZ_API_VERSION + fputs( "LZ_API_VERSION is not defined.\n", stdout ); +#elif LZ_API_VERSION >= 1012 + printf( "Using LZ_API_VERSION = %u\n", LZ_api_version() ); +#else + printf( "Compiled with LZ_API_VERSION = %u. " + "Using an unknown LZ_API_VERSION\n", LZ_API_VERSION ); +#endif + } + + static void show_version( void ) { printf( "%s %s\n", program_name, PROGVERSION ); printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); - printf( "Using lzlib %s\n", LZ_version() ); + show_lzlib_version(); printf( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n" "This is free software: you are free to change and redistribute it.\n" "There is NO WARRANTY, to the extent permitted by law.\n" ); @@ -234,18 +249,7 @@ static int check_lib() printf( "warning: LZ_API_VERSION != LZ_api_version() (%u vs %u)\n", LZ_API_VERSION, LZ_api_version() ); } #endif - if( verbosity >= 1 ) - { - printf( "Using lzlib %s\n", LZ_version() ); -#if !defined LZ_API_VERSION - fputs( "LZ_API_VERSION is not defined.\n", stdout ); -#elif LZ_API_VERSION >= 1012 - printf( "Using LZ_API_VERSION = %u\n", LZ_api_version() ); -#else - printf( "Compiled with LZ_API_VERSION = %u. " - "Using an unknown LZ_API_VERSION\n", LZ_API_VERSION ); -#endif - } + if( verbosity >= 1 ) show_lzlib_version(); return retval; } @@ -327,27 +331,26 @@ static void Pp_show_msg( struct Pretty_print * const pp, const char * const msg static void show_header( const unsigned dictionary_size ) { - enum { factor = 1024 }; - const char * const prefix[8] = - { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + enum { factor = 1024, n = 3 }; + const char * const prefix[n] = { "Ki", "Mi", "Gi" }; const char * p = ""; const char * np = " "; unsigned num = dictionary_size; bool exact = ( num % factor == 0 ); - int i; for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + int i; for( i = 0; i < n && ( num > 9999 || ( exact && num >= factor ) ); ++i ) { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; np = ""; } fprintf( stderr, "dict %s%4u %sB, ", np, num, p ); } -/* separate large numbers >= 100_000 in groups of 3 digits using '_' */ +/* separate numbers of 5 or more digits in groups of 3 digits using '_' */ static const char * format_num3( unsigned long long num ) { - const char * const si_prefix = "kMGTPEZY"; - const char * const binary_prefix = "KMGTPEZY"; - enum { buffers = 8, bufsize = 4 * sizeof (long long) }; + enum { buffers = 8, bufsize = 4 * sizeof num, n = 10 }; + const char * const si_prefix = "kMGTPEZYRQ"; + const char * const binary_prefix = "KMGTPEZYRQ"; static char buffer[buffers][bufsize]; /* circle of static buffers for printf */ static int current = 0; int i; @@ -357,15 +360,15 @@ static const char * format_num3( unsigned long long num ) if( num > 1024 ) { char prefix = 0; /* try binary first, then si */ - for( i = 0; i < 8 && num >= 1024 && num % 1024 == 0; ++i ) + for( i = 0; i < n && num != 0 && num % 1024 == 0; ++i ) { num /= 1024; prefix = binary_prefix[i]; } if( prefix ) *(--p) = 'i'; else - for( i = 0; i < 8 && num >= 1000 && num % 1000 == 0; ++i ) + for( i = 0; i < n && num != 0 && num % 1000 == 0; ++i ) { num /= 1000; prefix = si_prefix[i]; } if( prefix ) *(--p) = prefix; } - const bool split = num >= 100000; + const bool split = num >= 10000; for( i = 0; ; ) { @@ -376,6 +379,16 @@ static const char * format_num3( unsigned long long num ) } +void show_option_error( const char * const arg, const char * const msg, + const char * const option_name ) + { + if( verbosity >= 0 ) + fprintf( stderr, "%s: '%s': %s option '%s'.\n", + program_name, arg, msg, option_name ); + } + + +/* Recognized formats: <num>k, <num>Ki, <num>[MGTPEZYRQ][i] */ static unsigned long long getnum( const char * const arg, const char * const option_name, const unsigned long long llimit, @@ -385,12 +398,8 @@ static unsigned long long getnum( const char * const arg, errno = 0; unsigned long long result = strtoull( arg, &tail, 0 ); if( tail == arg ) - { - if( verbosity >= 0 ) - fprintf( stderr, "%s: Bad or missing numerical argument in " - "option '%s'.\n", program_name, option_name ); - exit( 1 ); - } + { show_option_error( arg, "Bad or missing numerical argument in", + option_name ); exit( 1 ); } if( !errno && tail[0] ) { @@ -399,6 +408,8 @@ static unsigned long long getnum( const char * const arg, int i; switch( tail[0] ) { + case 'Q': exponent = 10; break; + case 'R': exponent = 9; break; case 'Y': exponent = 8; break; case 'Z': exponent = 7; break; case 'E': exponent = 6; break; @@ -410,12 +421,8 @@ static unsigned long long getnum( const char * const arg, case 'k': if( factor == 1000 ) exponent = 1; break; } if( exponent <= 0 ) - { - if( verbosity >= 0 ) - fprintf( stderr, "%s: Bad multiplier in numerical argument of " - "option '%s'.\n", program_name, option_name ); - exit( 1 ); - } + { show_option_error( arg, "Bad multiplier in numerical argument of", + option_name ); exit( 1 ); } for( i = 0; i < exponent; ++i ) { if( ulimit / factor >= result ) result *= factor; @@ -426,8 +433,8 @@ static unsigned long long getnum( const char * const arg, if( errno ) { if( verbosity >= 0 ) - fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] " - "in option '%s'.\n", program_name, format_num3( llimit ), + fprintf( stderr, "%s: '%s': Value out of limits [%s,%s] in " + "option '%s'.\n", program_name, arg, format_num3( llimit ), format_num3( ulimit ), option_name ); exit( 1 ); } @@ -508,7 +515,7 @@ static void set_d_outname( const char * const name, const int eindex ) strcpy( output_filename, name ); strcat( output_filename, ".out" ); if( verbosity >= 1 ) - fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n", + fprintf( stderr, "%s: %s: Can't guess original name -- using '%s'\n", program_name, name, output_filename ); } @@ -520,7 +527,7 @@ static int open_instream( const char * const name, struct stat * const in_statsp if( program_mode == m_compress && !recompress && eindex >= 0 ) { if( verbosity >= 0 ) - fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n", + fprintf( stderr, "%s: %s: Input file already has '%s' suffix.\n", program_name, name, known_extensions[eindex].from ); return -1; } @@ -537,9 +544,9 @@ static int open_instream( const char * const name, struct stat * const in_statsp if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) ) { if( verbosity >= 0 ) - fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", + fprintf( stderr, "%s: %s: Input file is not a regular file%s.\n", program_name, name, ( can_read && one_to_one ) ? - ",\n and neither '-c' nor '-o' were specified" : "" ); + ",\n and neither '-c' nor '-o' were specified" : "" ); close( infd ); infd = -1; } @@ -558,16 +565,12 @@ static bool open_outstream( const bool force, const bool protect ) outfd = open( output_filename, flags, outfd_mode ); if( outfd >= 0 ) delete_output_on_interrupt = true; - else if( verbosity >= 0 ) - { - if( errno == EEXIST ) - fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n", - program_name, output_filename ); - else - fprintf( stderr, "%s: Can't create output file '%s': %s\n", - program_name, output_filename, strerror( errno ) ); - } - return ( outfd >= 0 ); + else if( errno == EEXIST ) + show_file_error( output_filename, + "Output file already exists, skipping.", 0 ); + else + show_file_error( output_filename, "Can't create output file", errno ); + return outfd >= 0; } @@ -585,12 +588,10 @@ static void cleanup_and_fail( const int retval ) if( delete_output_on_interrupt ) { delete_output_on_interrupt = false; - if( verbosity >= 0 ) - fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n", - program_name, output_filename ); + show_file_error( output_filename, "Deleting output file, if it exists.", 0 ); if( outfd >= 0 ) { close( outfd ); outfd = -1; } if( remove( output_filename ) != 0 && errno != ENOENT ) - show_error( "WARNING: deletion of output file (apparently) failed.", 0, false ); + show_error( "warning: deletion of output file failed", errno, false ); } exit( retval ); } @@ -635,7 +636,7 @@ static void close_and_set_permissions( const struct stat * const in_statsp ) if( in_statsp ) { const mode_t mode = in_statsp->st_mode; - /* fchown will in many cases return with EPERM, which can be safely ignored. */ + /* fchown in many cases returns with EPERM, which can be safely ignored. */ if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 ) { if( fchmod( outfd, mode ) != 0 ) warning = true; } else @@ -644,10 +645,8 @@ static void close_and_set_permissions( const struct stat * const in_statsp ) warning = true; } if( close( outfd ) != 0 ) - { - show_error( "Error closing output file", errno, false ); - cleanup_and_fail( 1 ); - } + { show_file_error( output_filename, "Error closing output file", errno ); + cleanup_and_fail( 1 ); } outfd = -1; delete_output_on_interrupt = false; if( in_statsp ) @@ -658,7 +657,8 @@ static void close_and_set_permissions( const struct stat * const in_statsp ) if( utime( output_filename, &t ) != 0 ) warning = true; } if( warning && verbosity >= 1 ) - show_error( "Can't change output file attributes.", 0, false ); + show_file_error( output_filename, + "warning: can't change output file attributes", errno ); } @@ -1033,8 +1033,8 @@ static void internal_error( const char * const msg ) int main( const int argc, const char * const argv[] ) { - /* Mapping from gzip/bzip2 style 1..9 compression modes - to the corresponding LZMA compression modes. */ + /* Mapping from gzip/bzip2 style 0..9 compression levels to the + corresponding LZMA compression parameters. */ const struct Lzma_options option_mapping[] = { { 65535, 16 }, /* -0 (65535,16 chooses fast encoder) */ @@ -1139,7 +1139,7 @@ int main( const int argc, const char * const argv[] ) case 'V': show_version(); return 0; case opt_chk: return check_lib(); case opt_lt: loose_trailing = true; break; - default : internal_error( "uncaught option." ); + default: internal_error( "uncaught option." ); } } /* end process options */ @@ -1204,11 +1204,11 @@ int main( const int argc, const char * const argv[] ) int retval = 0; const bool one_to_one = !to_stdout && program_mode != m_test && !to_file; bool stdin_used = false; + struct stat in_stats; for( i = 0; i < num_filenames; ++i ) { const char * input_filename = ""; int infd; - struct stat in_stats; Pp_set_name( &pp, filenames[i] ); if( strcmp( filenames[i], "-" ) == 0 ) @@ -1225,7 +1225,7 @@ int main( const int argc, const char * const argv[] ) eindex, one_to_one, recompress ); if( infd < 0 ) { set_retval( &retval, 1 ); continue; } if( !check_tty_in( pp.name, infd, program_mode, &retval ) ) continue; - if( one_to_one ) /* open outfd after verifying infd */ + if( one_to_one ) /* open outfd after checking infd */ { if( program_mode == m_compress ) set_c_outname( input_filename, true, volume_size > 0 ); @@ -1238,7 +1238,7 @@ int main( const int argc, const char * const argv[] ) if( one_to_one && !check_tty_out( program_mode ) ) { set_retval( &retval, 1 ); return retval; } /* don't delete a tty */ - if( to_file && outfd < 0 ) /* open outfd after verifying infd */ + if( to_file && outfd < 0 ) /* open outfd after checking infd */ { if( program_mode == m_compress ) set_c_outname( default_output_filename, false, volume_size > 0 ); @@ -1257,8 +1257,8 @@ int main( const int argc, const char * const argv[] ) tmp = compress( member_size, volume_size, infd, &encoder_options, &pp, in_statsp ); else - tmp = decompress( infd, &pp, ignore_trailing, - loose_trailing, program_mode == m_test ); + tmp = decompress( infd, &pp, ignore_trailing, loose_trailing, + program_mode == m_test ); if( close( infd ) != 0 ) { show_file_error( pp.name, "Error closing input file", errno ); set_retval( &tmp, 1 ); } @@ -1273,7 +1273,9 @@ int main( const int argc, const char * const argv[] ) ( program_mode != m_compress || volume_size == 0 ) ) remove( input_filename ); } - if( delete_output_on_interrupt ) close_and_set_permissions( 0 ); /* -o */ + if( delete_output_on_interrupt ) /* -o */ + close_and_set_permissions( ( retval == 0 && !stdin_used && + filenames_given && num_filenames == 1 ) ? &in_stats : 0 ); else if( outfd >= 0 && close( outfd ) != 0 ) /* -c */ { show_error( "Error closing stdout", errno, false ); diff --git a/testsuite/check.sh b/testsuite/check.sh index e93697e..1c5daf7 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lzlib - Compression library for the lzip format -# Copyright (C) 2009-2022 Antonio Diaz Diaz. +# Copyright (C) 2009-2024 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute, and modify it. @@ -41,6 +41,7 @@ test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } "${LZIP}" --check-lib # just print warning [ $? != 2 ] || { test_failed $LINENO ; exit 2 ; } # unless bad lzlib.h + printf "testing lzlib-%s..." "$2" "${LZIP}" -fkqm4 in @@ -74,8 +75,9 @@ done [ ! -e out.lz ] || test_failed $LINENO "${LZIP}" -qf -S100k -o out in in [ $? = 1 ] || test_failed $LINENO +{ [ ! -e out ] && [ ! -e out.lz ] ; } || test_failed $LINENO # these are for code coverage -"${LZIP}" -cdt "${in_lz}" > out 2> /dev/null +"${LZIP}" -cdt "${in_lz}" 2> /dev/null [ $? = 1 ] || test_failed $LINENO "${LZIP}" -t -- nx_file.lz 2> /dev/null [ $? = 1 ] || test_failed $LINENO @@ -100,35 +102,35 @@ done printf "LZIP\001-.............................." | "${LZIP}" -t 2> /dev/null printf "LZIP\002-.............................." | "${LZIP}" -t 2> /dev/null printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null -rm -f out || framework_failure printf "\ntesting decompression..." for i in "${in_lz}" "${in_em}" "${testdir}"/test_sync.lz ; do "${LZIP}" -t "$i" || test_failed $LINENO "$i" - "${LZIP}" -d "$i" -o copy || test_failed $LINENO "$i" - cmp in copy || test_failed $LINENO "$i" - "${LZIP}" -cd "$i" > copy || test_failed $LINENO "$i" - cmp in copy || test_failed $LINENO "$i" - "${LZIP}" -d "$i" -o - > copy || test_failed $LINENO "$i" - cmp in copy || test_failed $LINENO "$i" - "${LZIP}" -d < "$i" > copy || test_failed $LINENO "$i" - cmp in copy || test_failed $LINENO "$i" - rm -f copy || framework_failure + "${LZIP}" -d "$i" -o out || test_failed $LINENO "$i" + cmp in out || test_failed $LINENO "$i" + "${LZIP}" -cd "$i" > out || test_failed $LINENO "$i" + cmp in out || test_failed $LINENO "$i" + "${LZIP}" -d "$i" -o - > out || test_failed $LINENO "$i" + cmp in out || test_failed $LINENO "$i" + "${LZIP}" -d < "$i" > out || test_failed $LINENO "$i" + cmp in out || test_failed $LINENO "$i" + rm -f out || framework_failure done -lines=$("${LZIP}" -tvv "${in_em}" 2>&1 | wc -l) || test_failed $LINENO +lines=`"${LZIP}" -tvv "${in_em}" 2>&1 | wc -l` || test_failed $LINENO [ "${lines}" -eq 8 ] || test_failed $LINENO "${lines}" -"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO -cat "${in_lz}" > copy.lz || framework_failure -"${LZIP}" -dk copy.lz || test_failed $LINENO -cmp in copy || test_failed $LINENO -cat fox > copy || framework_failure cat "${in_lz}" > out.lz || framework_failure +"${LZIP}" -dk out.lz || test_failed $LINENO +cmp in out || test_failed $LINENO rm -f out || framework_failure +"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO +cat fox > copy || framework_failure +cat "${in_lz}" > copy.lz || framework_failure "${LZIP}" -d copy.lz out.lz 2> /dev/null # skip copy, decompress out [ $? = 1 ] || test_failed $LINENO +[ ! -e out.lz ] || test_failed $LINENO cmp fox copy || test_failed $LINENO cmp in out || test_failed $LINENO "${LZIP}" -df copy.lz || test_failed $LINENO @@ -136,15 +138,15 @@ cmp in out || test_failed $LINENO cmp in copy || test_failed $LINENO rm -f copy out || framework_failure -cat "${in_lz}" > copy.lz || framework_failure -"${LZIP}" -d -S100k copy.lz || test_failed $LINENO # ignore -S -[ ! -e copy.lz ] || test_failed $LINENO -cmp in copy || test_failed $LINENO +cat "${in_lz}" > out.lz || framework_failure +"${LZIP}" -d -S100k out.lz || test_failed $LINENO # ignore -S +[ ! -e out.lz ] || test_failed $LINENO +cmp in out || test_failed $LINENO -printf "to be overwritten" > copy || framework_failure -"${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO -cmp in copy || test_failed $LINENO -rm -f out copy || framework_failure +printf "to be overwritten" > out || framework_failure +"${LZIP}" -df -o out < "${in_lz}" || test_failed $LINENO +cmp in out || test_failed $LINENO +rm -f out || framework_failure "${LZIP}" -d -o ./- "${in_lz}" || test_failed $LINENO cmp in ./- || test_failed $LINENO rm -f ./- || framework_failure @@ -153,67 +155,68 @@ cmp in ./- || test_failed $LINENO rm -f ./- || framework_failure cat "${in_lz}" > anyothername || framework_failure -"${LZIP}" -dv - anyothername - < "${in_lz}" > copy 2> /dev/null || +"${LZIP}" -dv - anyothername - < "${in_lz}" > out 2> /dev/null || test_failed $LINENO -cmp in copy || test_failed $LINENO +cmp in out || test_failed $LINENO cmp in anyothername.out || test_failed $LINENO -rm -f copy anyothername.out || framework_failure +rm -f out anyothername.out || framework_failure "${LZIP}" -tq in "${in_lz}" [ $? = 2 ] || test_failed $LINENO "${LZIP}" -tq nx_file.lz "${in_lz}" [ $? = 1 ] || test_failed $LINENO -"${LZIP}" -cdq in "${in_lz}" > copy +"${LZIP}" -cdq in "${in_lz}" > out [ $? = 2 ] || test_failed $LINENO -cat copy in | cmp in - || test_failed $LINENO # copy must be empty -"${LZIP}" -cdq nx_file.lz "${in_lz}" > copy +cat out in | cmp in - || test_failed $LINENO # out must be empty +"${LZIP}" -cdq nx_file.lz "${in_lz}" > out # skip nx_file, decompress in [ $? = 1 ] || test_failed $LINENO -cmp in copy || test_failed $LINENO -rm -f copy || framework_failure -cat "${in_lz}" > copy.lz || framework_failure +cmp in out || test_failed $LINENO +rm -f out || framework_failure +cat "${in_lz}" > out.lz || framework_failure for i in 1 2 3 4 5 6 7 ; do - printf "g" >> copy.lz || framework_failure - "${LZIP}" -atvvvv copy.lz "${in_lz}" 2> /dev/null + printf "g" >> out.lz || framework_failure + "${LZIP}" -atvvvv out.lz "${in_lz}" 2> /dev/null [ $? = 2 ] || test_failed $LINENO $i done -"${LZIP}" -dq in copy.lz +"${LZIP}" -dq in out.lz [ $? = 2 ] || test_failed $LINENO -[ -e copy.lz ] || test_failed $LINENO -[ ! -e copy ] || test_failed $LINENO +[ -e out.lz ] || test_failed $LINENO +[ ! -e out ] || test_failed $LINENO [ ! -e in.out ] || test_failed $LINENO -"${LZIP}" -dq nx_file.lz copy.lz +"${LZIP}" -dq nx_file.lz out.lz [ $? = 1 ] || test_failed $LINENO -[ ! -e copy.lz ] || test_failed $LINENO +[ ! -e out.lz ] || test_failed $LINENO [ ! -e nx_file ] || test_failed $LINENO -cmp in copy || test_failed $LINENO +cmp in out || test_failed $LINENO +rm -f out || framework_failure cat in in > in2 || framework_failure "${LZIP}" -t "${in_lz}" "${in_lz}" || test_failed $LINENO -"${LZIP}" -cd "${in_lz}" "${in_lz}" -o out > copy2 || test_failed $LINENO +"${LZIP}" -cd "${in_lz}" "${in_lz}" -o out > out2 || test_failed $LINENO [ ! -e out ] || test_failed $LINENO # override -o -cmp in2 copy2 || test_failed $LINENO -rm -f copy2 || framework_failure -"${LZIP}" -d "${in_lz}" "${in_lz}" -o copy2 || test_failed $LINENO -cmp in2 copy2 || test_failed $LINENO -rm -f copy2 || framework_failure - -cat "${in_lz}" "${in_lz}" > copy2.lz || framework_failure -printf "\ngarbage" >> copy2.lz || framework_failure -"${LZIP}" -tvvvv copy2.lz 2> /dev/null || test_failed $LINENO -"${LZIP}" -atq copy2.lz +cmp in2 out2 || test_failed $LINENO +rm -f out2 || framework_failure +"${LZIP}" -d "${in_lz}" "${in_lz}" -o out2 || test_failed $LINENO +cmp in2 out2 || test_failed $LINENO +rm -f out2 || framework_failure + +cat "${in_lz}" "${in_lz}" > out2.lz || framework_failure +printf "\ngarbage" >> out2.lz || framework_failure +"${LZIP}" -tvvvv out2.lz 2> /dev/null || test_failed $LINENO +"${LZIP}" -atq out2.lz [ $? = 2 ] || test_failed $LINENO -"${LZIP}" -atq < copy2.lz +"${LZIP}" -atq < out2.lz [ $? = 2 ] || test_failed $LINENO -"${LZIP}" -adkq copy2.lz +"${LZIP}" -adkq out2.lz [ $? = 2 ] || test_failed $LINENO -[ ! -e copy2 ] || test_failed $LINENO -"${LZIP}" -adkq -o copy2 < copy2.lz +[ ! -e out2 ] || test_failed $LINENO +"${LZIP}" -adkq -o out2 < out2.lz [ $? = 2 ] || test_failed $LINENO -[ ! -e copy2 ] || test_failed $LINENO -printf "to be overwritten" > copy2 || framework_failure -"${LZIP}" -df copy2.lz || test_failed $LINENO -cmp in2 copy2 || test_failed $LINENO -rm -f copy2 || framework_failure +[ ! -e out2 ] || test_failed $LINENO +printf "to be overwritten" > out2 || framework_failure +"${LZIP}" -df out2.lz || test_failed $LINENO +cmp in2 out2 || test_failed $LINENO +rm -f out2 || framework_failure printf "\ntesting compression..." @@ -221,14 +224,16 @@ printf "\ntesting compression..." [ ! -e out3.lz ] || test_failed $LINENO # override -o and -S "${LZIP}" -0f in in --output=copy2.lz || test_failed $LINENO "${LZIP}" -d copy2.lz -o out2 || test_failed $LINENO +[ -e copy2.lz ] || test_failed $LINENO cmp in2 out2 || test_failed $LINENO rm -f out2 copy2.lz || framework_failure -"${LZIP}" -cf "${in_lz}" > out 2> /dev/null # /dev/null is a tty on OS/2 +"${LZIP}" -cf "${in_lz}" > lzlz 2> /dev/null # /dev/null is a tty on OS/2 [ $? = 1 ] || test_failed $LINENO -"${LZIP}" -Fvvm36 -o - -s16 "${in_lz}" > out 2> /dev/null || test_failed $LINENO -"${LZIP}" -cd out | "${LZIP}" -d > copy || test_failed $LINENO -cmp in copy || test_failed $LINENO +"${LZIP}" -Fvvm36 -o - -s16 "${in_lz}" > lzlz 2> /dev/null || test_failed $LINENO +"${LZIP}" -cd lzlz | "${LZIP}" -d > out || test_failed $LINENO +cmp in out || test_failed $LINENO +rm -f lzlz out || framework_failure "${LZIP}" -0 -o ./- in || test_failed $LINENO "${LZIP}" -cd ./- | cmp in - || test_failed $LINENO @@ -240,10 +245,10 @@ rm -f ./- || framework_failure for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do "${LZIP}" -k -$i -s16 in || test_failed $LINENO $i - mv -f in.lz copy.lz || test_failed $LINENO $i - printf "garbage" >> copy.lz || framework_failure - "${LZIP}" -df copy.lz || test_failed $LINENO $i - cmp in copy || test_failed $LINENO $i + mv in.lz out.lz || test_failed $LINENO $i + printf "garbage" >> out.lz || framework_failure + "${LZIP}" -df out.lz || test_failed $LINENO $i + cmp in out || test_failed $LINENO $i "${LZIP}" -$i -s16 in -c > out || test_failed $LINENO $i "${LZIP}" -$i -s16 in -o o_out || test_failed $LINENO $i # don't add .lz @@ -259,13 +264,13 @@ for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do cmp in copy || test_failed $LINENO $i rm -f out.lz || framework_failure - printf "to be overwritten" > out || framework_failure # don't add .lz - "${LZIP}" -f -$i -s16 -o out < in || test_failed $LINENO $i + printf "to be overwritten" > out || framework_failure + "${LZIP}" -f -$i -s16 -o out < in || test_failed $LINENO $i # don't add .lz [ ! -e out.lz ] || test_failed $LINENO "${LZIP}" -df -o copy < out || test_failed $LINENO $i cmp in copy || test_failed $LINENO $i done -rm -f out out.lz || framework_failure +rm -f copy out || framework_failure cat in in in in in in in in > in8 || framework_failure "${LZIP}" -1s12 -S100k in8 || test_failed $LINENO @@ -337,12 +342,12 @@ printf "\ntesting bad input..." headers='LZIp LZiP LZip LzIP LzIp LziP lZIP lZIp lZiP lzIP' body='\001\014\000\203\377\373\377\377\300\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000$\000\000\000\000\000\000\000' -cat "${in_lz}" > int.lz -printf "LZIP${body}" >> int.lz +cat "${in_lz}" > int.lz || framework_failure +printf "LZIP${body}" >> int.lz || framework_failure if "${LZIP}" -tq int.lz ; then for header in ${headers} ; do - printf "${header}${body}" > int.lz # first member - "${LZIP}" -tq int.lz + printf "${header}${body}" > int.lz || framework_failure + "${LZIP}" -tq int.lz # first member [ $? = 2 ] || test_failed $LINENO ${header} "${LZIP}" -tq < int.lz [ $? = 2 ] || test_failed $LINENO ${header} @@ -354,9 +359,9 @@ if "${LZIP}" -tq int.lz ; then [ $? = 2 ] || test_failed $LINENO ${header} "${LZIP}" -cdq --loose-trailing int.lz > /dev/null [ $? = 2 ] || test_failed $LINENO ${header} - cat "${in_lz}" > int.lz - printf "${header}${body}" >> int.lz # trailing data - "${LZIP}" -tq int.lz + cat "${in_lz}" > int.lz || framework_failure + printf "${header}${body}" >> int.lz || framework_failure + "${LZIP}" -tq int.lz # trailing data [ $? = 2 ] || test_failed $LINENO ${header} "${LZIP}" -tq < int.lz [ $? = 2 ] || test_failed $LINENO ${header} @@ -403,15 +408,15 @@ if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && [ $? = 2 ] || test_failed $LINENO $i "${LZIP}" -tq < trunc.lz [ $? = 2 ] || test_failed $LINENO $i - "${LZIP}" -cdq trunc.lz > out + "${LZIP}" -cdq trunc.lz > /dev/null [ $? = 2 ] || test_failed $LINENO $i - "${LZIP}" -dq < trunc.lz > out + "${LZIP}" -dq < trunc.lz > /dev/null [ $? = 2 ] || test_failed $LINENO $i done else printf "\nwarning: skipping truncation test: 'dd' does not work on your system." fi -rm -f in2.lz in3.lz trunc.lz out || framework_failure +rm -f in2.lz in3.lz trunc.lz || framework_failure cat "${in_lz}" > ingin.lz || framework_failure printf "g" >> ingin.lz || framework_failure @@ -420,19 +425,19 @@ cat "${in_lz}" >> ingin.lz || framework_failure [ $? = 2 ] || test_failed $LINENO "${LZIP}" -atq < ingin.lz [ $? = 2 ] || test_failed $LINENO -"${LZIP}" -acdq ingin.lz > out +"${LZIP}" -acdq ingin.lz > /dev/null [ $? = 2 ] || test_failed $LINENO -"${LZIP}" -adq < ingin.lz > out +"${LZIP}" -adq < ingin.lz > /dev/null [ $? = 2 ] || test_failed $LINENO "${LZIP}" -t ingin.lz || test_failed $LINENO "${LZIP}" -t < ingin.lz || test_failed $LINENO -"${LZIP}" -cd ingin.lz > copy || test_failed $LINENO -cmp in copy || test_failed $LINENO -"${LZIP}" -d < ingin.lz > copy || test_failed $LINENO -cmp in copy || test_failed $LINENO +"${LZIP}" -cd ingin.lz > out || test_failed $LINENO +cmp in out || test_failed $LINENO +"${LZIP}" -d < ingin.lz > out || test_failed $LINENO +cmp in out || test_failed $LINENO "${FFEXAMPLE}" -d ingin.lz | cmp in - || test_failed $LINENO "${FFEXAMPLE}" -r ingin.lz | cmp in2 - || test_failed $LINENO -rm -f copy ingin.lz in2 out || framework_failure +rm -f in2 out ingin.lz || framework_failure echo if [ ${fail} = 0 ] ; then |