From e19c8398c1946bd4616c3fa1c095149462346d15 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 21 Feb 2022 17:16:13 +0100 Subject: Merging upstream version 1.13. Signed-off-by: Daniel Baumann --- ChangeLog | 17 +- INSTALL | 4 +- Makefile.in | 23 +- NEWS | 97 +--- README | 14 +- bbexample.c | 34 +- carg_parser.c | 110 +++-- carg_parser.h | 14 +- cbuffer.c | 10 +- configure | 16 +- decoder.c | 21 +- decoder.h | 123 +++-- doc/lzlib.info | 243 +++++----- doc/lzlib.texi | 244 +++++----- doc/minilzip.1 | 35 +- encoder.c | 127 +++--- encoder.h | 5 +- encoder_base.c | 23 +- encoder_base.h | 22 +- fast_encoder.c | 35 +- fast_encoder.h | 2 +- ffexample.c | 12 +- lzcheck.c | 11 +- lzip.h | 3 +- lzlib.c | 2 +- lzlib.h | 6 +- main.c | 1222 ------------------------------------------------- minilzip.c | 1290 ++++++++++++++++++++++++++++++++++++++++++++++++++++ testsuite/check.sh | 22 +- 29 files changed, 1935 insertions(+), 1852 deletions(-) delete mode 100644 main.c create mode 100644 minilzip.c diff --git a/ChangeLog b/ChangeLog index e38a855..8d7da96 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2022-01-23 Antonio Diaz Diaz + + * Version 1.13 released. + * Set variables AR and ARFLAGS from configure. + (Reported by Hoël Bézier). + * main.c: Rename to minilzip.c. + * minilzip.c (getnum): Show option name and valid range if error. + (check_lib): Check that LZ_API_VERSION and LZ_version_string match. + * Improve several descriptions in manual, '--help', and man page. + * lzlib.texi: Change GNU Texinfo category to 'Compression'. + (Reported by Alfred M. Szmidt). + 2021-01-02 Antonio Diaz Diaz * Version 1.12 released. @@ -48,7 +60,7 @@ * main.c: Compile on DOS with DJGPP. * lzlib.texi: Improve descriptions of '-0..-9', '-m', and '-s'. Document that 'LZ_(de)compress_finish' can be called repeatedly. - * configure: Accept appending to CFLAGS, 'CFLAGS+=OPTIONS'. + * configure: Accept appending to CFLAGS; 'CFLAGS+=OPTIONS'. 
* Makefile.in: Rename targets 'install-bin*' to 'install-lib*'. * Makefile.in: Targets 'install-bin*' now install minilzip. * INSTALL: Document use of CFLAGS+='-D __USE_MINGW_ANSI_STDIO'. @@ -57,6 +69,7 @@ * Version 1.10 released. * LZ_compress_finish now adjusts dictionary size for each member. + (Older versions can adjust dictionary size only once). * lzlib.c (LZ_decompress_read): Detect corrupt header with HD=3. * main.c: New option '--loose-trailing'. * main.c (main): Option '-S, --volume-size' now keeps input files. @@ -235,7 +248,7 @@ * Version 0.1 released. -Copyright (C) 2009-2021 Antonio Diaz Diaz. +Copyright (C) 2009-2022 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute, and diff --git a/INSTALL b/INSTALL index a9a870e..f3b0b94 100644 --- a/INSTALL +++ b/INSTALL @@ -1,7 +1,7 @@ Requirements ------------ You will need a C99 compiler. (gcc 3.3.6 or newer is recommended). -I use gcc 6.1.0 and 4.1.2, but the code should compile with any standards +I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -74,7 +74,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2009-2021 Antonio Diaz Diaz. +Copyright (C) 2009-2022 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. 
diff --git a/Makefile.in b/Makefile.in index 94e3770..81b404b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -1,6 +1,5 @@ DISTNAME = $(pkgname)-$(pkgversion) -AR = ar INSTALL = install INSTALL_PROGRAM = $(INSTALL) -m 755 INSTALL_DATA = $(INSTALL) -m 644 @@ -9,7 +8,7 @@ LDCONFIG = /sbin/ldconfig SHELL = /bin/sh CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 -objs = carg_parser.o main.o +objs = carg_parser.o minilzip.o .PHONY : all install install-bin install-info install-man \ @@ -24,27 +23,27 @@ objs = carg_parser.o main.o all : $(progname_static) $(progname_shared) lib$(libname).a : lzlib.o - $(AR) -rcs $@ $< + $(AR) $(ARFLAGS) $@ $< lib$(libname).so.$(pkgversion) : lzlib_sh.o - $(CC) $(LDFLAGS) $(CFLAGS) -fpic -fPIC -shared -Wl,--soname=lib$(libname).so.$(soversion) -o $@ $< + $(CC) $(CFLAGS) $(LDFLAGS) -fpic -fPIC -shared -Wl,--soname=lib$(libname).so.$(soversion) -o $@ $< $(progname) : $(objs) lib$(libname).a - $(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(objs) lib$(libname).a + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs) lib$(libname).a $(progname)_shared : $(objs) lib$(libname).so.$(pkgversion) - $(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(objs) lib$(libname).so.$(pkgversion) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs) lib$(libname).so.$(pkgversion) bbexample : bbexample.o lib$(libname).a - $(CC) $(LDFLAGS) $(CFLAGS) -o $@ bbexample.o lib$(libname).a + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ bbexample.o lib$(libname).a ffexample : ffexample.o lib$(libname).a - $(CC) $(LDFLAGS) $(CFLAGS) -o $@ ffexample.o lib$(libname).a + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ ffexample.o lib$(libname).a lzcheck : lzcheck.o lib$(libname).a - $(CC) $(LDFLAGS) $(CFLAGS) -o $@ lzcheck.o lib$(libname).a + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ lzcheck.o lib$(libname).a -main.o : main.c +minilzip.o : minilzip.c $(CC) $(CPPFLAGS) $(CFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< lzlib_sh.o : lzlib.c @@ -60,7 +59,7 @@ $(objs) : Makefile carg_parser.o : carg_parser.h lzlib.o : Makefile 
$(lzdeps) lzlib_sh.o : Makefile $(lzdeps) -main.o : carg_parser.h lzlib.h +minilzip.o : carg_parser.h lzlib.h bbexample.o : Makefile lzlib.h ffexample.o : Makefile lzlib.h lzcheck.o : Makefile lzlib.h @@ -76,7 +75,7 @@ $(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi man : $(VPATH)/doc/$(progname).1 $(VPATH)/doc/$(progname).1 : $(progname) - help2man -n 'reduces the size of files' -o $@ --no-info ./$(progname) + help2man -n 'reduces the size of files' -o $@ --info-page=$(pkgname) ./$(progname) Makefile : $(VPATH)/configure $(VPATH)/Makefile.in ./config.status diff --git a/NEWS b/NEWS index 7b01d52..e2ceb38 100644 --- a/NEWS +++ b/NEWS @@ -1,92 +1,15 @@ -Changes in version 1.12: +Changes in version 1.13: -The value of the version test macro 'LZ_API_VERSION' is now defined as -1000 * major + minor. For version 1.12 it is 1012. -This change does not affect the soversion. +The variables AR and ARFLAGS can now be set from configure. (Before you +needed to run 'make AR='. (Reported by Hoël Bézier). -The new function 'LZ_api_version', which returns the LZ_API_VERSION of the -library object code being used, has been added to lzlib. +In case of error in a numerical argument to a command line option, minilzip +now shows the name of the option and the range of valid values. -If end of file is found at member trailer or EOS marker, -'LZ_decompress_errno' now returns 'LZ_unexpected_eof' instead of -'LZ_data_error'. +'minilzip --check-lib' now checks that LZ_API_VERSION and LZ_version_string +match. -Decompression speed has been slightly increased. +Several descriptions have been improved in manual, '--help', and man page. -A bug has been fixed in minilzip that falsely reported a library stall when -decompressing a file with empty members. - -The new option '--check-lib', which compares the version of lzlib used to -compile minilzip with the version actually being used, has been added to -minilzip. 
- -Minilzip now reports an error if a file name is empty (minilzip -t ""). - -Option '-o, --output' now behaves like '-c, --stdout', but sending the -output unconditionally to a file instead of to standard output. See the new -description of '-o' in the manual. This change is not backwards compatible. -Therefore commands like: - minilzip -o foo.lz - bar < foo -must now be split into: - minilzip -o foo.lz - < foo - minilzip bar -or rewritten as: - minilzip - bar < foo > foo.lz - -When using '-c' or '-o', minilzip now checks whether the output is a -terminal only once. - -Minilzip now does not even open the output file if the input file is a terminal. - -The words 'decompressed' and 'compressed' have been replaced with the -shorter 'out' and 'in' in the verbose output of minilzip when decompressing -or testing. - -It has been documented in the manual that 'LZ_(de)compress_close' and -'LZ_(de)compress_errno' can be called with a null argument. - -It has been documented in the manual that the LZMA marker '3' ("Sync Flush" -marker) is not allowed in lzip files. Marker '3' is a device for interactive -communication between applications using lzlib, but is useless and wasteful -in a file, and is excluded from the media type 'application/lzip'. The LZMA -marker '2' ("End Of Stream" marker) is the only marker allowed in lzip -files. - -It has been documented in the manual that not calling 'LZ_decompress_finish' -prevents lzlib from detecting a truncated member. - -It has been documented in the manual that 'LZ_decompress_read' returns at -least once per member so that 'LZ_decompress_member_finished' can be called -(and trailer data retrieved) for each member, even for empty members. -Therefore, 'LZ_decompress_read' returning 0 does not mean that the end of -the stream has been reached. - -It has been documented in the manual that 'LZ_(de)compress_read' can be -called with a null buffer pointer argument. 
- -Real code examples for the most common uses of the library have been added -to the tutorial. - -'bbexample.c' has been simplified to not use 'LZ_(de)compress_write_size'. - -'lzcheck' now accepts options '-s' (to check LZ_compress_sync_flush) and -'-m' (to check member by member decompression). - -'lzcheck.c' now also tests member by member decompression without -intermediate calls to 'LZ_decompress_finish'. - -The new file 'ffexample.c', containing example functions for file-to-file -compression/decompression, has been added to the distribution. - -The commands needed to extract files from a tar.lz archive have been -documented in the output of 'minilzip --help' and in the man page. - -'make install-bin' no longer installs the minilzip man page. This is to -prevent 'make install-bin install-man-compress' from installing the man page -twice before compressing it. - -The new targets 'install-bin-compress' and 'install-bin-strip-compress', -which install a (stripped) minilzip and a compressed man page, have been -added to the Makefile. - -9 new test files have been added to the testsuite. +The texinfo category of the manual has been changed from 'Data Compression' +to 'Compression' to match that of gzip. (Reported by Alfred M. Szmidt). diff --git a/README b/README index ee6e6d7..86a0cb5 100644 --- a/README +++ b/README @@ -31,9 +31,13 @@ the beginning is a thing of the past. The functions and variables forming the interface of the compression library are declared in the file 'lzlib.h'. Usage examples of the library are given -in the files 'bbexample.c', 'ffexample.c', and 'main.c' from the source +in the files 'bbexample.c', 'ffexample.c', and 'minilzip.c' from the source distribution. +All the library functions are thread safe. The library does not install any +signal handler. The decoder checks the consistency of the compressed data, +so the library should never crash even in case of corrupted input. 
+ Compression/decompression is done by repeatedly calling a couple of read/write functions until all the data have been processed by the library. This interface is safer and less error prone than the traditional zlib @@ -60,10 +64,6 @@ Lzlib is able to compress and decompress streams of unlimited size by automatically creating multimember output. The members so created are large, about 2 PiB each. -All the library functions are thread safe. The library does not install -any signal handler. The decoder checks the consistency of the compressed -data, so the library should never crash even in case of corrupted input. - In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a concrete algorithm; it is more like "any algorithm using the LZMA coding scheme". For example, the option '-0' of lzip uses the scheme in almost the @@ -73,7 +73,7 @@ finding coding sequences of minimum size than the one currently used by lzip could be developed, and the resulting sequence could also be coded using the LZMA coding scheme. -Lzlib currently implements two variants of the LZMA algorithm; fast (used by +Lzlib currently implements two variants of the LZMA algorithm: fast (used by option '-0' of minilzip) and normal (used by all other compression levels). The high compression of LZMA comes from combining two basic, well-proven @@ -93,7 +93,7 @@ been compressed. Decompressed is used to refer to data which have undergone the process of decompression. -Copyright (C) 2009-2021 Antonio Diaz Diaz. +Copyright (C) 2009-2022 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. diff --git a/bbexample.c b/bbexample.c index 96257ee..074f7ae 100644 --- a/bbexample.c +++ b/bbexample.c @@ -1,5 +1,5 @@ /* Buffer to buffer example - Test program for the library lzlib - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. 
This program is free software: you have unlimited permission to copy, distribute, and modify it. @@ -28,9 +28,9 @@ #endif -/* Returns the address of a malloc'd buffer containing the file data and +/* Return the address of a malloc'd buffer containing the file data and the file size in '*file_sizep'. - In case of error, returns 0 and does not modify '*file_sizep'. + In case of error, return 0 and do not modify '*file_sizep'. */ uint8_t * read_file( const char * const name, long * const file_sizep ) { @@ -73,10 +73,10 @@ uint8_t * read_file( const char * const name, long * const file_sizep ) } -/* Compresses 'insize' bytes from 'inbuf'. - Returns the address of a malloc'd buffer containing the compressed data, +/* Compress 'insize' bytes from 'inbuf'. + Return the address of a malloc'd buffer containing the compressed data, and the size of the data in '*outlenp'. - In case of error, returns 0 and does not modify '*outlenp'. + In case of error, return 0 and do not modify '*outlenp'. */ uint8_t * bbcompressl( const uint8_t * const inbuf, const long insize, const int level, long * const outlenp ) @@ -152,10 +152,10 @@ uint8_t * bbcompressl( const uint8_t * const inbuf, const long insize, } -/* Decompresses 'insize' bytes from 'inbuf'. - Returns the address of a malloc'd buffer containing the decompressed +/* Decompress 'insize' bytes from 'inbuf'. + Return the address of a malloc'd buffer containing the decompressed data, and the size of the data in '*outlenp'. - In case of error, returns 0 and does not modify '*outlenp'. + In case of error, return 0 and do not modify '*outlenp'. */ uint8_t * bbdecompressl( const uint8_t * const inbuf, const long insize, long * const outlenp ) @@ -230,10 +230,10 @@ int full_test( const uint8_t * const inbuf, const long insize ) } -/* Compresses 'insize' bytes from 'inbuf' to 'outbuf'. - Returns the size of the compressed data in '*outlenp'. - In case of error, or if 'outsize' is too small, returns false and does - not modify '*outlenp'. 
+/* Compress 'insize' bytes from 'inbuf' to 'outbuf'. + Return the size of the compressed data in '*outlenp'. + In case of error, or if 'outsize' is too small, return false and do not + modify '*outlenp'. */ bool bbcompress( const uint8_t * const inbuf, const int insize, const int dictionary_size, const int match_len_limit, @@ -267,10 +267,10 @@ bool bbcompress( const uint8_t * const inbuf, const int insize, } -/* Decompresses 'insize' bytes from 'inbuf' to 'outbuf'. - Returns the size of the decompressed data in '*outlenp'. - In case of error, or if 'outsize' is too small, returns false and does - not modify '*outlenp'. +/* Decompress 'insize' bytes from 'inbuf' to 'outbuf'. + Return the size of the decompressed data in '*outlenp'. + In case of error, or if 'outsize' is too small, return false and do not + modify '*outlenp'. */ bool bbdecompress( const uint8_t * const inbuf, const int insize, uint8_t * const outbuf, const int outsize, diff --git a/carg_parser.c b/carg_parser.c index d0c05d5..181ba23 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2021 Antonio Diaz Diaz. + Copyright (C) 2006-2022 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -32,10 +32,10 @@ static void * ap_resize_buffer( void * buf, const int min_size ) } -static char push_back_record( struct Arg_parser * const ap, - const int code, const char * const argument ) +static char push_back_record( struct Arg_parser * const ap, const int code, + const char * const long_name, + const char * const argument ) { - const int len = strlen( argument ); struct ap_Record * p; void * tmp = ap_resize_buffer( ap->data, ( ap->data_size + 1 ) * sizeof (struct ap_Record) ); @@ -43,11 +43,29 @@ static char push_back_record( struct Arg_parser * const ap, ap->data = (struct ap_Record *)tmp; p = &(ap->data[ap->data_size]); p->code = code; - p->argument = 0; - tmp = ap_resize_buffer( p->argument, len + 1 ); - if( !tmp ) return 0; - p->argument = (char *)tmp; - strncpy( p->argument, argument, len + 1 ); + if( long_name ) + { + const int len = strlen( long_name ); + p->parsed_name = (char *)malloc( len + 2 + 1 ); + if( !p->parsed_name ) return 0; + p->parsed_name[0] = p->parsed_name[1] = '-'; + strncpy( p->parsed_name + 2, long_name, len + 1 ); + } + else if( code > 0 && code < 256 ) + { + p->parsed_name = (char *)malloc( 2 + 1 ); + if( !p->parsed_name ) return 0; + p->parsed_name[0] = '-'; p->parsed_name[1] = code; p->parsed_name[2] = 0; + } + else p->parsed_name = 0; + if( argument ) + { + const int len = strlen( argument ); + p->argument = (char *)malloc( len + 1 ); + if( !p->argument ) { free( p->parsed_name ); return 0; } + strncpy( p->argument, argument, len + 1 ); + } + else p->argument = 0; ++ap->data_size; return 1; } @@ -68,12 +86,14 @@ static char add_error( struct Arg_parser * const ap, const char * const msg ) static void free_data( struct Arg_parser * const ap ) { int i; - for( i = 0; i < ap->data_size; ++i ) free( ap->data[i].argument ); + for( i = 0; i < ap->data_size; ++i ) + { free( ap->data[i].argument ); free( ap->data[i].parsed_name ); 
} if( ap->data ) { free( ap->data ); ap->data = 0; } ap->data_size = 0; } +/* Return 0 only if out of memory. */ static char parse_long_option( struct Arg_parser * const ap, const char * const opt, const char * const arg, const struct ap_Option options[], @@ -87,9 +107,10 @@ static char parse_long_option( struct Arg_parser * const ap, /* Test all long options for either exact match or abbreviated matches. */ for( i = 0; options[i].code != 0; ++i ) - if( options[i].name && strncmp( options[i].name, &opt[2], len ) == 0 ) + if( options[i].long_name && + strncmp( options[i].long_name, &opt[2], len ) == 0 ) { - if( strlen( options[i].name ) == len ) /* Exact match found */ + if( strlen( options[i].long_name ) == len ) /* Exact match found */ { index = i; exact = 1; break; } else if( index < 0 ) index = i; /* First nonexact match found */ else if( options[index].code != options[i].code || @@ -117,35 +138,39 @@ static char parse_long_option( struct Arg_parser * const ap, { if( options[index].has_arg == ap_no ) { - add_error( ap, "option '--" ); add_error( ap, options[index].name ); + add_error( ap, "option '--" ); add_error( ap, options[index].long_name ); add_error( ap, "' doesn't allow an argument" ); return 1; } if( options[index].has_arg == ap_yes && !opt[len+3] ) { - add_error( ap, "option '--" ); add_error( ap, options[index].name ); + add_error( ap, "option '--" ); add_error( ap, options[index].long_name ); add_error( ap, "' requires an argument" ); return 1; } - return push_back_record( ap, options[index].code, &opt[len+3] ); + return push_back_record( ap, options[index].code, + options[index].long_name, &opt[len+3] ); } if( options[index].has_arg == ap_yes ) { if( !arg || !arg[0] ) { - add_error( ap, "option '--" ); add_error( ap, options[index].name ); + add_error( ap, "option '--" ); add_error( ap, options[index].long_name ); add_error( ap, "' requires an argument" ); return 1; } ++*argindp; - return push_back_record( ap, options[index].code, arg ); + return 
push_back_record( ap, options[index].code, + options[index].long_name, arg ); } - return push_back_record( ap, options[index].code, "" ); + return push_back_record( ap, options[index].code, + options[index].long_name, 0 ); } +/* Return 0 only if out of memory. */ static char parse_short_option( struct Arg_parser * const ap, const char * const opt, const char * const arg, const struct ap_Option options[], @@ -156,13 +181,13 @@ static char parse_short_option( struct Arg_parser * const ap, while( cind > 0 ) { int index = -1, i; - const unsigned char code = opt[cind]; + const unsigned char c = opt[cind]; char code_str[2]; - code_str[0] = code; code_str[1] = 0; + code_str[0] = c; code_str[1] = 0; - if( code != 0 ) + if( c != 0 ) for( i = 0; options[i].code; ++i ) - if( code == options[i].code ) + if( c == options[i].code ) { index = i; break; } if( index < 0 ) @@ -176,7 +201,7 @@ static char parse_short_option( struct Arg_parser * const ap, if( options[index].has_arg != ap_no && cind > 0 && opt[cind] ) { - if( !push_back_record( ap, code, &opt[cind] ) ) return 0; + if( !push_back_record( ap, c, 0, &opt[cind] ) ) return 0; ++*argindp; cind = 0; } else if( options[index].has_arg == ap_yes ) @@ -188,9 +213,9 @@ static char parse_short_option( struct Arg_parser * const ap, return 1; } ++*argindp; cind = 0; - if( !push_back_record( ap, code, arg ) ) return 0; + if( !push_back_record( ap, c, 0, arg ) ) return 0; } - else if( !push_back_record( ap, code, "" ) ) return 0; + else if( !push_back_record( ap, c, 0, 0 ) ) return 0; } return 1; } @@ -203,7 +228,7 @@ char ap_init( struct Arg_parser * const ap, const char ** non_options = 0; /* skipped non-options */ int non_options_size = 0; /* number of skipped non-options */ int argind = 1; /* index in argv */ - int i; + char done = 0; /* false until success */ ap->data = 0; ap->error = 0; @@ -223,20 +248,20 @@ char ap_init( struct Arg_parser * const ap, if( ch2 == '-' ) { if( !argv[argind][2] ) { ++argind; break; } /* we found "--" 
*/ - else if( !parse_long_option( ap, opt, arg, options, &argind ) ) return 0; + else if( !parse_long_option( ap, opt, arg, options, &argind ) ) goto out; } - else if( !parse_short_option( ap, opt, arg, options, &argind ) ) return 0; + else if( !parse_short_option( ap, opt, arg, options, &argind ) ) goto out; if( ap->error ) break; } else { if( in_order ) - { if( !push_back_record( ap, 0, argv[argind++] ) ) return 0; } + { if( !push_back_record( ap, 0, 0, argv[argind++] ) ) goto out; } else { void * tmp = ap_resize_buffer( non_options, ( non_options_size + 1 ) * sizeof *non_options ); - if( !tmp ) return 0; + if( !tmp ) goto out; non_options = (const char **)tmp; non_options[non_options_size++] = argv[argind++]; } @@ -245,13 +270,15 @@ char ap_init( struct Arg_parser * const ap, if( ap->error ) free_data( ap ); else { + int i; for( i = 0; i < non_options_size; ++i ) - if( !push_back_record( ap, 0, non_options[i] ) ) return 0; + if( !push_back_record( ap, 0, 0, non_options[i] ) ) goto out; while( argind < argc ) - if( !push_back_record( ap, 0, argv[argind++] ) ) return 0; + if( !push_back_record( ap, 0, 0, argv[argind++] ) ) goto out; } - if( non_options ) free( non_options ); - return 1; + done = 1; +out: if( non_options ) free( non_options ); + return done; } @@ -273,13 +300,20 @@ int ap_arguments( const struct Arg_parser * const ap ) int ap_code( const struct Arg_parser * const ap, const int i ) { - if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].code; - else return 0; + if( i < 0 || i >= ap_arguments( ap ) ) return 0; + return ap->data[i].code; + } + + +const char * ap_parsed_name( const struct Arg_parser * const ap, const int i ) + { + if( i < 0 || i >= ap_arguments( ap ) || !ap->data[i].parsed_name ) return ""; + return ap->data[i].parsed_name; } const char * ap_argument( const struct Arg_parser * const ap, const int i ) { - if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].argument; - else return ""; + if( i < 0 || i >= ap_arguments( ap ) || 
!ap->data[i].argument ) return ""; + return ap->data[i].argument; } diff --git a/carg_parser.h b/carg_parser.h index c5f2352..0c64861 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2021 Antonio Diaz Diaz. + Copyright (C) 2006-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -24,9 +24,9 @@ message. 'options' is an array of 'struct ap_Option' terminated by an element - containing a code which is zero. A null name means a short-only - option. A code value outside the unsigned char range means a - long-only option. + containing a code which is zero. A null long_name means a short-only + option. A code value outside the unsigned char range means a long-only + option. Arg_parser normally makes it appear as if all the option arguments were specified before all the non-option arguments for the purposes @@ -50,7 +50,7 @@ enum ap_Has_arg { ap_no, ap_yes, ap_maybe }; struct ap_Option { int code; /* Short option letter or code ( code != 0 ) */ - const char * name; /* Long option name (maybe null) */ + const char * long_name; /* Long option name (maybe null) */ enum ap_Has_arg has_arg; }; @@ -58,6 +58,7 @@ struct ap_Option struct ap_Record { int code; + char * parsed_name; char * argument; }; @@ -86,6 +87,9 @@ int ap_arguments( const struct Arg_parser * const ap ); Else ap_argument( i ) is the option's argument (or empty). */ int ap_code( const struct Arg_parser * const ap, const int i ); +/* Full name of the option parsed (short or long). 
*/ +const char * ap_parsed_name( const struct Arg_parser * const ap, const int i ); + const char * ap_argument( const struct Arg_parser * const ap, const int i ); #ifdef __cplusplus diff --git a/cbuffer.c b/cbuffer.c index ee54131..812de42 100644 --- a/cbuffer.c +++ b/cbuffer.c @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -76,9 +76,9 @@ static bool Cb_unread_data( struct Circular_buffer * const cb, } -/* Copies up to 'out_size' bytes to 'out_buffer' and updates 'get'. +/* Copy up to 'out_size' bytes to 'out_buffer' and update 'get'. If 'out_buffer' is null, the bytes are discarded. - Returns the number of bytes copied or discarded. + Return the number of bytes copied or discarded. */ static unsigned Cb_read_data( struct Circular_buffer * const cb, uint8_t * const out_buffer, @@ -110,8 +110,8 @@ static unsigned Cb_read_data( struct Circular_buffer * const cb, } -/* Copies up to 'in_size' bytes from 'in_buffer' and updates 'put'. - Returns the number of bytes copied. +/* Copy up to 'in_size' bytes from 'in_buffer' and update 'put'. + Return the number of bytes copied. */ static unsigned Cb_write_data( struct Circular_buffer * const cb, const uint8_t * const in_buffer, diff --git a/configure b/configure index d74cf5d..4060472 100755 --- a/configure +++ b/configure @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Lzlib - Compression library for the lzip format -# Copyright (C) 2009-2021 Antonio Diaz Diaz. +# Copyright (C) 2009-2022 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute, and modify it. 
pkgname=lzlib -pkgversion=1.12 +pkgversion=1.13 soversion=1 progname=minilzip progname_static=${progname} @@ -29,9 +29,11 @@ infodir='$(datarootdir)/info' libdir='$(exec_prefix)/lib' mandir='$(datarootdir)/man' CC=gcc +AR=ar CPPFLAGS= CFLAGS='-Wall -W -O2' LDFLAGS= +ARFLAGS=-rcs # checking whether we are using GNU C. /bin/sh -c "${CC} --version" > /dev/null 2>&1 || { CC=cc ; CFLAGS=-O2 ; } @@ -79,10 +81,12 @@ while [ $# != 0 ] ; do echo " --enable-shared build also a shared library [disable]" echo " --disable-ldconfig don't run ldconfig after install" echo " CC=COMPILER C compiler to use [${CC}]" + echo " AR=ARCHIVER library archiver to use [${AR}]" echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]" echo " CFLAGS=OPTIONS command line options for the C compiler [${CFLAGS}]" echo " CFLAGS+=OPTIONS append options to the current value of CFLAGS" echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]" + echo " ARFLAGS=OPTIONS command line options for the library archiver [${ARFLAGS}]" echo exit 0 ;; --version | -V) @@ -118,10 +122,12 @@ while [ $# != 0 ] ; do --disable-ldconfig) disable_ldconfig=yes ;; CC=*) CC=${optarg} ;; + AR=*) AR=${optarg} ;; CPPFLAGS=*) CPPFLAGS=${optarg} ;; CFLAGS=*) CFLAGS=${optarg} ;; CFLAGS+=*) CFLAGS="${CFLAGS} ${optarg}" ;; LDFLAGS=*) LDFLAGS=${optarg} ;; + ARFLAGS=*) ARFLAGS=${optarg} ;; --*) echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;; @@ -189,13 +195,15 @@ echo "infodir = ${infodir}" echo "libdir = ${libdir}" echo "mandir = ${mandir}" echo "CC = ${CC}" +echo "AR = ${AR}" echo "CPPFLAGS = ${CPPFLAGS}" echo "CFLAGS = ${CFLAGS}" echo "LDFLAGS = ${LDFLAGS}" +echo "ARFLAGS = ${ARFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lzlib - Compression library for the lzip format -# Copyright (C) 2009-2021 Antonio Diaz Diaz. +# Copyright (C) 2009-2022 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. 
# # This Makefile is free software: you have unlimited permission @@ -220,9 +228,11 @@ infodir = ${infodir} libdir = ${libdir} mandir = ${mandir} CC = ${CC} +AR = ${AR} CPPFLAGS = ${CPPFLAGS} CFLAGS = ${CFLAGS} LDFLAGS = ${LDFLAGS} +ARFLAGS = ${ARFLAGS} EOF cat "${srcdir}/Makefile.in" >> Makefile diff --git a/decoder.c b/decoder.c index 82472b3..16f6532 100644 --- a/decoder.c +++ b/decoder.c @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -49,8 +49,6 @@ static int LZd_decode_member( struct LZ_decoder * const d ) while( !Rd_finished( rdec ) ) { - int len; - const int pos_state = LZd_data_position( d ) & pos_state_mask; /* const unsigned mpos = rdec->member_position; if( mpos - old_mpos > rd_min_available_bytes ) return 5; old_mpos = mpos; */ @@ -58,23 +56,19 @@ static int LZd_decode_member( struct LZ_decoder * const d ) { if( !rdec->at_stream_end ) return 0; if( Cb_empty( &rdec->cb ) ) break; } /* decode until EOF */ if( !LZd_enough_free_bytes( d ) ) return 0; + const int pos_state = LZd_data_position( d ) & pos_state_mask; if( Rd_decode_bit( rdec, &d->bm_match[*state][pos_state] ) == 0 ) /* 1st bit */ { /* literal byte */ Bit_model * const bm = d->bm_literal[get_lit_state(LZd_peek_prev( d ))]; - if( St_is_char( *state ) ) - { - *state -= ( *state < 4 ) ? *state : 3; + if( ( *state = St_set_char( *state ) ) < 4 ) LZd_put_byte( d, Rd_decode_tree8( rdec, bm ) ); - } else - { - *state -= ( *state < 10 ) ? 
3 : 6; LZd_put_byte( d, Rd_decode_matched( rdec, bm, LZd_peek( d, d->rep0 ) ) ); - } continue; } /* match or repeated match */ + int len; if( Rd_decode_bit( rdec, &d->bm_rep[*state] ) != 0 ) /* 2nd bit */ { if( Rd_decode_bit( rdec, &d->bm_rep0[*state] ) == 0 ) /* 3rd bit */ @@ -100,13 +94,12 @@ static int LZd_decode_member( struct LZ_decoder * const d ) d->rep0 = distance; } *state = St_set_rep( *state ); - len = min_match_len + Rd_decode_len( rdec, &d->rep_len_model, pos_state ); + len = Rd_decode_len( rdec, &d->rep_len_model, pos_state ); } else /* match */ { - unsigned distance; - len = min_match_len + Rd_decode_len( rdec, &d->match_len_model, pos_state ); - distance = Rd_decode_tree6( rdec, d->bm_dis_slot[get_len_state(len)] ); + len = Rd_decode_len( rdec, &d->match_len_model, pos_state ); + unsigned distance = Rd_decode_tree6( rdec, d->bm_dis_slot[get_len_state(len)] ); if( distance >= start_dis_model ) { const unsigned dis_slot = distance; diff --git a/decoder.h b/decoder.h index 15d5dfb..27de9cb 100644 --- a/decoder.h +++ b/decoder.h @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -72,8 +72,8 @@ static inline void Rd_reset( struct Range_decoder * const rdec ) rdec->member_position = 0; rdec->at_stream_end = false; } -/* Seeks a member header and updates 'get'. '*skippedp' is set to the - number of bytes skipped. Returns true if it finds a valid header. +/* Seek for a member header and update 'get'. Set '*skippedp' to the number + of bytes skipped. Return true if a valid header is found. 
*/ static bool Rd_find_header( struct Range_decoder * const rdec, unsigned * const skippedp ) @@ -140,8 +140,7 @@ static bool Rd_try_reload( struct Range_decoder * const rdec ) int i; rdec->reload_pending = false; rdec->code = 0; - for( i = 0; i < 5; ++i ) - rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); + for( i = 0; i < 5; ++i ) rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); rdec->range = 0xFFFFFFFFU; rdec->code &= rdec->range; /* make sure that first byte is discarded */ } @@ -161,12 +160,11 @@ static inline unsigned Rd_decode( struct Range_decoder * const rdec, int i; for( i = num_bits; i > 0; --i ) { - bool bit; Rd_normalize( rdec ); rdec->range >>= 1; /* symbol <<= 1; */ /* if( rdec->code >= rdec->range ) { rdec->code -= rdec->range; symbol |= 1; } */ - bit = ( rdec->code >= rdec->range ); + const bool bit = ( rdec->code >= rdec->range ); symbol <<= 1; symbol += bit; rdec->code -= rdec->range & ( 0U - bit ); } @@ -176,42 +174,75 @@ static inline unsigned Rd_decode( struct Range_decoder * const rdec, static inline unsigned Rd_decode_bit( struct Range_decoder * const rdec, Bit_model * const probability ) { - uint32_t bound; Rd_normalize( rdec ); - bound = ( rdec->range >> bit_model_total_bits ) * *probability; + const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability; if( rdec->code < bound ) { - *probability += (bit_model_total - *probability) >> bit_model_move_bits; rdec->range = bound; + *probability += ( bit_model_total - *probability ) >> bit_model_move_bits; return 0; } else { - *probability -= *probability >> bit_model_move_bits; rdec->code -= bound; rdec->range -= bound; + *probability -= *probability >> bit_model_move_bits; return 1; } } -static inline unsigned Rd_decode_tree3( struct Range_decoder * const rdec, - Bit_model bm[] ) +static inline void Rd_decode_symbol_bit( struct Range_decoder * const rdec, + Bit_model * const probability, unsigned * symbol ) { - unsigned symbol = 2 | Rd_decode_bit( rdec, &bm[1] ); - 
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - return symbol & 7; + Rd_normalize( rdec ); + *symbol <<= 1; + const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability; + if( rdec->code < bound ) + { + rdec->range = bound; + *probability += ( bit_model_total - *probability ) >> bit_model_move_bits; + } + else + { + rdec->code -= bound; + rdec->range -= bound; + *probability -= *probability >> bit_model_move_bits; + *symbol |= 1; + } + } + +static inline void Rd_decode_symbol_bit_reversed( struct Range_decoder * const rdec, + Bit_model * const probability, unsigned * model, + unsigned * symbol, const int i ) + { + Rd_normalize( rdec ); + *model <<= 1; + const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability; + if( rdec->code < bound ) + { + rdec->range = bound; + *probability += ( bit_model_total - *probability ) >> bit_model_move_bits; + } + else + { + rdec->code -= bound; + rdec->range -= bound; + *probability -= *probability >> bit_model_move_bits; + *model |= 1; + *symbol |= 1 << i; + } } static inline unsigned Rd_decode_tree6( struct Range_decoder * const rdec, Bit_model bm[] ) { - unsigned symbol = 2 | Rd_decode_bit( rdec, &bm[1] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + unsigned symbol = 1; + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); return symbol & 0x3F; } @@ -219,9 +250,14 @@ 
static inline unsigned Rd_decode_tree8( struct Range_decoder * const rdec, Bit_model bm[] ) { unsigned symbol = 1; - int i; - for( i = 0; i < 8; ++i ) - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); return symbol & 0xFF; } @@ -233,21 +269,19 @@ Rd_decode_tree_reversed( struct Range_decoder * const rdec, unsigned symbol = 0; int i; for( i = 0; i < num_bits; ++i ) - { - const unsigned bit = Rd_decode_bit( rdec, &bm[model] ); - model <<= 1; model += bit; - symbol |= ( bit << i ); - } + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, i ); return symbol; } static inline unsigned Rd_decode_tree_reversed4( struct Range_decoder * const rdec, Bit_model bm[] ) { - unsigned symbol = Rd_decode_bit( rdec, &bm[1] ); - symbol += Rd_decode_bit( rdec, &bm[2+symbol] ) << 1; - symbol += Rd_decode_bit( rdec, &bm[4+symbol] ) << 2; - symbol += Rd_decode_bit( rdec, &bm[8+symbol] ) << 3; + unsigned model = 1; + unsigned symbol = 0; + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 0 ); + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 1 ); + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 2 ); + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 3 ); return symbol; } @@ -270,11 +304,24 @@ static inline unsigned Rd_decode_len( struct Range_decoder * const rdec, struct Len_model * const lm, const int pos_state ) { + Bit_model * bm; + unsigned mask, offset, symbol = 1; + if( Rd_decode_bit( rdec, &lm->choice1 ) == 0 ) - return Rd_decode_tree3( rdec, 
lm->bm_low[pos_state] ); + { bm = lm->bm_low[pos_state]; mask = 7; offset = 0; goto len3; } if( Rd_decode_bit( rdec, &lm->choice2 ) == 0 ) - return len_low_symbols + Rd_decode_tree3( rdec, lm->bm_mid[pos_state] ); - return len_low_symbols + len_mid_symbols + Rd_decode_tree8( rdec, lm->bm_high ); + { bm = lm->bm_mid[pos_state]; mask = 7; offset = len_low_symbols; goto len3; } + bm = lm->bm_high; mask = 0xFF; offset = len_low_symbols + len_mid_symbols; + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); +len3: + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + return ( symbol & mask ) + min_match_len + offset; } diff --git a/doc/lzlib.info b/doc/lzlib.info index bef1859..d81bc88 100644 --- a/doc/lzlib.info +++ b/doc/lzlib.info @@ -1,6 +1,6 @@ This is lzlib.info, produced by makeinfo version 4.13+ from lzlib.texi. -INFO-DIR-SECTION Data Compression +INFO-DIR-SECTION Compression START-INFO-DIR-ENTRY * Lzlib: (lzlib). Compression library for the lzip format END-INFO-DIR-ENTRY @@ -11,7 +11,7 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir) Lzlib Manual ************ -This manual is for Lzlib (version 1.12, 2 January 2021). +This manual is for Lzlib (version 1.13, 23 January 2022). * Menu: @@ -30,7 +30,7 @@ This manual is for Lzlib (version 1.12, 2 January 2021). * Concept index:: Index of concepts - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute, and modify it. @@ -73,8 +73,12 @@ byte near the beginning is a thing of the past. 
The functions and variables forming the interface of the compression library are declared in the file 'lzlib.h'. Usage examples of the library -are given in the files 'bbexample.c', 'ffexample.c', and 'main.c' from the -source distribution. +are given in the files 'bbexample.c', 'ffexample.c', and 'minilzip.c' from +the source distribution. + + All the library functions are thread safe. The library does not install +any signal handler. The decoder checks the consistency of the compressed +data, so the library should never crash even in case of corrupted input. Compression/decompression is done by repeatedly calling a couple of read/write functions until all the data have been processed by the library. @@ -102,20 +106,16 @@ concatenated compressed data streams is also supported. automatically creating multimember output. The members so created are large, about 2 PiB each. - All the library functions are thread safe. The library does not install -any signal handler. The decoder checks the consistency of the compressed -data, so the library should never crash even in case of corrupted input. - In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a concrete algorithm; it is more like "any algorithm using the LZMA coding scheme". For example, the option '-0' of lzip uses the scheme in almost the simplest way possible; issuing the longest match it can find, or a literal byte if it can't find a match. Inversely, a much more elaborated way of -finding coding sequences of minimum size than the one currently used by -lzip could be developed, and the resulting sequence could also be coded -using the LZMA coding scheme. +finding coding sequences of minimum size than the one currently used by lzip +could be developed, and the resulting sequence could also be coded using the +LZMA coding scheme. 
- Lzlib currently implements two variants of the LZMA algorithm; fast + Lzlib currently implements two variants of the LZMA algorithm: fast (used by option '-0' of minilzip) and normal (used by all other compression levels). @@ -145,7 +145,8 @@ One goal of lzlib is to keep perfect backward compatibility with older versions of itself down to 1.0. Any application working with an older lzlib should work with a newer lzlib. Installing a newer lzlib should not break anything. This chapter describes the constants and functions that the -application can use to discover the version of the library being used. +application can use to discover the version of the library being used. All +of them are declared in 'lzlib.h'. -- Constant: LZ_API_VERSION This constant is defined in 'lzlib.h' and works as a version test @@ -325,13 +326,13 @@ except 'LZ_compress_open' whose return value must be verified by calling 'LZ_compress_sync_flush'. Then call 'LZ_compress_read' until it returns 0. - This function writes a LZMA marker '3' ("Sync Flush" marker) to the - compressed output. Note that the sync flush marker is not allowed in - lzip files; it is a device for interactive communication between - applications using lzlib, but is useless and wasteful in a file, and - is excluded from the media type 'application/lzip'. The LZMA marker - '2' ("End Of Stream" marker) is the only marker allowed in lzip files. - *Note Data format::. + This function writes at least one LZMA marker '3' ("Sync Flush" marker) + to the compressed output. Note that the sync flush marker is not + allowed in lzip files; it is a device for interactive communication + between applications using lzlib, but is useless and wasteful in a + file, and is excluded from the media type 'application/lzip'. The LZMA + marker '2' ("End Of Stream" marker) is the only marker allowed in lzip + files. *Note Data format::. Repeated use of 'LZ_compress_sync_flush' may degrade compression ratio, so use it only when needed. 
If the interval between calls to @@ -347,34 +348,30 @@ except 'LZ_compress_open' whose return value must be verified by calling -- Function: int LZ_compress_read ( struct LZ_Encoder * const ENCODER, uint8_t * const BUFFER, const int SIZE ) - The function 'LZ_compress_read' reads up to SIZE bytes from the stream - pointed to by ENCODER, storing the results in BUFFER. If - LZ_API_VERSION >= 1012, BUFFER may be a null pointer, in which case - the bytes read are discarded. - - The return value is the number of bytes actually read. This might be - less than SIZE; for example, if there aren't that many bytes left in - the stream or if more bytes have to be yet written with the function + Reads up to SIZE bytes from the stream pointed to by ENCODER, storing + the results in BUFFER. If LZ_API_VERSION >= 1012, BUFFER may be a null + pointer, in which case the bytes read are discarded. + + Returns the number of bytes actually read. This might be less than + SIZE; for example, if there aren't that many bytes left in the stream + or if more bytes have to be yet written with the function 'LZ_compress_write'. Note that reading less than SIZE bytes is not an error. -- Function: int LZ_compress_write ( struct LZ_Encoder * const ENCODER, uint8_t * const BUFFER, const int SIZE ) - The function 'LZ_compress_write' writes up to SIZE bytes from BUFFER - to the stream pointed to by ENCODER. - - The return value is the number of bytes actually written. This might be + Writes up to SIZE bytes from BUFFER to the stream pointed to by + ENCODER. Returns the number of bytes actually written. This might be less than SIZE. Note that writing less than SIZE bytes is not an error. -- Function: int LZ_compress_write_size ( struct LZ_Encoder * const ENCODER ) - The function 'LZ_compress_write_size' returns the maximum number of - bytes that can be immediately written through 'LZ_compress_write'. 
For - efficiency reasons, once the input buffer is full and - 'LZ_compress_write_size' returns 0, almost all the buffer must be - compressed before a size greater than 0 is returned again. (This is - done to minimize the amount of data that must be copied to the - beginning of the buffer before new data can be accepted). + Returns the maximum number of bytes that can be immediately written + through 'LZ_compress_write'. For efficiency reasons, once the input + buffer is full and 'LZ_compress_write_size' returns 0, almost all the + buffer must be compressed before a size greater than 0 is returned + again. (This is done to minimize the amount of data that must be + copied to the beginning of the buffer before new data can be accepted). It is guaranteed that an immediate call to 'LZ_compress_write' will accept a SIZE up to the returned number of bytes. @@ -472,14 +469,13 @@ except 'LZ_decompress_open' whose return value must be verified by calling -- Function: int LZ_decompress_read ( struct LZ_Decoder * const DECODER, uint8_t * const BUFFER, const int SIZE ) - The function 'LZ_decompress_read' reads up to SIZE bytes from the - stream pointed to by DECODER, storing the results in BUFFER. If - LZ_API_VERSION >= 1012, BUFFER may be a null pointer, in which case - the bytes read are discarded. - - The return value is the number of bytes actually read. This might be - less than SIZE; for example, if there aren't that many bytes left in - the stream or if more bytes have to be yet written with the function + Reads up to SIZE bytes from the stream pointed to by DECODER, storing + the results in BUFFER. If LZ_API_VERSION >= 1012, BUFFER may be a null + pointer, in which case the bytes read are discarded. + + Returns the number of bytes actually read. This might be less than + SIZE; for example, if there aren't that many bytes left in the stream + or if more bytes have to be yet written with the function 'LZ_decompress_write'. 
Note that reading less than SIZE bytes is not an error. @@ -499,18 +495,16 @@ except 'LZ_decompress_open' whose return value must be verified by calling -- Function: int LZ_decompress_write ( struct LZ_Decoder * const DECODER, uint8_t * const BUFFER, const int SIZE ) - The function 'LZ_decompress_write' writes up to SIZE bytes from BUFFER - to the stream pointed to by DECODER. - - The return value is the number of bytes actually written. This might be + Writes up to SIZE bytes from BUFFER to the stream pointed to by + DECODER. Returns the number of bytes actually written. This might be less than SIZE. Note that writing less than SIZE bytes is not an error. -- Function: int LZ_decompress_write_size ( struct LZ_Decoder * const DECODER ) - The function 'LZ_decompress_write_size' returns the maximum number of - bytes that can be immediately written through 'LZ_decompress_write'. - This number varies smoothly; each compressed byte consumed may be - overwritten immediately, increasing by 1 the value returned. + Returns the maximum number of bytes that can be immediately written + through 'LZ_decompress_write'. This number varies smoothly; each + compressed byte consumed may be overwritten immediately, increasing by + 1 the value returned. It is guaranteed that an immediate call to 'LZ_decompress_write' will accept a SIZE up to the returned number of bytes. @@ -530,24 +524,24 @@ except 'LZ_decompress_open' whose return value must be verified by calling -- Function: int LZ_decompress_member_finished ( struct LZ_Decoder * const DECODER ) Returns 1 if the previous call to 'LZ_decompress_read' finished reading - the current member, indicating that final values for member are + the current member, indicating that final values for the member are available through 'LZ_decompress_data_crc', 'LZ_decompress_data_position', and 'LZ_decompress_member_position'. Otherwise it returns 0. 
-- Function: int LZ_decompress_member_version ( struct LZ_Decoder * const DECODER ) - Returns the version of current member from member header. + Returns the version of the current member, read from the member header. -- Function: int LZ_decompress_dictionary_size ( struct LZ_Decoder * const DECODER ) - Returns the dictionary size of the current member, read from the member - header. + Returns the dictionary size of the current member, read from the + member header. -- Function: unsigned LZ_decompress_data_crc ( struct LZ_Decoder * const DECODER ) Returns the 32 bit Cyclic Redundancy Check of the data decompressed - from the current member. The returned value is valid only when + from the current member. The value returned is valid only when 'LZ_decompress_member_finished' returns 1. -- Function: unsigned long long LZ_decompress_data_position ( struct @@ -650,13 +644,14 @@ compatible with lzip 1.4 or newer. Lzip is a lossless data compressor with a user interface similar to the one of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov -chain-Algorithm' (LZMA) stream format, chosen to maximize safety and -interoperability. Lzip can compress about as fast as gzip (lzip -0) or -compress most files more than bzip2 (lzip -9). Decompression speed is -intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 -from a data recovery perspective. Lzip has been designed, written, and -tested with great care to replace gzip and bzip2 as the standard -general-purpose compressed format for unix-like systems. +chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity +checking to maximize interoperability and optimize safety. Lzip can compress +about as fast as gzip (lzip -0) or compress most files more than bzip2 +(lzip -9). Decompression speed is intermediate between gzip and bzip2. Lzip +is better than gzip and bzip2 from a data recovery perspective. 
Lzip has +been designed, written, and tested with great care to replace gzip and +bzip2 as the standard general-purpose compressed format for unix-like +systems. The format for running minilzip is: @@ -705,10 +700,13 @@ once, the first time it appears in the command line. '-d' '--decompress' - Decompress the files specified. If a file does not exist or can't be - opened, minilzip continues decompressing the rest of the files. If a - file fails to decompress, or is a terminal, minilzip exits immediately - without decompressing the rest of the files. + Decompress the files specified. If a file does not exist, can't be + opened, or the destination file already exists and '--force' has not + been specified, minilzip continues decompressing the rest of the files + and exits with error status 1. If a file fails to decompress, or is a + terminal, minilzip exits immediately with error status 2 without + decompressing the rest of the files. A terminal is considered an + uncompressed file, and therefore invalid. '-f' '--force' @@ -831,12 +829,14 @@ once, the first time it appears in the command line. '--check-lib' Compare the version of lzlib used to compile minilzip with the version - actually being used and exit. Report any differences found. Exit with - error status 1 if differences are found. A mismatch may indicate that - lzlib is not correctly installed or that a different version of lzlib - has been installed after compiling the shared version of minilzip. - 'minilzip -v --check-lib' shows the version of lzlib being used and - the value of 'LZ_API_VERSION' (if defined). *Note Library version::. + actually being used at run time and exit. Report any differences + found. Exit with error status 1 if differences are found. A mismatch + may indicate that lzlib is not correctly installed or that a different + version of lzlib has been installed after compiling the shared version + of minilzip. Exit with error status 2 if LZ_API_VERSION and + LZ_version_string don't match. 
'minilzip -v --check-lib' shows the + version of lzlib being used and the value of LZ_API_VERSION (if + defined). *Note Library version::. Numbers given as arguments to options may be followed by a multiplier @@ -857,7 +857,7 @@ Y yottabyte (10^24) | Yi yobibyte (2^80) Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid -input file, 3 for an internal consistency error (eg, bug) which caused +input file, 3 for an internal consistency error (e.g., bug) which caused minilzip to panic.  @@ -886,9 +886,11 @@ when there is no longer anything to take away. represents a variable number of bytes. - A lzip data stream consists of a series of "members" (compressed data + Lzip data consist of a series of independent "members" (compressed data sets). The members simply appear one after another in the data stream, with -no additional information before, between, or after them. +no additional information before, between, or after them. Each member can +encode in compressed form up to 16 EiB - 1 byte of uncompressed data. The +size of a multimember data stream is unlimited. Each member has the following structure: @@ -916,7 +918,7 @@ no additional information before, between, or after them. Valid values for dictionary size range from 4 KiB to 512 MiB. 'LZMA stream' - The LZMA stream, finished by an end of stream marker. Uses default + The LZMA stream, finished by an "End Of Stream" marker. Uses default values for encoder properties. *Note Stream format: (lzip)Stream format, for a complete description. Lzip only uses the LZMA marker '2' ("End Of Stream" marker). Lzlib @@ -924,16 +926,17 @@ no additional information before, between, or after them. sync_flush::. 'CRC32 (4 bytes)' - Cyclic Redundancy Check (CRC) of the uncompressed original data. + Cyclic Redundancy Check (CRC) of the original uncompressed data. 'Data size (8 bytes)' - Size of the uncompressed original data. 
+ Size of the original uncompressed data. 'Member size (8 bytes)' Total size of the member, including header and trailer. This field acts as a distributed index, allows the verification of stream integrity, - and facilitates safe recovery of undamaged members from multimember - files. + and facilitates the safe recovery of undamaged members from + multimember files. Member size should be limited to 2 PiB to prevent + the data size field from overflowing.  @@ -967,10 +970,10 @@ File: lzlib.info, Node: Buffer compression, Next: Buffer decompression, Up: E Buffer-to-buffer single-member compression (MEMBER_SIZE > total output). -/* Compresses 'insize' bytes from 'inbuf' to 'outbuf'. - Returns the size of the compressed data in '*outlenp'. - In case of error, or if 'outsize' is too small, returns false and does - not modify '*outlenp'. +/* Compress 'insize' bytes from 'inbuf' to 'outbuf'. + Return the size of the compressed data in '*outlenp'. + In case of error, or if 'outsize' is too small, return false and do not + modify '*outlenp'. */ bool bbcompress( const uint8_t * const inbuf, const int insize, const int dictionary_size, const int match_len_limit, @@ -1011,10 +1014,10 @@ File: lzlib.info, Node: Buffer decompression, Next: File compression, Prev: B Buffer-to-buffer decompression. -/* Decompresses 'insize' bytes from 'inbuf' to 'outbuf'. - Returns the size of the decompressed data in '*outlenp'. - In case of error, or if 'outsize' is too small, returns false and does - not modify '*outlenp'. +/* Decompress 'insize' bytes from 'inbuf' to 'outbuf'. + Return the size of the decompressed data in '*outlenp'. + In case of error, or if 'outsize' is too small, return false and do not + modify '*outlenp'. */ bool bbdecompress( const uint8_t * const inbuf, const int insize, uint8_t * const outbuf, const int outsize, @@ -1159,9 +1162,9 @@ int ffmmcompress( FILE * const infile, FILE * const outfile ) Example 2: Multimember compression (user-restarted members). 
(Call LZ_compress_open with MEMBER_SIZE > largest member). -/* Compresses 'infile' to 'outfile' as a multimember stream with one member +/* Compress 'infile' to 'outfile' as a multimember stream with one member for each line of text terminated by a newline character or by EOF. - Returns 0 if success, 1 if error. + Return 0 if success, 1 if error. */ int fflfcompress( struct LZ_Encoder * const encoder, FILE * const infile, FILE * const outfile ) @@ -1205,7 +1208,7 @@ File: lzlib.info, Node: Skipping data errors, Prev: File compression mm, Up: 11.6 Skipping data errors ========================= -/* Decompresses 'infile' to 'outfile' with automatic resynchronization to +/* Decompress 'infile' to 'outfile' with automatic resynchronization to next member in case of data error, including the automatic removal of leading garbage. */ @@ -1253,7 +1256,7 @@ eternity, if not longer. If you find a bug in lzlib, please send electronic mail to . Include the version number, which you can find by -running 'minilzip --version' or in 'LZ_version_string' from 'lzlib.h'. +running 'minilzip --version' and 'minilzip -v --check-lib'.  
File: lzlib.info, Node: Concept index, Prev: Problems, Up: Top @@ -1288,29 +1291,29 @@ Concept index  Tag Table: -Node: Top220 -Node: Introduction1342 +Node: Top215 +Node: Introduction1338 Node: Library version6413 -Node: Buffering8918 -Node: Parameter limits10143 -Node: Compression functions11097 -Ref: member_size12907 -Ref: sync_flush14673 -Node: Decompression functions19493 -Node: Error codes27187 -Node: Error messages29478 -Node: Invoking minilzip30057 -Node: Data format39651 -Ref: coded-dict-size40957 -Node: Examples42267 -Node: Buffer compression43228 -Node: Buffer decompression44754 -Node: File compression46174 -Node: File decompression47157 -Node: File compression mm48161 -Node: Skipping data errors51193 -Node: Problems52505 -Node: Concept index53077 +Node: Buffering8957 +Node: Parameter limits10182 +Node: Compression functions11136 +Ref: member_size12946 +Ref: sync_flush14712 +Node: Decompression functions19400 +Node: Error codes26968 +Node: Error messages29259 +Node: Invoking minilzip29838 +Node: Data format39786 +Ref: coded-dict-size41232 +Node: Examples42641 +Node: Buffer compression43602 +Node: Buffer decompression45122 +Node: File compression46536 +Node: File decompression47519 +Node: File compression mm48523 +Node: Skipping data errors51552 +Node: Problems52862 +Node: Concept index53423  End Tag Table diff --git a/doc/lzlib.texi b/doc/lzlib.texi index 644a3d7..3caf9dd 100644 --- a/doc/lzlib.texi +++ b/doc/lzlib.texi @@ -6,10 +6,10 @@ @finalout @c %**end of header -@set UPDATED 2 January 2021 -@set VERSION 1.12 +@set UPDATED 23 January 2022 +@set VERSION 1.13 -@dircategory Data Compression +@dircategory Compression @direntry * Lzlib: (lzlib). Compression library for the lzip format @end direntry @@ -52,7 +52,7 @@ This manual is for Lzlib (version @value{VERSION}, @value{UPDATED}). @end menu @sp 1 -Copyright @copyright{} 2009-2021 Antonio Diaz Diaz. +Copyright @copyright{} 2009-2022 Antonio Diaz Diaz. 
This manual is free documentation: you have unlimited permission to copy, distribute, and modify it. @@ -77,9 +77,9 @@ taking into account both data integrity and decoder availability: The lzip format provides very safe integrity checking and some data recovery means. The program @uref{http://www.nongnu.org/lzip/manual/lziprecover_manual.html#Data-safety,,lziprecover} -can repair bit flip errors (one of the most common forms of data -corruption) in lzip files, and provides data recovery capabilities, -including error-checked merging of damaged copies of a file. +can repair bit flip errors (one of the most common forms of data corruption) +in lzip files, and provides data recovery capabilities, including +error-checked merging of damaged copies of a file. @ifnothtml @xref{Data safety,,,lziprecover}. @end ifnothtml @@ -89,8 +89,8 @@ The lzip format is as simple as possible (but not simpler). The lzip manual provides the source code of a simple decompressor along with a detailed explanation of how it works, so that with the only help of the lzip manual it would be possible for a digital archaeologist to extract -the data from a lzip file long after quantum computers eventually render -LZMA obsolete. +the data from a lzip file long after quantum computers eventually +render LZMA obsolete. @item Additionally the lzip reference implementation is copylefted, which @@ -104,8 +104,12 @@ the beginning is a thing of the past. The functions and variables forming the interface of the compression library are declared in the file @samp{lzlib.h}. Usage examples of the library are -given in the files @samp{bbexample.c}, @samp{ffexample.c}, and @samp{main.c} -from the source distribution. +given in the files @samp{bbexample.c}, @samp{ffexample.c}, and +@samp{minilzip.c} from the source distribution. + +All the library functions are thread safe. The library does not install any +signal handler. 
The decoder checks the consistency of the compressed data, +so the library should never crash even in case of corrupted input. Compression/decompression is done by repeatedly calling a couple of read/write functions until all the data have been processed by the library. @@ -134,22 +138,17 @@ Lzlib is able to compress and decompress streams of unlimited size by automatically creating multimember output. The members so created are large, about @w{2 PiB} each. -All the library functions are thread safe. The library does not install -any signal handler. The decoder checks the consistency of the compressed -data, so the library should never crash even in case of corrupted input. - In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a concrete algorithm; it is more like "any algorithm using the LZMA coding -scheme". For example, the option @samp{-0} of lzip uses the scheme in almost -the simplest way possible; issuing the longest match it can find, or a -literal byte if it can't find a match. Inversely, a much more elaborated way -of finding coding sequences of minimum size than the one currently used by -lzip could be developed, and the resulting sequence could also be coded -using the LZMA coding scheme. +scheme". For example, the option @samp{-0} of lzip uses the scheme in almost the +simplest way possible; issuing the longest match it can find, or a literal +byte if it can't find a match. Inversely, a much more elaborated way of +finding coding sequences of minimum size than the one currently used by lzip +could be developed, and the resulting sequence could also be coded using the +LZMA coding scheme. -Lzlib currently implements two variants of the LZMA algorithm; fast (used by -option @samp{-0} of minilzip) and normal (used by all other compression -levels). +Lzlib currently implements two variants of the LZMA algorithm: fast (used by +option @samp{-0} of minilzip) and normal (used by all other compression levels). 
The high compression of LZMA comes from combining two basic, well-proven compression ideas: sliding dictionaries (LZ77/78) and markov models (the @@ -176,7 +175,8 @@ One goal of lzlib is to keep perfect backward compatibility with older versions of itself down to 1.0. Any application working with an older lzlib should work with a newer lzlib. Installing a newer lzlib should not break anything. This chapter describes the constants and functions that the -application can use to discover the version of the library being used. +application can use to discover the version of the library being used. All +of them are declared in @samp{lzlib.h}. @defvr Constant LZ_API_VERSION This constant is defined in @samp{lzlib.h} and works as a version test @@ -372,12 +372,13 @@ already written with the function @samp{LZ_compress_write}. First call @samp{LZ_compress_sync_flush}. Then call @samp{LZ_compress_read} until it returns 0. -This function writes a LZMA marker @samp{3} ("Sync Flush" marker) to the -compressed output. Note that the sync flush marker is not allowed in lzip -files; it is a device for interactive communication between applications -using lzlib, but is useless and wasteful in a file, and is excluded from the -media type @samp{application/lzip}. The LZMA marker @samp{2} ("End Of -Stream" marker) is the only marker allowed in lzip files. @xref{Data format}. +This function writes at least one LZMA marker @samp{3} ("Sync Flush" marker) +to the compressed output. Note that the sync flush marker is not allowed in +lzip files; it is a device for interactive communication between +applications using lzlib, but is useless and wasteful in a file, and is +excluded from the media type @samp{application/lzip}. The LZMA marker +@samp{2} ("End Of Stream" marker) is the only marker allowed in lzip files. +@xref{Data format}. Repeated use of @samp{LZ_compress_sync_flush} may degrade compression ratio, so use it only when needed. 
If the interval between calls to @@ -394,36 +395,33 @@ are more bytes available than those needed to complete @var{member_size}, @deftypefun int LZ_compress_read ( struct LZ_Encoder * const @var{encoder}, uint8_t * const @var{buffer}, const int @var{size} ) -The function @samp{LZ_compress_read} reads up to @var{size} bytes from the -stream pointed to by @var{encoder}, storing the results in @var{buffer}. -If @w{LZ_API_VERSION >= 1012}, @var{buffer} may be a null pointer, in which -case the bytes read are discarded. - -The return value is the number of bytes actually read. This might be less -than @var{size}; for example, if there aren't that many bytes left in the -stream or if more bytes have to be yet written with the function +Reads up to @var{size} bytes from the stream pointed to by @var{encoder}, +storing the results in @var{buffer}. If @w{LZ_API_VERSION >= 1012}, +@var{buffer} may be a null pointer, in which case the bytes read are +discarded. + +Returns the number of bytes actually read. This might be less than +@var{size}; for example, if there aren't that many bytes left in the stream +or if more bytes have yet to be written with the function @samp{LZ_compress_write}. Note that reading less than @var{size} bytes is not an error. @end deftypefun @deftypefun int LZ_compress_write ( struct LZ_Encoder * const @var{encoder}, uint8_t * const @var{buffer}, const int @var{size} ) -The function @samp{LZ_compress_write} writes up to @var{size} bytes from -@var{buffer} to the stream pointed to by @var{encoder}. - -The return value is the number of bytes actually written. This might be -less than @var{size}. Note that writing less than @var{size} bytes is -not an error. +Writes up to @var{size} bytes from @var{buffer} to the stream pointed to by +@var{encoder}. Returns the number of bytes actually written. This might be +less than @var{size}. Note that writing less than @var{size} bytes is not an +error. 
@end deftypefun @deftypefun int LZ_compress_write_size ( struct LZ_Encoder * const @var{encoder} ) -The function @samp{LZ_compress_write_size} returns the maximum number of -bytes that can be immediately written through @samp{LZ_compress_write}. -For efficiency reasons, once the input buffer is full and -@samp{LZ_compress_write_size} returns 0, almost all the buffer must be -compressed before a size greater than 0 is returned again. (This is done to -minimize the amount of data that must be copied to the beginning of the +Returns the maximum number of bytes that can be immediately written through +@samp{LZ_compress_write}. For efficiency reasons, once the input buffer is +full and @samp{LZ_compress_write_size} returns 0, almost all the buffer must +be compressed before a size greater than 0 is returned again. (This is done +to minimize the amount of data that must be copied to the beginning of the buffer before new data can be accepted). It is guaranteed that an immediate call to @samp{LZ_compress_write} will @@ -478,10 +476,10 @@ perhaps not yet read. @chapter Decompression functions @cindex decompression functions -These are the functions used to decompress data. In case of error, all -of them return -1 or 0, for signed and unsigned return values -respectively, except @samp{LZ_decompress_open} whose return value must -be verified by calling @samp{LZ_decompress_errno} before using it. +These are the functions used to decompress data. In case of error, all of +them return -1 or 0, for signed and unsigned return values respectively, +except @samp{LZ_decompress_open} whose return value must be verified by +calling @samp{LZ_decompress_errno} before using it. @deftypefun {struct LZ_Decoder *} LZ_decompress_open ( void ) @@ -539,14 +537,14 @@ function does nothing. 
@deftypefun int LZ_decompress_read ( struct LZ_Decoder * const @var{decoder}, uint8_t * const @var{buffer}, const int @var{size} ) -The function @samp{LZ_decompress_read} reads up to @var{size} bytes from the -stream pointed to by @var{decoder}, storing the results in @var{buffer}. -If @w{LZ_API_VERSION >= 1012}, @var{buffer} may be a null pointer, in which -case the bytes read are discarded. - -The return value is the number of bytes actually read. This might be less -than @var{size}; for example, if there aren't that many bytes left in the -stream or if more bytes have to be yet written with the function +Reads up to @var{size} bytes from the stream pointed to by @var{decoder}, +storing the results in @var{buffer}. If @w{LZ_API_VERSION >= 1012}, +@var{buffer} may be a null pointer, in which case the bytes read are +discarded. + +Returns the number of bytes actually read. This might be less than +@var{size}; for example, if there aren't that many bytes left in the stream +or if more bytes have yet to be written with the function @samp{LZ_decompress_write}. Note that reading less than @var{size} bytes is not an error. @@ -571,20 +569,18 @@ recover as much data as possible from each damaged member. @deftypefun int LZ_decompress_write ( struct LZ_Decoder * const @var{decoder}, uint8_t * const @var{buffer}, const int @var{size} ) -The function @samp{LZ_decompress_write} writes up to @var{size} bytes from -@var{buffer} to the stream pointed to by @var{decoder}. - -The return value is the number of bytes actually written. This might be -less than @var{size}. Note that writing less than @var{size} bytes is -not an error. +Writes up to @var{size} bytes from @var{buffer} to the stream pointed to by +@var{decoder}. Returns the number of bytes actually written. This might be +less than @var{size}. Note that writing less than @var{size} bytes is not an +error. 
@end deftypefun @deftypefun int LZ_decompress_write_size ( struct LZ_Decoder * const @var{decoder} ) -The function @samp{LZ_decompress_write_size} returns the maximum number of -bytes that can be immediately written through @samp{LZ_decompress_write}. -This number varies smoothly; each compressed byte consumed may be -overwritten immediately, increasing by 1 the value returned. +Returns the maximum number of bytes that can be immediately written through +@samp{LZ_decompress_write}. This number varies smoothly; each compressed +byte consumed may be overwritten immediately, increasing by 1 the value +returned. It is guaranteed that an immediate call to @samp{LZ_decompress_write} will accept a @var{size} up to the returned number of bytes. @@ -607,26 +603,25 @@ does not imply @samp{LZ_decompress_member_finished}. @deftypefun int LZ_decompress_member_finished ( struct LZ_Decoder * const @var{decoder} ) Returns 1 if the previous call to @samp{LZ_decompress_read} finished reading -the current member, indicating that final values for member are available +the current member, indicating that final values for the member are available through @samp{LZ_decompress_data_crc}, @samp{LZ_decompress_data_position}, and @samp{LZ_decompress_member_position}. Otherwise it returns 0. @end deftypefun @deftypefun int LZ_decompress_member_version ( struct LZ_Decoder * const @var{decoder} ) -Returns the version of current member from member header. +Returns the version of the current member, read from the member header. @end deftypefun @deftypefun int LZ_decompress_dictionary_size ( struct LZ_Decoder * const @var{decoder} ) -Returns the dictionary size of the current member, read from the member -header. +Returns the dictionary size of the current member, read from the member header. @end deftypefun @deftypefun {unsigned} LZ_decompress_data_crc ( struct LZ_Decoder * const @var{decoder} ) Returns the 32 bit Cyclic Redundancy Check of the data decompressed from -the current member. 
The returned value is valid only when +the current member. The value returned is valid only when @samp{LZ_decompress_member_finished} returns 1. @end deftypefun @@ -672,8 +667,7 @@ examine @samp{LZ_(de)compress_errno}. The error codes are defined in the header file @samp{lzlib.h}. @deftypevr Constant {enum LZ_Errno} LZ_ok -The value of this constant is 0 and is used to indicate that there is no -error. +The value of this constant is 0 and is used to indicate that there is no error. @end deftypevr @deftypevr Constant {enum LZ_Errno} LZ_bad_argument @@ -737,16 +731,17 @@ The value of @var{lz_errno} normally comes from a call to Minilzip is a test program for the compression library lzlib, fully compatible with lzip 1.4 or newer. -@uref{http://www.nongnu.org/lzip/lzip.html,,Lzip} is a lossless data -compressor with a user interface similar to the one of gzip or bzip2. Lzip -uses a simplified form of the 'Lempel-Ziv-Markov chain-Algorithm' (LZMA) -stream format, chosen to maximize safety and interoperability. Lzip can -compress about as fast as gzip @w{(lzip -0)} or compress most files more -than bzip2 @w{(lzip -9)}. Decompression speed is intermediate between gzip -and bzip2. Lzip is better than gzip and bzip2 from a data recovery -perspective. Lzip has been designed, written, and tested with great care to -replace gzip and bzip2 as the standard general-purpose compressed format for -unix-like systems. +@uref{http://www.nongnu.org/lzip/lzip.html,,Lzip} +is a lossless data compressor with a user interface similar to the one +of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov +chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity +checking to maximize interoperability and optimize safety. Lzip can compress +about as fast as gzip @w{(lzip -0)} or compress most files more than bzip2 +@w{(lzip -9)}. Decompression speed is intermediate between gzip and bzip2. +Lzip is better than gzip and bzip2 from a data recovery perspective. 
Lzip +has been designed, written, and tested with great care to replace gzip and +bzip2 as the standard general-purpose compressed format for unix-like +systems. @noindent The format for running minilzip is: @@ -803,10 +798,12 @@ and @samp{-S}. @samp{-c} has no effect when testing or listing. @item -d @itemx --decompress -Decompress the files specified. If a file does not exist or can't be -opened, minilzip continues decompressing the rest of the files. If a file -fails to decompress, or is a terminal, minilzip exits immediately without -decompressing the rest of the files. +Decompress the files specified. If a file does not exist, can't be opened, +or the destination file already exists and @samp{--force} has not been +specified, minilzip continues decompressing the rest of the files and exits with +error status 1. If a file fails to decompress, or is a terminal, minilzip exits +immediately with error status 2 without decompressing the rest of the files. +A terminal is considered an uncompressed file, and therefore invalid. @item -f @itemx --force @@ -932,12 +929,13 @@ header" error and the cause is not indeed a corrupt header. @item --check-lib Compare the @uref{#Library-version,,version of lzlib} used to compile -minilzip with the version actually being used and exit. Report any -differences found. Exit with error status 1 if differences are found. A +minilzip with the version actually being used at run time and exit. Report +any differences found. Exit with error status 1 if differences are found. A mismatch may indicate that lzlib is not correctly installed or that a different version of lzlib has been installed after compiling the shared -version of minilzip. @w{@samp{minilzip -v --check-lib}} shows the version of -lzlib being used and the value of @samp{LZ_API_VERSION} (if defined). +version of minilzip. Exit with error status 2 if LZ_API_VERSION and +LZ_version_string don't match. 
@w{@samp{minilzip -v --check-lib}} shows the +version of lzlib being used and the value of LZ_API_VERSION (if defined). @ifnothtml @xref{Library version}. @end ifnothtml @@ -963,9 +961,9 @@ Table of SI and binary prefixes (unit multipliers): @sp 1 Exit status: 0 for a normal exit, 1 for environmental problems (file not -found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or -invalid input file, 3 for an internal consistency error (eg, bug) which -caused minilzip to panic. +found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid +input file, 3 for an internal consistency error (e.g., bug) which caused +minilzip to panic. @node Data format @@ -996,9 +994,11 @@ represents one byte; a box like this: represents a variable number of bytes. @sp 1 -A lzip data stream consists of a series of "members" (compressed data sets). -The members simply appear one after another in the data stream, with no -additional information before, between, or after them. +Lzip data consist of a series of independent "members" (compressed data +sets). The members simply appear one after another in the data stream, with +no additional information before, between, or after them. Each member can +encode in compressed form up to @w{16 EiB - 1 byte} of uncompressed data. +The size of a multimember data stream is unlimited. Each member has the following structure: @@ -1029,7 +1029,7 @@ Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@* Valid values for dictionary size range from 4 KiB to 512 MiB. @item LZMA stream -The LZMA stream, finished by an end of stream marker. Uses default values +The LZMA stream, finished by an "End Of Stream" marker. Uses default values for encoder properties. @ifnothtml @xref{Stream format,,,lzip}, @@ -1043,15 +1043,17 @@ Lzip only uses the LZMA marker @samp{2} ("End Of Stream" marker). Lzlib also uses the LZMA marker @samp{3} ("Sync Flush" marker). @xref{sync_flush}. 
@item CRC32 (4 bytes) -Cyclic Redundancy Check (CRC) of the uncompressed original data. +Cyclic Redundancy Check (CRC) of the original uncompressed data. @item Data size (8 bytes) -Size of the uncompressed original data. +Size of the original uncompressed data. @item Member size (8 bytes) Total size of the member, including header and trailer. This field acts as a distributed index, allows the verification of stream integrity, and -facilitates safe recovery of undamaged members from multimember files. +facilitates the safe recovery of undamaged members from multimember files. +Member size should be limited to @w{2 PiB} to prevent the data size field +from overflowing. @end table @@ -1086,10 +1088,10 @@ Buffer-to-buffer single-member compression @w{(@var{member_size} > total output)}. @verbatim -/* Compresses 'insize' bytes from 'inbuf' to 'outbuf'. - Returns the size of the compressed data in '*outlenp'. - In case of error, or if 'outsize' is too small, returns false and does - not modify '*outlenp'. +/* Compress 'insize' bytes from 'inbuf' to 'outbuf'. + Return the size of the compressed data in '*outlenp'. + In case of error, or if 'outsize' is too small, return false and do not + modify '*outlenp'. */ bool bbcompress( const uint8_t * const inbuf, const int insize, const int dictionary_size, const int match_len_limit, @@ -1131,10 +1133,10 @@ bool bbcompress( const uint8_t * const inbuf, const int insize, Buffer-to-buffer decompression. @verbatim -/* Decompresses 'insize' bytes from 'inbuf' to 'outbuf'. - Returns the size of the decompressed data in '*outlenp'. - In case of error, or if 'outsize' is too small, returns false and does - not modify '*outlenp'. +/* Decompress 'insize' bytes from 'inbuf' to 'outbuf'. + Return the size of the decompressed data in '*outlenp'. + In case of error, or if 'outsize' is too small, return false and do not + modify '*outlenp'. 
*/ bool bbdecompress( const uint8_t * const inbuf, const int insize, uint8_t * const outbuf, const int outsize, @@ -1285,9 +1287,9 @@ Example 2: Multimember compression (user-restarted members). (Call LZ_compress_open with @var{member_size} > largest member). @verbatim -/* Compresses 'infile' to 'outfile' as a multimember stream with one member +/* Compress 'infile' to 'outfile' as a multimember stream with one member for each line of text terminated by a newline character or by EOF. - Returns 0 if success, 1 if error. + Return 0 if success, 1 if error. */ int fflfcompress( struct LZ_Encoder * const encoder, FILE * const infile, FILE * const outfile ) @@ -1332,7 +1334,7 @@ int fflfcompress( struct LZ_Encoder * const encoder, @cindex skipping data errors @verbatim -/* Decompresses 'infile' to 'outfile' with automatic resynchronization to +/* Decompress 'infile' to 'outfile' with automatic resynchronization to next member in case of data error, including the automatic removal of leading garbage. */ @@ -1381,8 +1383,8 @@ for all eternity, if not longer. If you find a bug in lzlib, please send electronic mail to @email{lzip-bug@@nongnu.org}. Include the version number, which you can -find by running @w{@samp{minilzip --version}} or in -@samp{LZ_version_string} from @samp{lzlib.h}. +find by running @w{@samp{minilzip --version}} and +@w{@samp{minilzip -v --check-lib}}. @node Concept index diff --git a/doc/minilzip.1 b/doc/minilzip.1 index 13a2d6d..0c4c06d 100644 --- a/doc/minilzip.1 +++ b/doc/minilzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. -.TH MINILZIP "1" "January 2021" "minilzip 1.12" "User Commands" +.TH MINILZIP "1" "January 2022" "minilzip 1.13" "User Commands" .SH NAME minilzip \- reduces the size of files .SH SYNOPSIS @@ -11,13 +11,14 @@ compatible with lzip 1.4 or newer. .PP Lzip is a lossless data compressor with a user interface similar to the one of gzip or bzip2. 
Lzip uses a simplified form of the 'Lempel\-Ziv\-Markov -chain\-Algorithm' (LZMA) stream format, chosen to maximize safety and -interoperability. Lzip can compress about as fast as gzip (lzip \fB\-0\fR) or -compress most files more than bzip2 (lzip \fB\-9\fR). Decompression speed is -intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 from -a data recovery perspective. Lzip has been designed, written, and tested -with great care to replace gzip and bzip2 as the standard general\-purpose -compressed format for unix\-like systems. +chain\-Algorithm' (LZMA) stream format and provides a 3 factor integrity +checking to maximize interoperability and optimize safety. Lzip can compress +about as fast as gzip (lzip \fB\-0\fR) or compress most files more than bzip2 +(lzip \fB\-9\fR). Decompression speed is intermediate between gzip and bzip2. +Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip +has been designed, written, and tested with great care to replace gzip and +bzip2 as the standard general\-purpose compressed format for unix\-like +systems. .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR @@ -100,7 +101,7 @@ To extract all the files from archive 'foo.tar.lz', use the commands .PP Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or -invalid input file, 3 for an internal consistency error (eg, bug) which +invalid input file, 3 for an internal consistency error (e.g., bug) which caused minilzip to panic. .PP The ideas embodied in lzlib are due to (at least) the following people: @@ -113,9 +114,21 @@ Report bugs to lzip\-bug@nongnu.org .br Lzlib home page: http://www.nongnu.org/lzip/lzlib.html .SH COPYRIGHT -Copyright \(co 2021 Antonio Diaz Diaz. -Using lzlib 1.12 +Copyright \(co 2022 Antonio Diaz Diaz. +Using lzlib 1.13 License GPLv2+: GNU GPL version 2 or later .br This is free software: you are free to change and redistribute it. 
There is NO WARRANTY, to the extent permitted by law. +.SH "SEE ALSO" +The full documentation for +.B minilzip +is maintained as a Texinfo manual. If the +.B info +and +.B minilzip +programs are properly installed at your site, the command +.IP +.B info lzlib +.PP +should give you access to the complete manual. diff --git a/encoder.c b/encoder.c index 600a444..b76dafa 100644 --- a/encoder.c +++ b/encoder.c @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -21,18 +21,7 @@ static int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs { int32_t * ptr0 = e->eb.mb.pos_array + ( e->eb.mb.cyclic_pos << 1 ); int32_t * ptr1 = ptr0 + 1; - int32_t * newptr; - int len = 0, len0 = 0, len1 = 0; - int maxlen = 3; /* only used if pairs != 0 */ - int num_pairs = 0; - const int pos1 = e->eb.mb.pos + 1; - const int min_pos = ( e->eb.mb.pos > e->eb.mb.dictionary_size ) ? - e->eb.mb.pos - e->eb.mb.dictionary_size : 0; - const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb ); - int count, key2, key3, key4, newpos1; - unsigned tmp; int len_limit = e->match_len_limit; - if( len_limit > Mb_available_bytes( &e->eb.mb ) ) { e->been_flushed = true; @@ -40,12 +29,18 @@ static int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs if( len_limit < 4 ) { *ptr0 = *ptr1 = 0; return 0; } } - tmp = crc32[data[0]] ^ data[1]; - key2 = tmp & ( num_prev_positions2 - 1 ); + int maxlen = 3; /* only used if pairs != 0 */ + int num_pairs = 0; + const int min_pos = ( e->eb.mb.pos > e->eb.mb.dictionary_size ) ? 
+ e->eb.mb.pos - e->eb.mb.dictionary_size : 0; + const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb ); + + unsigned tmp = crc32[data[0]] ^ data[1]; + const int key2 = tmp & ( num_prev_positions2 - 1 ); tmp ^= (unsigned)data[2] << 8; - key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) ); - key4 = num_prev_positions2 + num_prev_positions3 + - ( ( tmp ^ ( crc32[data[3]] << 5 ) ) & e->eb.mb.key4_mask ); + const int key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) ); + const int key4 = num_prev_positions2 + num_prev_positions3 + + ( ( tmp ^ ( crc32[data[3]] << 5 ) ) & e->eb.mb.key4_mask ); if( pairs ) { @@ -54,7 +49,7 @@ static int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs if( np2 > min_pos && e->eb.mb.buffer[np2-1] == data[0] ) { pairs[0].dis = e->eb.mb.pos - np2; - pairs[0].len = maxlen = 2; + pairs[0].len = maxlen = 2 + ( np2 == np3 ); num_pairs = 1; } if( np2 != np3 && np3 > min_pos && e->eb.mb.buffer[np3-1] == data[0] ) @@ -73,19 +68,22 @@ static int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs } } + const int pos1 = e->eb.mb.pos + 1; e->eb.mb.prev_positions[key2] = pos1; e->eb.mb.prev_positions[key3] = pos1; - newpos1 = e->eb.mb.prev_positions[key4]; + int newpos1 = e->eb.mb.prev_positions[key4]; e->eb.mb.prev_positions[key4] = pos1; + int len = 0, len0 = 0, len1 = 0; + + int count; for( count = e->cycles; ; ) { - int delta; if( newpos1 <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; } if( e->been_flushed ) len = 0; - delta = pos1 - newpos1; - newptr = e->eb.mb.pos_array + + const int delta = pos1 - newpos1; + int32_t * const newptr = e->eb.mb.pos_array + ( ( e->eb.mb.cyclic_pos - delta + ( (e->eb.mb.cyclic_pos >= delta) ? 
0 : e->eb.mb.dictionary_size + 1 ) ) << 1 ); if( data[len-delta] == data[len] ) @@ -140,7 +138,6 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e ) for( len_state = 0; len_state < len_states; ++len_state ) { int * const dsp = e->dis_slot_prices[len_state]; - int * const dp = e->dis_prices[len_state]; const Bit_model * const bmds = e->eb.bm_dis_slot[len_state]; int slot = 0; for( ; slot < end_dis_model; ++slot ) @@ -149,6 +146,7 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e ) dsp[slot] = price_symbol6( bmds, slot ) + (((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift_bits ); + int * const dp = e->dis_prices[len_state]; for( dis = 0; dis < start_dis_model; ++dis ) dp[dis] = dsp[dis]; for( ; dis < modeled_distances; ++dis ) @@ -157,7 +155,7 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e ) } -/* Returns the number of bytes advanced (ahead). +/* Return the number of bytes advanced (ahead). trials[0]..trials[ahead-1] contain the steps to encode. ( trials[0].dis4 == -1 ) means literal. A match/rep longer or equal than match_len_limit finishes the sequence. @@ -166,9 +164,8 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, const int reps[num_rep_distances], const State state ) { - int main_len, num_pairs, i, rep, num_trials, len; - int rep_index = 0, cur = 0; - int replens[num_rep_distances]; + int num_pairs, num_trials; + int i, rep, len; if( e->pending_num_pairs > 0 ) /* from previous call */ { @@ -177,8 +174,10 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, } else num_pairs = LZe_read_match_distances( e ); - main_len = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0; + const int main_len = ( num_pairs > 0 ) ? 
e->pairs[num_pairs-1].len : 0; + int replens[num_rep_distances]; + int rep_index = 0; for( i = 0; i < num_rep_distances; ++i ) { replens[i] = Mb_true_match_len( &e->eb.mb, 0, reps[i] + 1 ); @@ -200,7 +199,6 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, return main_len; } - { const int pos_state = Mb_data_position( &e->eb.mb ) & pos_state_mask; const int match_price = price1( e->eb.bm_match[state][pos_state] ); const int rep_match_price = match_price + price1( e->eb.bm_rep[state] ); @@ -238,9 +236,8 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, for( rep = 0; rep < num_rep_distances; ++rep ) { - int price; if( replens[rep] < min_match_len ) continue; - price = rep_match_price + LZeb_price_rep( &e->eb, rep, state, pos_state ); + const int price = rep_match_price + LZeb_price_rep( &e->eb, rep, state, pos_state ); for( len = min_match_len; len <= replens[rep]; ++len ) Tr_update( &e->trials[len], price + Lp_price( &e->rep_len_prices, len, pos_state ), rep, 0 ); @@ -260,17 +257,10 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, if( ++len > e->pairs[i].len && ++i >= num_pairs ) break; } } - } + int cur = 0; while( true ) /* price optimization loop */ { - struct Trial *cur_trial, *next_trial; - int newlen, pos_state, triable_bytes, len_limit; - int start_len = min_match_len; - int next_price, match_price, rep_match_price; - State cur_state; - uint8_t prev_byte, cur_byte, match_byte; - if( !Mb_move_pos( &e->eb.mb ) ) return 0; if( ++cur >= num_trials ) /* no more initialized trials */ { @@ -278,8 +268,8 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, return cur; } - num_pairs = LZe_read_match_distances( e ); - newlen = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0; + const int num_pairs = LZe_read_match_distances( e ); + const int newlen = ( num_pairs > 0 ) ? 
e->pairs[num_pairs-1].len : 0; if( newlen >= e->match_len_limit ) { e->pending_num_pairs = num_pairs; @@ -288,7 +278,8 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, } /* give final values to current trial */ - cur_trial = &e->trials[cur]; + struct Trial * cur_trial = &e->trials[cur]; + State cur_state; { const int dis4 = cur_trial->dis4; int prev_index = cur_trial->prev_index; @@ -319,25 +310,25 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, mtf_reps( dis4, cur_trial->reps ); /* literal is ignored */ } - pos_state = Mb_data_position( &e->eb.mb ) & pos_state_mask; - prev_byte = Mb_peek( &e->eb.mb, 1 ); - cur_byte = Mb_peek( &e->eb.mb, 0 ); - match_byte = Mb_peek( &e->eb.mb, cur_trial->reps[0] + 1 ); + const int pos_state = Mb_data_position( &e->eb.mb ) & pos_state_mask; + const uint8_t prev_byte = Mb_peek( &e->eb.mb, 1 ); + const uint8_t cur_byte = Mb_peek( &e->eb.mb, 0 ); + const uint8_t match_byte = Mb_peek( &e->eb.mb, cur_trial->reps[0] + 1 ); - next_price = cur_trial->price + - price0( e->eb.bm_match[cur_state][pos_state] ); + int next_price = cur_trial->price + + price0( e->eb.bm_match[cur_state][pos_state] ); if( St_is_char( cur_state ) ) next_price += LZeb_price_literal( &e->eb, prev_byte, cur_byte ); else next_price += LZeb_price_matched( &e->eb, prev_byte, cur_byte, match_byte ); /* try last updates to next trial */ - next_trial = &e->trials[cur+1]; + struct Trial * next_trial = &e->trials[cur+1]; Tr_update( next_trial, next_price, -1, cur ); /* literal */ - match_price = cur_trial->price + price1( e->eb.bm_match[cur_state][pos_state] ); - rep_match_price = match_price + price1( e->eb.bm_rep[cur_state] ); + const int match_price = cur_trial->price + price1( e->eb.bm_match[cur_state][pos_state] ); + const int rep_match_price = match_price + price1( e->eb.bm_rep[cur_state] ); if( match_byte == cur_byte && next_trial->dis4 != 0 && next_trial->prev_index2 == single_step_trial ) @@ -352,11 +343,11 @@ static int 
LZe_sequence_optimizer( struct LZ_encoder * const e, } } - triable_bytes = + const int triable_bytes = min( Mb_available_bytes( &e->eb.mb ), max_num_trials - 1 - cur ); if( triable_bytes < min_match_len ) continue; - len_limit = min( e->match_len_limit, triable_bytes ); + const int len_limit = min( e->match_len_limit, triable_bytes ); /* try literal + rep0 */ if( match_byte != cur_byte && next_trial->prev_index != cur ) @@ -380,19 +371,20 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, } } + int start_len = min_match_len; + /* try rep distances */ for( rep = 0; rep < num_rep_distances; ++rep ) { const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb ); const int dis = cur_trial->reps[rep] + 1; - int price; if( data[0-dis] != data[0] || data[1-dis] != data[1] ) continue; for( len = min_match_len; len < len_limit; ++len ) if( data[len-dis] != data[len] ) break; while( num_trials < cur + len ) e->trials[++num_trials].price = infinite_price; - price = rep_match_price + LZeb_price_rep( &e->eb, rep, cur_state, pos_state ); + int price = rep_match_price + LZeb_price_rep( &e->eb, rep, cur_state, pos_state ); for( i = min_match_len; i <= len; ++i ) Tr_update( &e->trials[cur+i], price + Lp_price( &e->rep_len_prices, i, pos_state ), rep, cur ); @@ -400,17 +392,14 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, if( rep == 0 ) start_len = len + 1; /* discard shorter matches */ /* try rep + literal + rep0 */ - { int len2 = len + 1; const int limit = min( e->match_len_limit + len2, triable_bytes ); - int pos_state2; - State state2; while( len2 < limit && data[len2-dis] == data[len2] ) ++len2; len2 -= len + 1; if( len2 < min_match_len ) continue; - pos_state2 = ( pos_state + len ) & pos_state_mask; - state2 = St_set_rep( cur_state ); + int pos_state2 = ( pos_state + len ) & pos_state_mask; + State state2 = St_set_rep( cur_state ); price += Lp_price( &e->rep_len_prices, len, pos_state ) + price0( e->eb.bm_match[state2][pos_state2] ) + 
LZeb_price_matched( &e->eb, data[len-1], data[len], data[len-dis] ); @@ -423,21 +412,19 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, e->trials[++num_trials].price = infinite_price; Tr_update3( &e->trials[cur+len+1+len2], price, rep, cur + len + 1, cur ); } - } /* try matches */ if( newlen >= start_len && newlen <= len_limit ) { - int dis; const int normal_match_price = match_price + price0( e->eb.bm_rep[cur_state] ); while( num_trials < cur + newlen ) e->trials[++num_trials].price = infinite_price; - i = 0; + int i = 0; while( e->pairs[i].len < start_len ) ++i; - dis = e->pairs[i].dis; + int dis = e->pairs[i].dis; for( len = start_len; ; ++len ) { int price = normal_match_price + LZe_price_pair( e, dis, len, pos_state ); @@ -484,7 +471,7 @@ static bool LZe_encode_member( struct LZ_encoder * const e ) const int dis_price_count = best ? 1 : 512; const int align_price_count = best ? 1 : dis_align_size; const int price_count = ( e->match_len_limit > 36 ) ? 1013 : 4093; - int ahead, i; + int i; State * const state = &e->eb.state; if( e->eb.member_finished ) return true; @@ -494,11 +481,10 @@ static bool LZe_encode_member( struct LZ_encoder * const e ) if( Mb_data_position( &e->eb.mb ) == 0 && !Mb_data_finished( &e->eb.mb ) ) /* encode first byte */ { - const uint8_t prev_byte = 0; - uint8_t cur_byte; if( !Mb_enough_available_bytes( &e->eb.mb ) || !Re_enough_free_bytes( &e->eb.renc ) ) return true; - cur_byte = Mb_peek( &e->eb.mb, 0 ); + const uint8_t prev_byte = 0; + const uint8_t cur_byte = Mb_peek( &e->eb.mb, 0 ); Re_encode_bit( &e->eb.renc, &e->eb.bm_match[*state][0], 0 ); LZeb_encode_literal( &e->eb, prev_byte, cur_byte ); CRC32_update_byte( &e->eb.crc, cur_byte ); @@ -525,7 +511,7 @@ static bool LZe_encode_member( struct LZ_encoder * const e ) Lp_update_prices( &e->rep_len_prices ); } - ahead = LZe_sequence_optimizer( e, e->eb.reps, *state ); + int ahead = LZe_sequence_optimizer( e, e->eb.reps, *state ); e->price_counter -= ahead; for( i = 0; 
ahead > 0; ) @@ -542,14 +528,13 @@ static bool LZe_encode_member( struct LZ_encoder * const e ) const uint8_t prev_byte = Mb_peek( &e->eb.mb, ahead + 1 ); const uint8_t cur_byte = Mb_peek( &e->eb.mb, ahead ); CRC32_update_byte( &e->eb.crc, cur_byte ); - if( St_is_char( *state ) ) + if( ( *state = St_set_char( *state ) ) < 4 ) LZeb_encode_literal( &e->eb, prev_byte, cur_byte ); else { const uint8_t match_byte = Mb_peek( &e->eb.mb, ahead + e->eb.reps[0] + 1 ); LZeb_encode_matched( &e->eb, prev_byte, cur_byte, match_byte ); } - *state = St_set_char( *state ); } else /* match or repeated match */ { diff --git a/encoder.h b/encoder.h index 7a7ecaf..f17bb99 100644 --- a/encoder.h +++ b/encoder.h @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -194,10 +194,9 @@ static inline int LZeb_price_rep( const struct LZ_encoder_base * const eb, const int rep, const State state, const int pos_state ) { - int price; if( rep == 0 ) return price0( eb->bm_rep0[state] ) + price1( eb->bm_len[state][pos_state] ); - price = price1( eb->bm_rep0[state] ); + int price = price1( eb->bm_rep0[state] ); if( rep == 1 ) price += price0( eb->bm_rep1[state] ); else diff --git a/encoder_base.c b/encoder_base.c index c1ef9ef..4535352 100644 --- a/encoder_base.c +++ b/encoder_base.c @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -47,7 +47,6 @@ static bool Mb_init( struct Matchfinder_base * const mb, const int before_size, { const int buffer_size_limit = ( dict_factor * dict_size ) + before_size + after_size; - unsigned size; int i; mb->partial_data_pos = 0; @@ -66,9 +65,8 @@ static bool Mb_init( struct Matchfinder_base * const mb, const int before_size, mb->saved_dictionary_size = dict_size; mb->dictionary_size = dict_size; mb->pos_limit = mb->buffer_size - after_size; - size = 1 << max( 16, real_bits( mb->dictionary_size - 1 ) - 2 ); - if( mb->dictionary_size > 1 << 26 ) /* 64 MiB */ - size >>= 1; + unsigned size = 1 << max( 16, real_bits( mb->dictionary_size - 1 ) - 2 ); + if( mb->dictionary_size > 1 << 26 ) size >>= 1; /* 64 MiB */ mb->key4_mask = size - 1; /* increases with dictionary size */ size += num_prev_positions23; mb->num_prev_positions = size; @@ -88,8 +86,7 @@ static bool Mb_init( struct Matchfinder_base * const mb, const int before_size, static void Mb_adjust_array( struct Matchfinder_base * const mb ) { int size = 1 << max( 16, real_bits( mb->dictionary_size - 1 ) - 2 ); - if( mb->dictionary_size > 1 << 26 ) /* 64 MiB */ - size >>= 1; + if( mb->dictionary_size > 1 << 26 ) size >>= 1; /* 64 MiB */ mb->key4_mask = size - 1; size += mb->num_prev_positions23; mb->num_prev_positions = size; @@ -129,21 +126,21 @@ static void Mb_reset( struct Matchfinder_base * const mb ) /* End Of Stream marker => (dis == 0xFFFFFFFFU, len == min_match_len) */ static void LZeb_try_full_flush( struct LZ_encoder_base * const eb ) { - int i; - const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask; - const State state = eb->state; - Lzip_trailer trailer; if( eb->member_finished || Cb_free_bytes( &eb->renc.cb ) < max_marker_size + eb->renc.ff_count + Lt_size ) return; eb->member_finished = true; + const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask; + const State state 
= eb->state; Re_encode_bit( &eb->renc, &eb->bm_match[state][pos_state], 1 ); Re_encode_bit( &eb->renc, &eb->bm_rep[state], 0 ); LZeb_encode_pair( eb, 0xFFFFFFFFU, min_match_len, pos_state ); Re_flush( &eb->renc ); + Lzip_trailer trailer; Lt_set_data_crc( trailer, LZeb_crc( eb ) ); Lt_set_data_size( trailer, Mb_data_position( &eb->mb ) ); Lt_set_member_size( trailer, Re_member_position( &eb->renc ) + Lt_size ); + int i; for( i = 0; i < Lt_size; ++i ) Cb_put_byte( &eb->renc.cb, trailer[i] ); } @@ -152,13 +149,13 @@ static void LZeb_try_full_flush( struct LZ_encoder_base * const eb ) /* Sync Flush marker => (dis == 0xFFFFFFFFU, len == min_match_len + 1) */ static void LZeb_try_sync_flush( struct LZ_encoder_base * const eb ) { - const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask; - const State state = eb->state; const unsigned min_size = eb->renc.ff_count + max_marker_size; if( eb->member_finished || Cb_free_bytes( &eb->renc.cb ) < min_size + max_marker_size ) return; eb->mb.sync_flush_pending = false; const unsigned long long old_mpos = Re_member_position( &eb->renc ); + const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask; + const State state = eb->state; do { /* size of markers must be >= rd_min_available_bytes + 5 */ Re_encode_bit( &eb->renc, &eb->bm_match[state][pos_state], 1 ); Re_encode_bit( &eb->renc, &eb->bm_rep[state], 0 ); diff --git a/encoder_base.h b/encoder_base.h index e727a7d..17ffc93 100644 --- a/encoder_base.h +++ b/encoder_base.h @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -147,10 +147,9 @@ static inline int price_bit( const Bit_model bm, const bool bit ) static inline int price_symbol3( const Bit_model bm[], int symbol ) { - int price; bool bit = symbol & 1; symbol |= 8; symbol >>= 1; - price = price_bit( bm[symbol], bit ); + int price = price_bit( bm[symbol], bit ); bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit ); return price + price_bit( bm[1], symbol & 1 ); } @@ -158,10 +157,9 @@ static inline int price_symbol3( const Bit_model bm[], int symbol ) static inline int price_symbol6( const Bit_model bm[], unsigned symbol ) { - int price; bool bit = symbol & 1; symbol |= 64; symbol >>= 1; - price = price_bit( bm[symbol], bit ); + int price = price_bit( bm[symbol], bit ); bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit ); bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit ); bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit ); @@ -172,10 +170,9 @@ static inline int price_symbol6( const Bit_model bm[], unsigned symbol ) static inline int price_symbol8( const Bit_model bm[], int symbol ) { - int price; bool bit = symbol & 1; symbol |= 0x100; symbol >>= 1; - price = price_bit( bm[symbol], bit ); + int price = price_bit( bm[symbol], bit ); bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit ); bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit ); bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit ); @@ -427,10 +424,9 @@ static inline void Re_encode_bit( struct Range_encoder * const renc, static inline void Re_encode_tree3( struct Range_encoder * const renc, Bit_model bm[], const int symbol ) { - int model; bool bit = ( symbol >> 2 ) & 1; Re_encode_bit( renc, &bm[1], bit ); - model = 2 | bit; + int model = 2 | bit; bit = ( symbol >> 1 ) & 1; Re_encode_bit( renc, &bm[model], bit ); model <<= 1; model |= bit; 
Re_encode_bit( renc, &bm[model], symbol & 1 ); @@ -439,10 +435,9 @@ static inline void Re_encode_tree3( struct Range_encoder * const renc, static inline void Re_encode_tree6( struct Range_encoder * const renc, Bit_model bm[], const unsigned symbol ) { - int model; bool bit = ( symbol >> 5 ) & 1; Re_encode_bit( renc, &bm[1], bit ); - model = 2 | bit; + int model = 2 | bit; bit = ( symbol >> 4 ) & 1; Re_encode_bit( renc, &bm[model], bit ); model <<= 1; model |= bit; bit = ( symbol >> 3 ) & 1; @@ -583,8 +578,7 @@ static inline int LZeb_price_matched( const struct LZ_encoder_base * const eb, static inline void LZeb_encode_literal( struct LZ_encoder_base * const eb, const uint8_t prev_byte, const uint8_t symbol ) - { Re_encode_tree8( &eb->renc, eb->bm_literal[get_lit_state(prev_byte)], - symbol ); } + { Re_encode_tree8( &eb->renc, eb->bm_literal[get_lit_state(prev_byte)], symbol ); } static inline void LZeb_encode_matched( struct LZ_encoder_base * const eb, const uint8_t prev_byte, const uint8_t symbol, const uint8_t match_byte ) @@ -595,8 +589,8 @@ static inline void LZeb_encode_pair( struct LZ_encoder_base * const eb, const unsigned dis, const int len, const int pos_state ) { - const unsigned dis_slot = get_slot( dis ); Re_encode_len( &eb->renc, &eb->match_len_model, len, pos_state ); + const unsigned dis_slot = get_slot( dis ); Re_encode_tree6( &eb->renc, eb->bm_dis_slot[get_len_state(len)], dis_slot ); if( dis_slot >= start_dis_model ) diff --git a/fast_encoder.c b/fast_encoder.c index bdcbb97..618c3d6 100644 --- a/fast_encoder.c +++ b/fast_encoder.c @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -20,25 +20,24 @@ static int FLZe_longest_match_len( struct FLZ_encoder * const fe, int * const distance ) { enum { len_limit = 16 }; - const uint8_t * const data = Mb_ptr_to_current_pos( &fe->eb.mb ); int32_t * ptr0 = fe->eb.mb.pos_array + fe->eb.mb.cyclic_pos; - const int pos1 = fe->eb.mb.pos + 1; - int maxlen = 0, newpos1, count; const int available = min( Mb_available_bytes( &fe->eb.mb ), max_match_len ); if( available < len_limit ) { *ptr0 = 0; return 0; } + const uint8_t * const data = Mb_ptr_to_current_pos( &fe->eb.mb ); fe->key4 = ( ( fe->key4 << 4 ) ^ data[3] ) & fe->eb.mb.key4_mask; - newpos1 = fe->eb.mb.prev_positions[fe->key4]; + const int pos1 = fe->eb.mb.pos + 1; + int newpos1 = fe->eb.mb.prev_positions[fe->key4]; fe->eb.mb.prev_positions[fe->key4] = pos1; + int maxlen = 0, count; for( count = 4; ; ) { - int32_t * newptr; int delta; if( newpos1 <= 0 || --count < 0 || ( delta = pos1 - newpos1 ) > fe->eb.mb.dictionary_size ) { *ptr0 = 0; break; } - newptr = fe->eb.mb.pos_array + + int32_t * const newptr = fe->eb.mb.pos_array + ( fe->eb.mb.cyclic_pos - delta + ( ( fe->eb.mb.cyclic_pos >= delta ) ? 
0 : fe->eb.mb.dictionary_size + 1 ) ); @@ -71,11 +70,10 @@ static bool FLZe_encode_member( struct FLZ_encoder * const fe ) if( Mb_data_position( &fe->eb.mb ) == 0 && !Mb_data_finished( &fe->eb.mb ) ) /* encode first byte */ { - const uint8_t prev_byte = 0; - uint8_t cur_byte; if( !Mb_enough_available_bytes( &fe->eb.mb ) || !Re_enough_free_bytes( &fe->eb.renc ) ) return true; - cur_byte = Mb_peek( &fe->eb.mb, 0 ); + const uint8_t prev_byte = 0; + const uint8_t cur_byte = Mb_peek( &fe->eb.mb, 0 ); Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[*state][0], 0 ); LZeb_encode_literal( &fe->eb, prev_byte, cur_byte ); CRC32_update_byte( &fe->eb.crc, cur_byte ); @@ -86,13 +84,12 @@ static bool FLZe_encode_member( struct FLZ_encoder * const fe ) while( !Mb_data_finished( &fe->eb.mb ) && Re_member_position( &fe->eb.renc ) < fe->eb.member_size_limit ) { - int match_distance = 0; /* avoid warning from gcc 6.1.0 */ - int main_len, pos_state; - int len = 0; if( !Mb_enough_available_bytes( &fe->eb.mb ) || !Re_enough_free_bytes( &fe->eb.renc ) ) return true; - main_len = FLZe_longest_match_len( fe, &match_distance ); - pos_state = Mb_data_position( &fe->eb.mb ) & pos_state_mask; + int match_distance = 0; /* avoid warning from gcc 6.1.0 */ + const int main_len = FLZe_longest_match_len( fe, &match_distance ); + const int pos_state = Mb_data_position( &fe->eb.mb ) & pos_state_mask; + int len = 0; for( i = 0; i < num_rep_distances; ++i ) { @@ -109,11 +106,10 @@ static bool FLZe_encode_member( struct FLZ_encoder * const fe ) Re_encode_bit( &fe->eb.renc, &fe->eb.bm_len[*state][pos_state], 1 ); else { - int distance; Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep1[*state], rep > 1 ); if( rep > 1 ) Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep2[*state], rep > 2 ); - distance = fe->eb.reps[rep]; + const int distance = fe->eb.reps[rep]; for( i = rep; i > 0; --i ) fe->eb.reps[i] = fe->eb.reps[i-1]; fe->eb.reps[0] = distance; } @@ -138,7 +134,6 @@ static bool FLZe_encode_member( struct FLZ_encoder * 
const fe ) continue; } - { const uint8_t prev_byte = Mb_peek( &fe->eb.mb, 1 ); const uint8_t cur_byte = Mb_peek( &fe->eb.mb, 0 ); const uint8_t match_byte = Mb_peek( &fe->eb.mb, fe->eb.reps[0] + 1 ); @@ -169,12 +164,10 @@ static bool FLZe_encode_member( struct FLZ_encoder * const fe ) /* literal byte */ Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[*state][pos_state], 0 ); - if( St_is_char( *state ) ) + if( ( *state = St_set_char( *state ) ) < 4 ) LZeb_encode_literal( &fe->eb, prev_byte, cur_byte ); else LZeb_encode_matched( &fe->eb, prev_byte, cur_byte, match_byte ); - *state = St_set_char( *state ); - } } LZeb_try_full_flush( &fe->eb ); diff --git a/fast_encoder.h b/fast_encoder.h index 1c3a6ff..54756bd 100644 --- a/fast_encoder.h +++ b/fast_encoder.h @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/ffexample.c b/ffexample.c index 2891f02..59345ee 100644 --- a/ffexample.c +++ b/ffexample.c @@ -1,5 +1,5 @@ /* File to file example - Test program for the library lzlib - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. This program is free software: you have unlimited permission to copy, distribute, and modify it. @@ -20,7 +20,7 @@ #include #include #include -#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) +#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ #include #include #endif @@ -178,9 +178,9 @@ int ffmmcompress( FILE * const infile, FILE * const outfile ) } -/* Compresses 'infile' to 'outfile' as a multimember stream with one member +/* Compress 'infile' to 'outfile' as a multimember stream with one member for each line of text terminated by a newline character or by EOF. - Returns 0 if success, 1 if error. + Return 0 if success, 1 if error. 
*/ int fflfcompress( struct LZ_Encoder * const encoder, FILE * const infile, FILE * const outfile ) @@ -219,7 +219,7 @@ int fflfcompress( struct LZ_Encoder * const encoder, } -/* Decompresses 'infile' to 'outfile' with automatic resynchronization to +/* Decompress 'infile' to 'outfile' with automatic resynchronization to next member in case of data error, including the automatic removal of leading garbage. */ @@ -257,7 +257,7 @@ int ffrsdecompress( struct LZ_Decoder * const decoder, int main( const int argc, const char * const argv[] ) { -#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) +#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ setmode( STDIN_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY ); #endif diff --git a/lzcheck.c b/lzcheck.c index 7e00e6c..88dd4c9 100644 --- a/lzcheck.c +++ b/lzcheck.c @@ -1,5 +1,5 @@ /* Lzcheck - Test program for the library lzlib - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you have unlimited permission to copy, distribute, and modify it. @@ -133,10 +133,11 @@ static void xclose_decoder( struct LZ_Decoder * const decoder, } -/* Returns the next (usually newline-terminated) chunk of data from file. +/* Return the next (usually newline-terminated) chunk of data from file. The size returned in *sizep is always <= buffer_size. - If sizep is a null pointer, rewinds the file, resets state, and returns. - If file is at EOF, returns an empty line. */ + If sizep is a null pointer, rewind the file, reset state, and return. + If file is at EOF, return an empty line. 
+*/ static const uint8_t * next_line( FILE * const file, int * const sizep ) { static int l = 0; @@ -332,7 +333,7 @@ int main( const int argc, const char * const argv[] ) if( argc < 2 ) { - fputs( "Usage: lzcheck filename.txt...\n", stderr ); + fputs( "Usage: lzcheck [-m|-s] filename.txt...\n", stderr ); return 1; } diff --git a/lzip.h b/lzip.h index 3e3df2c..aad7e00 100644 --- a/lzip.h +++ b/lzip.h @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -168,6 +168,7 @@ static const uint32_t crc32[256] = static inline void CRC32_update_byte( uint32_t * const crc, const uint8_t byte ) { *crc = crc32[(*crc^byte)&0xFF] ^ ( *crc >> 8 ); } +/* about as fast as it is possible without messing with endianness */ static inline void CRC32_update_buf( uint32_t * const crc, const uint8_t * const buffer, const int size ) diff --git a/lzlib.c b/lzlib.c index b7969f2..3084fbd 100644 --- a/lzlib.c +++ b/lzlib.c @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/lzlib.h b/lzlib.h index b6374d8..ba29f97 100644 --- a/lzlib.h +++ b/lzlib.h @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -24,9 +24,9 @@ extern "C" { /* LZ_API_VERSION was first defined in lzlib 1.8 to 1. Since lzlib 1.12, LZ_API_VERSION is defined as (major * 1000 + minor). 
*/ -#define LZ_API_VERSION 1012 +#define LZ_API_VERSION 1013 -static const char * const LZ_version_string = "1.12"; +static const char * const LZ_version_string = "1.13"; enum LZ_Errno { LZ_ok = 0, LZ_bad_argument, LZ_mem_error, LZ_sequence_error, LZ_header_error, LZ_unexpected_eof, diff --git a/main.c b/main.c deleted file mode 100644 index c623d9b..0000000 --- a/main.c +++ /dev/null @@ -1,1222 +0,0 @@ -/* Minilzip - Test program for the library lzlib - Copyright (C) 2009-2021 Antonio Diaz Diaz. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ -/* - Exit status: 0 for a normal exit, 1 for environmental problems - (file not found, invalid flags, I/O errors, etc), 2 to indicate a - corrupt or invalid input file, 3 for an internal consistency error - (eg, bug) which caused minilzip to panic. 
-*/ - -#define _FILE_OFFSET_BITS 64 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) -#include -#if defined(__MSVCRT__) -#define fchmod(x,y) 0 -#define fchown(x,y,z) 0 -#define strtoull strtoul -#define SIGHUP SIGTERM -#define S_ISSOCK(x) 0 -#ifndef S_IRGRP -#define S_IRGRP 0 -#define S_IWGRP 0 -#define S_IROTH 0 -#define S_IWOTH 0 -#endif -#endif -#if defined(__DJGPP__) -#define S_ISSOCK(x) 0 -#define S_ISVTX 0 -#endif -#endif - -#include "carg_parser.h" -#include "lzlib.h" - -#ifndef O_BINARY -#define O_BINARY 0 -#endif - -#if CHAR_BIT != 8 -#error "Environments where CHAR_BIT != 8 are not supported." -#endif - -#ifndef max - #define max(x,y) ((x) >= (y) ? (x) : (y)) -#endif -#ifndef min - #define min(x,y) ((x) <= (y) ? (x) : (y)) -#endif - -static void cleanup_and_fail( const int retval ); -static void show_error( const char * const msg, const int errcode, - const bool help ); -static void show_file_error( const char * const filename, - const char * const msg, const int errcode ); -static void internal_error( const char * const msg ); -static const char * const mem_msg = "Not enough memory."; - -int verbosity = 0; - -static const char * const program_name = "minilzip"; -static const char * const program_year = "2021"; -static const char * invocation_name = "minilzip"; /* default value */ - -static const struct { const char * from; const char * to; } known_extensions[] = { - { ".lz", "" }, - { ".tlz", ".tar" }, - { 0, 0 } }; - -struct Lzma_options - { - int dictionary_size; /* 4 KiB .. 512 MiB */ - int match_len_limit; /* 5 .. 273 */ - }; - -enum Mode { m_compress, m_decompress, m_test }; - -/* Variables used in signal handler context. - They are not declared volatile because the handler never returns. 
*/ -static char * output_filename = 0; -static int outfd = -1; -static bool delete_output_on_interrupt = false; - - -static void show_help( void ) - { - printf( "Minilzip is a test program for the compression library lzlib, fully\n" - "compatible with lzip 1.4 or newer.\n" - "\nLzip is a lossless data compressor with a user interface similar to the one\n" - "of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov\n" - "chain-Algorithm' (LZMA) stream format, chosen to maximize safety and\n" - "interoperability. Lzip can compress about as fast as gzip (lzip -0) or\n" - "compress most files more than bzip2 (lzip -9). Decompression speed is\n" - "intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 from\n" - "a data recovery perspective. Lzip has been designed, written, and tested\n" - "with great care to replace gzip and bzip2 as the standard general-purpose\n" - "compressed format for unix-like systems.\n" - "\nUsage: %s [options] [files]\n", invocation_name ); - printf( "\nOptions:\n" - " -h, --help display this help and exit\n" - " -V, --version output version information and exit\n" - " -a, --trailing-error exit with error status if trailing data\n" - " -b, --member-size= set member size limit in bytes\n" - " -c, --stdout write to standard output, keep input files\n" - " -d, --decompress decompress\n" - " -f, --force overwrite existing output files\n" - " -F, --recompress force re-compression of compressed files\n" - " -k, --keep keep (don't delete) input files\n" - " -m, --match-length= set match length limit in bytes [36]\n" - " -o, --output= write to , keep input files\n" - " -q, --quiet suppress all messages\n" - " -s, --dictionary-size= set dictionary size limit in bytes [8 MiB]\n" - " -S, --volume-size= set volume size limit in bytes\n" - " -t, --test test compressed file integrity\n" - " -v, --verbose be verbose (a 2nd -v gives more)\n" - " -0 .. 
-9 set compression level [default 6]\n" - " --fast alias for -0\n" - " --best alias for -9\n" - " --loose-trailing allow trailing data seeming corrupt header\n" - " --check-lib compare version of lzlib.h with liblz.{a,so}\n" - "\nIf no file names are given, or if a file is '-', minilzip compresses or\n" - "decompresses from standard input to standard output.\n" - "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" - "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" - "Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12\n" - "to 2^29 bytes.\n" - "\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n" - "scale optimal for all files. If your files are large, very repetitive,\n" - "etc, you may need to use the options --dictionary-size and --match-length\n" - "directly to achieve optimal performance.\n" - "\nTo extract all the files from archive 'foo.tar.lz', use the commands\n" - "'tar -xf foo.tar.lz' or 'minilzip -cd foo.tar.lz | tar -xf -'.\n" - "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" - "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" - "invalid input file, 3 for an internal consistency error (eg, bug) which\n" - "caused minilzip to panic.\n" - "\nThe ideas embodied in lzlib are due to (at least) the following people:\n" - "Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the\n" - "definition of Markov chains), G.N.N. 
Martin (for the definition of range\n" - "encoding), Igor Pavlov (for putting all the above together in LZMA), and\n" - "Julian Seward (for bzip2's CLI).\n" - "\nReport bugs to lzip-bug@nongnu.org\n" - "Lzlib home page: http://www.nongnu.org/lzip/lzlib.html\n" ); - } - - -static void show_version( void ) - { - printf( "%s %s\n", program_name, PROGVERSION ); - printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); - printf( "Using lzlib %s\n", LZ_version() ); - printf( "License GPLv2+: GNU GPL version 2 or later \n" - "This is free software: you are free to change and redistribute it.\n" - "There is NO WARRANTY, to the extent permitted by law.\n" ); - } - - -int check_lib() - { - bool warning = false; - if( strcmp( LZ_version_string, LZ_version() ) != 0 ) - { warning = true; - if( verbosity >= 0 ) - printf( "warning: LZ_version_string != LZ_version() (%s vs %s)\n", - LZ_version_string, LZ_version() ); } -#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012 - if( LZ_API_VERSION != LZ_api_version() ) - { warning = true; - if( verbosity >= 0 ) - printf( "warning: LZ_API_VERSION != LZ_api_version() (%u vs %u)\n", - LZ_API_VERSION, LZ_api_version() ); } -#endif - if( verbosity >= 1 ) - { - printf( "Using lzlib %s\n", LZ_version() ); -#if !defined LZ_API_VERSION - fputs( "LZ_API_VERSION is not defined.\n", stdout ); -#elif LZ_API_VERSION >= 1012 - printf( "Using LZ_API_VERSION = %u\n", LZ_api_version() ); -#else - printf( "Compiled with LZ_API_VERSION = %u. 
" - "Using an unknown LZ_API_VERSION\n", LZ_API_VERSION ); -#endif - } - return warning; - } - - -/* assure at least a minimum size for buffer 'buf' */ -static void * resize_buffer( void * buf, const unsigned min_size ) - { - if( buf ) buf = realloc( buf, min_size ); - else buf = malloc( min_size ); - if( !buf ) { show_error( mem_msg, 0, false ); cleanup_and_fail( 1 ); } - return buf; - } - - -struct Pretty_print - { - const char * name; - char * padded_name; - const char * stdin_name; - unsigned longest_name; - bool first_post; - }; - -static void Pp_init( struct Pretty_print * const pp, - const char * const filenames[], const int num_filenames ) - { - unsigned stdin_name_len; - int i; - pp->name = 0; - pp->padded_name = 0; - pp->stdin_name = "(stdin)"; - pp->longest_name = 0; - pp->first_post = false; - - if( verbosity <= 0 ) return; - stdin_name_len = strlen( pp->stdin_name ); - for( i = 0; i < num_filenames; ++i ) - { - const char * const s = filenames[i]; - const unsigned len = (strcmp( s, "-" ) == 0) ? 
stdin_name_len : strlen( s ); - if( pp->longest_name < len ) pp->longest_name = len; - } - if( pp->longest_name == 0 ) pp->longest_name = stdin_name_len; - } - -static void Pp_set_name( struct Pretty_print * const pp, - const char * const filename ) - { - unsigned name_len, padded_name_len, i = 0; - - if( filename && filename[0] && strcmp( filename, "-" ) != 0 ) - pp->name = filename; - else pp->name = pp->stdin_name; - name_len = strlen( pp->name ); - padded_name_len = max( name_len, pp->longest_name ) + 4; - pp->padded_name = resize_buffer( pp->padded_name, padded_name_len + 1 ); - while( i < 2 ) pp->padded_name[i++] = ' '; - while( i < name_len + 2 ) { pp->padded_name[i] = pp->name[i-2]; ++i; } - pp->padded_name[i++] = ':'; - while( i < padded_name_len ) pp->padded_name[i++] = ' '; - pp->padded_name[i] = 0; - pp->first_post = true; - } - -static void Pp_reset( struct Pretty_print * const pp ) - { if( pp->name && pp->name[0] ) pp->first_post = true; } - -static void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ) - { - if( verbosity >= 0 ) - { - if( pp->first_post ) - { - pp->first_post = false; - fputs( pp->padded_name, stderr ); - if( !msg ) fflush( stderr ); - } - if( msg ) fprintf( stderr, "%s\n", msg ); - } - } - - -static void show_header( const unsigned dictionary_size ) - { - enum { factor = 1024 }; - const char * const prefix[8] = - { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; - const char * p = ""; - const char * np = " "; - unsigned num = dictionary_size; - bool exact = ( num % factor == 0 ); - - int i; for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) - { num /= factor; if( num % factor != 0 ) exact = false; - p = prefix[i]; np = ""; } - fprintf( stderr, "dict %s%4u %sB, ", np, num, p ); - } - - -static unsigned long long getnum( const char * const ptr, - const unsigned long long llimit, - const unsigned long long ulimit ) - { - unsigned long long result; - char * tail; - errno = 0; - result = 
strtoull( ptr, &tail, 0 ); - if( tail == ptr ) - { - show_error( "Bad or missing numerical argument.", 0, true ); - exit( 1 ); - } - - if( !errno && tail[0] ) - { - const unsigned factor = ( tail[1] == 'i' ) ? 1024 : 1000; - int exponent = 0; /* 0 = bad multiplier */ - int i; - switch( tail[0] ) - { - case 'Y': exponent = 8; break; - case 'Z': exponent = 7; break; - case 'E': exponent = 6; break; - case 'P': exponent = 5; break; - case 'T': exponent = 4; break; - case 'G': exponent = 3; break; - case 'M': exponent = 2; break; - case 'K': if( factor == 1024 ) exponent = 1; break; - case 'k': if( factor == 1000 ) exponent = 1; break; - } - if( exponent <= 0 ) - { - show_error( "Bad multiplier in numerical argument.", 0, true ); - exit( 1 ); - } - for( i = 0; i < exponent; ++i ) - { - if( ulimit / factor >= result ) result *= factor; - else { errno = ERANGE; break; } - } - } - if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; - if( errno ) - { - show_error( "Numerical argument out of limits.", 0, false ); - exit( 1 ); - } - return result; - } - - -static int get_dict_size( const char * const arg ) - { - char * tail; - int dictionary_size; - const long bits = strtol( arg, &tail, 0 ); - if( bits >= LZ_min_dictionary_bits() && - bits <= LZ_max_dictionary_bits() && *tail == 0 ) - return 1 << bits; - dictionary_size = getnum( arg, LZ_min_dictionary_size(), - LZ_max_dictionary_size() ); - if( dictionary_size == 65535 ) ++dictionary_size; /* no fast encoder */ - return dictionary_size; - } - - -static void set_mode( enum Mode * const program_modep, const enum Mode new_mode ) - { - if( *program_modep != m_compress && *program_modep != new_mode ) - { - show_error( "Only one operation can be specified.", 0, true ); - exit( 1 ); - } - *program_modep = new_mode; - } - - -static int extension_index( const char * const name ) - { - int eindex; - for( eindex = 0; known_extensions[eindex].from; ++eindex ) - { - const char * const ext = 
known_extensions[eindex].from; - const unsigned name_len = strlen( name ); - const unsigned ext_len = strlen( ext ); - if( name_len > ext_len && - strncmp( name + name_len - ext_len, ext, ext_len ) == 0 ) - return eindex; - } - return -1; - } - - -static void set_c_outname( const char * const name, const bool force_ext, - const bool multifile ) - { - output_filename = resize_buffer( output_filename, strlen( name ) + 5 + - strlen( known_extensions[0].from ) + 1 ); - strcpy( output_filename, name ); - if( multifile ) strcat( output_filename, "00001" ); - if( force_ext || multifile ) - strcat( output_filename, known_extensions[0].from ); - } - - -static void set_d_outname( const char * const name, const int eindex ) - { - const unsigned name_len = strlen( name ); - if( eindex >= 0 ) - { - const char * const from = known_extensions[eindex].from; - const unsigned from_len = strlen( from ); - if( name_len > from_len ) - { - output_filename = resize_buffer( output_filename, name_len + - strlen( known_extensions[eindex].to ) + 1 ); - strcpy( output_filename, name ); - strcpy( output_filename + name_len - from_len, known_extensions[eindex].to ); - return; - } - } - output_filename = resize_buffer( output_filename, name_len + 4 + 1 ); - strcpy( output_filename, name ); - strcat( output_filename, ".out" ); - if( verbosity >= 1 ) - fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n", - program_name, name, output_filename ); - } - - -static int open_instream( const char * const name, struct stat * const in_statsp, - const enum Mode program_mode, const int eindex, - const bool one_to_one, const bool recompress ) - { - int infd = -1; - if( program_mode == m_compress && !recompress && eindex >= 0 ) - { - if( verbosity >= 0 ) - fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n", - program_name, name, known_extensions[eindex].from ); - } - else - { - infd = open( name, O_RDONLY | O_BINARY ); - if( infd < 0 ) - show_file_error( name, "Can't open 
input file", errno ); - else - { - const int i = fstat( infd, in_statsp ); - const mode_t mode = in_statsp->st_mode; - const bool can_read = ( i == 0 && - ( S_ISBLK( mode ) || S_ISCHR( mode ) || - S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); - if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) ) - { - if( verbosity >= 0 ) - fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", - program_name, name, ( can_read && one_to_one ) ? - ",\n and neither '-c' nor '-o' were specified" : "" ); - close( infd ); - infd = -1; - } - } - } - return infd; - } - - -static bool open_outstream( const bool force, const bool protect ) - { - const mode_t usr_rw = S_IRUSR | S_IWUSR; - const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; - const mode_t outfd_mode = protect ? usr_rw : all_rw; - int flags = O_CREAT | O_WRONLY | O_BINARY; - if( force ) flags |= O_TRUNC; else flags |= O_EXCL; - - outfd = open( output_filename, flags, outfd_mode ); - if( outfd >= 0 ) delete_output_on_interrupt = true; - else if( verbosity >= 0 ) - { - if( errno == EEXIST ) - fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n", - program_name, output_filename ); - else - fprintf( stderr, "%s: Can't create output file '%s': %s\n", - program_name, output_filename, strerror( errno ) ); - } - return ( outfd >= 0 ); - } - - -static void set_signals( void (*action)(int) ) - { - signal( SIGHUP, action ); - signal( SIGINT, action ); - signal( SIGTERM, action ); - } - - -static void cleanup_and_fail( const int retval ) - { - set_signals( SIG_IGN ); /* ignore signals */ - if( delete_output_on_interrupt ) - { - delete_output_on_interrupt = false; - if( verbosity >= 0 ) - fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n", - program_name, output_filename ); - if( outfd >= 0 ) { close( outfd ); outfd = -1; } - if( remove( output_filename ) != 0 && errno != ENOENT ) - show_error( "WARNING: deletion of output file (apparently) failed.", 0, false ); - } - 
exit( retval ); - } - - -static void signal_handler( int sig ) - { - if( sig ) {} /* keep compiler happy */ - show_error( "Control-C or similar caught, quitting.", 0, false ); - cleanup_and_fail( 1 ); - } - - -static inline void set_retval( int * retval, const int new_val ) - { if( *retval < new_val ) *retval = new_val; } - - -static bool check_tty_in( const char * const input_filename, const int infd, - const enum Mode program_mode, int * const retval ) - { - if( ( program_mode == m_decompress || program_mode == m_test ) && - isatty( infd ) ) /* for example /dev/tty */ - { show_file_error( input_filename, - "I won't read compressed data from a terminal.", 0 ); - close( infd ); set_retval( retval, 1 ); - if( program_mode != m_test ) cleanup_and_fail( *retval ); - return false; } - return true; - } - -static bool check_tty_out( const enum Mode program_mode ) - { - if( program_mode == m_compress && isatty( outfd ) ) - { show_file_error( output_filename[0] ? - output_filename : "(stdout)", - "I won't write compressed data to a terminal.", 0 ); - return false; } - return true; - } - - -/* Set permissions, owner, and times. */ -static void close_and_set_permissions( const struct stat * const in_statsp ) - { - bool warning = false; - if( in_statsp ) - { - const mode_t mode = in_statsp->st_mode; - /* fchown will in many cases return with EPERM, which can be safely ignored. 
*/ - if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 ) - { if( fchmod( outfd, mode ) != 0 ) warning = true; } - else - if( errno != EPERM || - fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 ) - warning = true; - } - if( close( outfd ) != 0 ) - { - show_error( "Error closing output file", errno, false ); - cleanup_and_fail( 1 ); - } - outfd = -1; - delete_output_on_interrupt = false; - if( in_statsp ) - { - struct utimbuf t; - t.actime = in_statsp->st_atime; - t.modtime = in_statsp->st_mtime; - if( utime( output_filename, &t ) != 0 ) warning = true; - } - if( warning && verbosity >= 1 ) - show_error( "Can't change output file attributes.", 0, false ); - } - - -/* Returns the number of bytes really read. - If (returned value < size) and (errno == 0), means EOF was reached. -*/ -static int readblock( const int fd, uint8_t * const buf, const int size ) - { - int sz = 0; - errno = 0; - while( sz < size ) - { - const int n = read( fd, buf + sz, size - sz ); - if( n > 0 ) sz += n; - else if( n == 0 ) break; /* EOF */ - else if( errno != EINTR ) break; - errno = 0; - } - return sz; - } - - -/* Returns the number of bytes really written. - If (returned value < size), it is always an error. 
-*/ -static int writeblock( const int fd, const uint8_t * const buf, const int size ) - { - int sz = 0; - errno = 0; - while( sz < size ) - { - const int n = write( fd, buf + sz, size - sz ); - if( n > 0 ) sz += n; - else if( n < 0 && errno != EINTR ) break; - errno = 0; - } - return sz; - } - - -static bool next_filename( void ) - { - const unsigned name_len = strlen( output_filename ); - const unsigned ext_len = strlen( known_extensions[0].from ); - int i, j; - if( name_len >= ext_len + 5 ) /* "*00001.lz" */ - for( i = name_len - ext_len - 1, j = 0; j < 5; --i, ++j ) - { - if( output_filename[i] < '9' ) { ++output_filename[i]; return true; } - else output_filename[i] = '0'; - } - return false; - } - - -static int do_compress( struct LZ_Encoder * const encoder, - const unsigned long long member_size, - const unsigned long long volume_size, const int infd, - struct Pretty_print * const pp, - const struct stat * const in_statsp ) - { - unsigned long long partial_volume_size = 0; - enum { buffer_size = 65536 }; - uint8_t buffer[buffer_size]; /* read/write buffer */ - if( verbosity >= 1 ) Pp_show_msg( pp, 0 ); - - while( true ) - { - int in_size = 0, out_size; - while( LZ_compress_write_size( encoder ) > 0 ) - { - const int size = min( LZ_compress_write_size( encoder ), buffer_size ); - const int rd = readblock( infd, buffer, size ); - if( rd != size && errno ) - { - Pp_show_msg( pp, 0 ); show_error( "Read error", errno, false ); - return 1; - } - if( rd > 0 && rd != LZ_compress_write( encoder, buffer, rd ) ) - internal_error( "library error (LZ_compress_write)." 
); - if( rd < size ) LZ_compress_finish( encoder ); -/* else LZ_compress_sync_flush( encoder ); */ - in_size += rd; - } - out_size = LZ_compress_read( encoder, buffer, buffer_size ); - if( out_size < 0 ) - { - Pp_show_msg( pp, 0 ); - if( verbosity >= 0 ) - fprintf( stderr, "%s: LZ_compress_read error: %s\n", - program_name, LZ_strerror( LZ_compress_errno( encoder ) ) ); - return 1; - } - else if( out_size > 0 ) - { - const int wr = writeblock( outfd, buffer, out_size ); - if( wr != out_size ) - { - Pp_show_msg( pp, 0 ); show_error( "Write error", errno, false ); - return 1; - } - } - else if( in_size == 0 ) - internal_error( "library error (LZ_compress_read)." ); - if( LZ_compress_member_finished( encoder ) ) - { - unsigned long long size; - if( LZ_compress_finished( encoder ) == 1 ) break; - if( volume_size > 0 ) - { - partial_volume_size += LZ_compress_member_position( encoder ); - if( partial_volume_size >= volume_size - LZ_min_dictionary_size() ) - { - partial_volume_size = 0; - if( delete_output_on_interrupt ) - { - close_and_set_permissions( in_statsp ); - if( !next_filename() ) - { Pp_show_msg( pp, "Too many volume files." 
); return 1; } - if( !open_outstream( true, in_statsp ) ) return 1; - } - } - size = min( member_size, volume_size - partial_volume_size ); - } - else - size = member_size; - if( LZ_compress_restart_member( encoder, size ) < 0 ) - { - Pp_show_msg( pp, 0 ); - if( verbosity >= 0 ) - fprintf( stderr, "%s: LZ_compress_restart_member error: %s\n", - program_name, LZ_strerror( LZ_compress_errno( encoder ) ) ); - return 1; - } - } - } - - if( verbosity >= 1 ) - { - const unsigned long long in_size = LZ_compress_total_in_size( encoder ); - const unsigned long long out_size = LZ_compress_total_out_size( encoder ); - if( in_size == 0 || out_size == 0 ) - fputs( " no data compressed.\n", stderr ); - else - fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved, " - "%llu in, %llu out.\n", - (double)in_size / out_size, - ( 100.0 * out_size ) / in_size, - 100.0 - ( ( 100.0 * out_size ) / in_size ), - in_size, out_size ); - } - return 0; - } - - -static int compress( const unsigned long long member_size, - const unsigned long long volume_size, const int infd, - const struct Lzma_options * const encoder_options, - struct Pretty_print * const pp, - const struct stat * const in_statsp ) - { - struct LZ_Encoder * const encoder = - LZ_compress_open( encoder_options->dictionary_size, - encoder_options->match_len_limit, ( volume_size > 0 ) ? - min( member_size, volume_size ) : member_size ); - int retval; - - if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) - { - if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error ) - Pp_show_msg( pp, "Not enough memory. Try a smaller dictionary size." ); - else - internal_error( "invalid argument to encoder." 
); - retval = 1; - } - else retval = do_compress( encoder, member_size, volume_size, - infd, pp, in_statsp ); - LZ_compress_close( encoder ); - return retval; - } - - -static int do_decompress( struct LZ_Decoder * const decoder, const int infd, - struct Pretty_print * const pp, const bool ignore_trailing, - const bool loose_trailing, const bool testing ) - { - enum { buffer_size = 65536 }; - uint8_t buffer[buffer_size]; /* read/write buffer */ - unsigned long long total_in = 0; /* to detect library stall */ - bool first_member; - - for( first_member = true; ; ) - { - const int max_in_size = - min( LZ_decompress_write_size( decoder ), buffer_size ); - int in_size = 0, out_size = 0; - if( max_in_size > 0 ) - { - in_size = readblock( infd, buffer, max_in_size ); - if( in_size != max_in_size && errno ) - { - Pp_show_msg( pp, 0 ); show_error( "Read error", errno, false ); - return 1; - } - if( in_size > 0 && in_size != LZ_decompress_write( decoder, buffer, in_size ) ) - internal_error( "library error (LZ_decompress_write)." ); - if( in_size < max_in_size ) LZ_decompress_finish( decoder ); - } - while( true ) - { - const int rd = - LZ_decompress_read( decoder, (outfd >= 0) ? buffer : 0, buffer_size ); - if( rd > 0 ) - { - out_size += rd; - if( outfd >= 0 ) - { - const int wr = writeblock( outfd, buffer, rd ); - if( wr != rd ) - { - Pp_show_msg( pp, 0 ); show_error( "Write error", errno, false ); - return 1; - } - } - } - else if( rd < 0 ) { out_size = rd; break; } - if( LZ_decompress_member_finished( decoder ) == 1 ) - { - if( verbosity >= 1 ) - { - const unsigned long long data_size = LZ_decompress_data_position( decoder ); - const unsigned long long member_size = LZ_decompress_member_position( decoder ); - if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) - Pp_show_msg( pp, 0 ); - if( verbosity >= 2 ) - { - if( verbosity >= 4 ) - show_header( LZ_decompress_dictionary_size( decoder ) ); - if( data_size == 0 || member_size == 0 ) - fputs( "no data compressed. 
", stderr ); - else - fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved. ", - (double)data_size / member_size, - ( 100.0 * member_size ) / data_size, - 100.0 - ( ( 100.0 * member_size ) / data_size ) ); - if( verbosity >= 4 ) - fprintf( stderr, "CRC %08X, ", LZ_decompress_data_crc( decoder ) ); - if( verbosity >= 3 ) - fprintf( stderr, "%9llu out, %8llu in. ", data_size, member_size ); - fputs( testing ? "ok\n" : "done\n", stderr ); Pp_reset( pp ); - } - } - first_member = false; - } - if( rd <= 0 ) break; - } - if( out_size < 0 || ( first_member && out_size == 0 ) ) - { - const unsigned long long member_pos = LZ_decompress_member_position( decoder ); - const enum LZ_Errno lz_errno = LZ_decompress_errno( decoder ); - if( lz_errno == LZ_library_error ) - internal_error( "library error (LZ_decompress_read)." ); - if( member_pos <= 6 ) - { - if( lz_errno == LZ_unexpected_eof ) - { - if( first_member ) - show_file_error( pp->name, "File ends unexpectedly at member header.", 0 ); - else - Pp_show_msg( pp, "Truncated header in multimember file." ); - return 2; - } - else if( lz_errno == LZ_data_error ) - { - if( member_pos == 4 ) - { if( verbosity >= 0 ) - { Pp_show_msg( pp, 0 ); - fprintf( stderr, "Version %d member format not supported.\n", - LZ_decompress_member_version( decoder ) ); } } - else if( member_pos == 5 ) - Pp_show_msg( pp, "Invalid dictionary size in member header." ); - else if( first_member ) /* for lzlib older than 1.10 */ - Pp_show_msg( pp, "Bad version or dictionary size in member header." ); - else if( !loose_trailing ) - Pp_show_msg( pp, "Corrupt header in multimember file." ); - else if( !ignore_trailing ) - Pp_show_msg( pp, "Trailing data not allowed." ); - else break; /* trailing data */ - return 2; - } - } - if( lz_errno == LZ_header_error ) - { - if( first_member ) - show_file_error( pp->name, - "Bad magic number (file not in lzip format).", 0 ); - else if( !ignore_trailing ) - Pp_show_msg( pp, "Trailing data not allowed." 
); - else break; /* trailing data */ - return 2; - } - if( lz_errno == LZ_mem_error ) { Pp_show_msg( pp, mem_msg ); return 1; } - if( verbosity >= 0 ) - { - Pp_show_msg( pp, 0 ); - fprintf( stderr, "%s at pos %llu\n", ( lz_errno == LZ_unexpected_eof ) ? - "File ends unexpectedly" : "Decoder error", - LZ_decompress_total_in_size( decoder ) ); - } - return 2; - } - if( LZ_decompress_finished( decoder ) == 1 ) break; - if( in_size == 0 && out_size == 0 ) - { - const unsigned long long size = LZ_decompress_total_in_size( decoder ); - if( total_in == size ) internal_error( "library error (stalled)." ); - total_in = size; - } - } - if( verbosity == 1 ) fputs( testing ? "ok\n" : "done\n", stderr ); - return 0; - } - - -static int decompress( const int infd, struct Pretty_print * const pp, - const bool ignore_trailing, - const bool loose_trailing, const bool testing ) - { - struct LZ_Decoder * const decoder = LZ_decompress_open(); - int retval; - - if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) - { Pp_show_msg( pp, mem_msg ); retval = 1; } - else retval = do_decompress( decoder, infd, pp, ignore_trailing, - loose_trailing, testing ); - LZ_decompress_close( decoder ); - return retval; - } - - -static void show_error( const char * const msg, const int errcode, - const bool help ) - { - if( verbosity < 0 ) return; - if( msg && msg[0] ) - fprintf( stderr, "%s: %s%s%s\n", program_name, msg, - ( errcode > 0 ) ? ": " : "", - ( errcode > 0 ) ? strerror( errcode ) : "" ); - if( help ) - fprintf( stderr, "Try '%s --help' for more information.\n", - invocation_name ); - } - - -static void show_file_error( const char * const filename, - const char * const msg, const int errcode ) - { - if( verbosity >= 0 ) - fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg, - ( errcode > 0 ) ? ": " : "", - ( errcode > 0 ) ? 
strerror( errcode ) : "" ); - } - - -static void internal_error( const char * const msg ) - { - if( verbosity >= 0 ) - fprintf( stderr, "%s: internal error: %s\n", program_name, msg ); - exit( 3 ); - } - - -int main( const int argc, const char * const argv[] ) - { - /* Mapping from gzip/bzip2 style 1..9 compression modes - to the corresponding LZMA compression modes. */ - const struct Lzma_options option_mapping[] = - { - { 65535, 16 }, /* -0 (65535,16 chooses fast encoder) */ - { 1 << 20, 5 }, /* -1 */ - { 3 << 19, 6 }, /* -2 */ - { 1 << 21, 8 }, /* -3 */ - { 3 << 20, 12 }, /* -4 */ - { 1 << 22, 20 }, /* -5 */ - { 1 << 23, 36 }, /* -6 */ - { 1 << 24, 68 }, /* -7 */ - { 3 << 23, 132 }, /* -8 */ - { 1 << 25, 273 } }; /* -9 */ - struct Lzma_options encoder_options = option_mapping[6]; /* default = "-6" */ - const unsigned long long max_member_size = 0x0008000000000000ULL; /* 2 PiB */ - const unsigned long long max_volume_size = 0x4000000000000000ULL; /* 4 EiB */ - unsigned long long member_size = max_member_size; - unsigned long long volume_size = 0; - const char * default_output_filename = ""; - static struct Arg_parser parser; /* static because valgrind complains */ - static struct Pretty_print pp; /* and memory management in C sucks */ - static const char ** filenames = 0; - int num_filenames = 0; - enum Mode program_mode = m_compress; - int argind = 0; - int failed_tests = 0; - int retval = 0; - int i; - bool filenames_given = false; - bool force = false; - bool ignore_trailing = true; - bool keep_input_files = false; - bool loose_trailing = false; - bool recompress = false; - bool stdin_used = false; - bool to_stdout = false; - - enum { opt_chk = 256, opt_lt }; - const struct ap_Option options[] = - { - { '0', "fast", ap_no }, - { '1', 0, ap_no }, - { '2', 0, ap_no }, - { '3', 0, ap_no }, - { '4', 0, ap_no }, - { '5', 0, ap_no }, - { '6', 0, ap_no }, - { '7', 0, ap_no }, - { '8', 0, ap_no }, - { '9', "best", ap_no }, - { 'a', "trailing-error", ap_no }, - { 'b', 
"member-size", ap_yes }, - { 'c', "stdout", ap_no }, - { 'd', "decompress", ap_no }, - { 'f', "force", ap_no }, - { 'F', "recompress", ap_no }, - { 'h', "help", ap_no }, - { 'k', "keep", ap_no }, - { 'm', "match-length", ap_yes }, - { 'n', "threads", ap_yes }, - { 'o', "output", ap_yes }, - { 'q', "quiet", ap_no }, - { 's', "dictionary-size", ap_yes }, - { 'S', "volume-size", ap_yes }, - { 't', "test", ap_no }, - { 'v', "verbose", ap_no }, - { 'V', "version", ap_no }, - { opt_chk, "check-lib", ap_no }, - { opt_lt, "loose-trailing", ap_no }, - { 0, 0, ap_no } }; - - if( argc > 0 ) invocation_name = argv[0]; - - if( !ap_init( &parser, argc, argv, options, 0 ) ) - { show_error( mem_msg, 0, false ); return 1; } - if( ap_error( &parser ) ) /* bad option */ - { show_error( ap_error( &parser ), 0, true ); return 1; } - - for( ; argind < ap_arguments( &parser ); ++argind ) - { - const int code = ap_code( &parser, argind ); - const char * const arg = ap_argument( &parser, argind ); - if( !code ) break; /* no more options */ - switch( code ) - { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - encoder_options = option_mapping[code-'0']; break; - case 'a': ignore_trailing = false; break; - case 'b': member_size = getnum( arg, 100000, max_member_size ); break; - case 'c': to_stdout = true; break; - case 'd': set_mode( &program_mode, m_decompress ); break; - case 'f': force = true; break; - case 'F': recompress = true; break; - case 'h': show_help(); return 0; - case 'k': keep_input_files = true; break; - case 'm': encoder_options.match_len_limit = - getnum( arg, LZ_min_match_len_limit(), - LZ_max_match_len_limit() ); break; - case 'n': break; - case 'o': if( strcmp( arg, "-" ) == 0 ) to_stdout = true; - else { default_output_filename = arg; } break; - case 'q': verbosity = -1; break; - case 's': encoder_options.dictionary_size = get_dict_size( arg ); - break; - case 'S': volume_size = getnum( arg, 100000, max_volume_size 
); break; - case 't': set_mode( &program_mode, m_test ); break; - case 'v': if( verbosity < 4 ) ++verbosity; break; - case 'V': show_version(); return 0; - case opt_chk: return check_lib(); - case opt_lt: loose_trailing = true; break; - default : internal_error( "uncaught option." ); - } - } /* end process options */ - - if( strcmp( PROGVERSION, LZ_version_string ) != 0 ) - internal_error( "wrong PROGVERSION." ); -#if !defined LZ_API_VERSION || LZ_API_VERSION < 1012 -#error "lzlib 1.12 or newer needed." -#else - if( LZ_api_version() < 1012 ) /* minilzip passes null to LZ_decompress_read */ - { show_error( "lzlib 1.12 or newer needed. Try --check-lib.", 0, false ); - return 1; } - if( LZ_api_version() != LZ_API_VERSION ) show_error( - "warning: wrong library API version. Try --check-lib.", 0, false ); - else -#endif - if( strcmp( LZ_version_string, LZ_version() ) != 0 ) show_error( - "warning: wrong library version_string. Try --check-lib.", 0, false ); - -#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) - setmode( STDIN_FILENO, O_BINARY ); - setmode( STDOUT_FILENO, O_BINARY ); -#endif - - num_filenames = max( 1, ap_arguments( &parser ) - argind ); - filenames = resize_buffer( filenames, num_filenames * sizeof filenames[0] ); - filenames[0] = "-"; - - for( i = 0; argind + i < ap_arguments( &parser ); ++i ) - { - filenames[i] = ap_argument( &parser, argind + i ); - if( strcmp( filenames[i], "-" ) != 0 ) filenames_given = true; - } - - if( program_mode == m_compress ) - { - if( volume_size > 0 && !to_stdout && default_output_filename[0] && - num_filenames > 1 ) - { show_error( "Only can compress one file when using '-o' and '-S'.", - 0, true ); return 1; } - } - else volume_size = 0; - if( program_mode == m_test ) to_stdout = false; /* apply overrides */ - if( program_mode == m_test || to_stdout ) default_output_filename = ""; - - output_filename = resize_buffer( output_filename, 1 ); - output_filename[0] = 0; - if( to_stdout && program_mode != m_test 
) /* check tty only once */ - { outfd = STDOUT_FILENO; if( !check_tty_out( program_mode ) ) return 1; } - else outfd = -1; - - const bool to_file = !to_stdout && program_mode != m_test && - default_output_filename[0]; - if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) ) - set_signals( signal_handler ); - - Pp_init( &pp, filenames, num_filenames ); - - const bool one_to_one = !to_stdout && program_mode != m_test && !to_file; - for( i = 0; i < num_filenames; ++i ) - { - const char * input_filename = ""; - int infd; - int tmp; - struct stat in_stats; - const struct stat * in_statsp; - - Pp_set_name( &pp, filenames[i] ); - if( strcmp( filenames[i], "-" ) == 0 ) - { - if( stdin_used ) continue; else stdin_used = true; - infd = STDIN_FILENO; - if( !check_tty_in( pp.name, infd, program_mode, &retval ) ) continue; - if( one_to_one ) { outfd = STDOUT_FILENO; output_filename[0] = 0; } - } - else - { - const int eindex = extension_index( input_filename = filenames[i] ); - infd = open_instream( input_filename, &in_stats, program_mode, - eindex, one_to_one, recompress ); - if( infd < 0 ) { set_retval( &retval, 1 ); continue; } - if( !check_tty_in( pp.name, infd, program_mode, &retval ) ) continue; - if( one_to_one ) /* open outfd after verifying infd */ - { - if( program_mode == m_compress ) - set_c_outname( input_filename, true, volume_size > 0 ); - else set_d_outname( input_filename, eindex ); - if( !open_outstream( force, true ) ) - { close( infd ); set_retval( &retval, 1 ); continue; } - } - } - - if( one_to_one && !check_tty_out( program_mode ) ) - { set_retval( &retval, 1 ); return retval; } /* don't delete a tty */ - - if( to_file && outfd < 0 ) /* open outfd after verifying infd */ - { - if( program_mode == m_compress ) set_c_outname( default_output_filename, - false, volume_size > 0 ); - else - { output_filename = resize_buffer( output_filename, - strlen( default_output_filename ) + 1 ); - strcpy( output_filename, default_output_filename ); } 
- if( !open_outstream( force, false ) || !check_tty_out( program_mode ) ) - return 1; /* check tty only once and don't try to delete a tty */ - } - - in_statsp = ( input_filename[0] && one_to_one ) ? &in_stats : 0; - if( program_mode == m_compress ) - tmp = compress( member_size, volume_size, infd, &encoder_options, &pp, - in_statsp ); - else - tmp = decompress( infd, &pp, ignore_trailing, - loose_trailing, program_mode == m_test ); - if( close( infd ) != 0 ) - { show_file_error( pp.name, "Error closing input file", errno ); - set_retval( &tmp, 1 ); } - set_retval( &retval, tmp ); - if( tmp ) - { if( program_mode != m_test ) cleanup_and_fail( retval ); - else ++failed_tests; } - - if( delete_output_on_interrupt && one_to_one ) - close_and_set_permissions( in_statsp ); - if( input_filename[0] && !keep_input_files && one_to_one && - ( program_mode != m_compress || volume_size == 0 ) ) - remove( input_filename ); - } - if( delete_output_on_interrupt ) close_and_set_permissions( 0 ); /* -o */ - else if( outfd >= 0 && close( outfd ) != 0 ) /* -c */ - { - show_error( "Error closing stdout", errno, false ); - set_retval( &retval, 1 ); - } - if( failed_tests > 0 && verbosity >= 1 && num_filenames > 1 ) - fprintf( stderr, "%s: warning: %d %s failed the test.\n", - program_name, failed_tests, - ( failed_tests == 1 ) ? "file" : "files" ); - free( output_filename ); - free( filenames ); - ap_free( &parser ); - return retval; - } diff --git a/minilzip.c b/minilzip.c new file mode 100644 index 0000000..f9313b2 --- /dev/null +++ b/minilzip.c @@ -0,0 +1,1290 @@ +/* Minilzip - Test program for the library lzlib + Copyright (C) 2009-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ +/* + Exit status: 0 for a normal exit, 1 for environmental problems + (file not found, invalid flags, I/O errors, etc), 2 to indicate a + corrupt or invalid input file, 3 for an internal consistency error + (e.g., bug) which caused minilzip to panic. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ +#include +#if defined __MSVCRT__ +#define fchmod(x,y) 0 +#define fchown(x,y,z) 0 +#define strtoull strtoul +#define SIGHUP SIGTERM +#define S_ISSOCK(x) 0 +#ifndef S_IRGRP +#define S_IRGRP 0 +#define S_IWGRP 0 +#define S_IROTH 0 +#define S_IWOTH 0 +#endif +#endif +#if defined __DJGPP__ +#define S_ISSOCK(x) 0 +#define S_ISVTX 0 +#endif +#endif + +#include "carg_parser.h" +#include "lzlib.h" + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +#if CHAR_BIT != 8 +#error "Environments where CHAR_BIT != 8 are not supported." +#endif + +#if ( defined SIZE_MAX && SIZE_MAX < UINT_MAX ) || \ + ( defined SSIZE_MAX && SSIZE_MAX < INT_MAX ) +#error "Environments where 'size_t' is narrower than 'int' are not supported." +#endif + +#ifndef max + #define max(x,y) ((x) >= (y) ? (x) : (y)) +#endif +#ifndef min + #define min(x,y) ((x) <= (y) ? 
(x) : (y))
+#endif
+
+/* Forward declarations of the error helpers defined further down. */
+static void cleanup_and_fail( const int retval );
+static void show_error( const char * const msg, const int errcode,
+                        const bool help );
+static void show_file_error( const char * const filename,
+                             const char * const msg, const int errcode );
+static void internal_error( const char * const msg );
+static const char * const mem_msg = "Not enough memory.";
+
+/* Message level: negative = quiet ('-q'), each '-v' adds one (max 4). */
+int verbosity = 0;
+
+static const char * const program_name = "minilzip";
+static const char * const program_year = "2022";
+static const char * invocation_name = "minilzip";   /* default value */
+
+/* Recognized compressed-file suffixes and the suffix that replaces each
+   one on decompression. The list is terminated by a null entry. */
+static const struct { const char * from; const char * to; } known_extensions[] = {
+  { ".lz",  ""     },
+  { ".tlz", ".tar" },
+  { 0,      0      } };
+
+struct Lzma_options
+  {
+  int dictionary_size;          /* 4 KiB .. 512 MiB */
+  int match_len_limit;          /* 5 .. 273 */
+  };
+
+enum Mode { m_compress, m_decompress, m_test };
+
+/* Variables used in signal handler context.
+   They are not declared volatile because the handler never returns. */
+static char * output_filename = 0;
+static int outfd = -1;
+static bool delete_output_on_interrupt = false;
+
+
+/* Print the program description, the usage line, the option table, and
+   the notes on multipliers and exit status to standard output.
+   Options that take an argument show its placeholder (<bytes>, <file>)
+   and the descriptions are padded to start in the same column. */
+static void show_help( void )
+  {
+  printf( "Minilzip is a test program for the compression library lzlib, fully\n"
+          "compatible with lzip 1.4 or newer.\n"
+          "\nLzip is a lossless data compressor with a user interface similar to the one\n"
+          "of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov\n"
+          "chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity\n"
+          "checking to maximize interoperability and optimize safety. Lzip can compress\n"
+          "about as fast as gzip (lzip -0) or compress most files more than bzip2\n"
+          "(lzip -9). Decompression speed is intermediate between gzip and bzip2.\n"
+          "Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip\n"
+          "has been designed, written, and tested with great care to replace gzip and\n"
+          "bzip2 as the standard general-purpose compressed format for unix-like\n"
+          "systems.\n"
+          "\nUsage: %s [options] [files]\n", invocation_name );
+  printf( "\nOptions:\n"
+          "  -h, --help                     display this help and exit\n"
+          "  -V, --version                  output version information and exit\n"
+          "  -a, --trailing-error           exit with error status if trailing data\n"
+          "  -b, --member-size=<bytes>      set member size limit in bytes\n"
+          "  -c, --stdout                   write to standard output, keep input files\n"
+          "  -d, --decompress               decompress\n"
+          "  -f, --force                    overwrite existing output files\n"
+          "  -F, --recompress               force re-compression of compressed files\n"
+          "  -k, --keep                     keep (don't delete) input files\n"
+          "  -m, --match-length=<bytes>     set match length limit in bytes [36]\n"
+          "  -o, --output=<file>            write to <file>, keep input files\n"
+          "  -q, --quiet                    suppress all messages\n"
+          "  -s, --dictionary-size=<bytes>  set dictionary size limit in bytes [8 MiB]\n"
+          "  -S, --volume-size=<bytes>      set volume size limit in bytes\n"
+          "  -t, --test                     test compressed file integrity\n"
+          "  -v, --verbose                  be verbose (a 2nd -v gives more)\n"
+          "  -0 .. -9                       set compression level [default 6]\n"
+          "      --fast                     alias for -0\n"
+          "      --best                     alias for -9\n"
+          "      --loose-trailing           allow trailing data seeming corrupt header\n"
+          "      --check-lib                compare version of lzlib.h with liblz.{a,so}\n"
+          "\nIf no file names are given, or if a file is '-', minilzip compresses or\n"
+          "decompresses from standard input to standard output.\n"
+          "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
+          "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
+          "Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12\n"
+          "to 2^29 bytes.\n"
+          "\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n"
+          "scale optimal for all files. If your files are large, very repetitive,\n"
+          "etc, you may need to use the options --dictionary-size and --match-length\n"
+          "directly to achieve optimal performance.\n"
+          "\nTo extract all the files from archive 'foo.tar.lz', use the commands\n"
+          "'tar -xf foo.tar.lz' or 'minilzip -cd foo.tar.lz | tar -xf -'.\n"
+          "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
+          "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
+          "invalid input file, 3 for an internal consistency error (e.g., bug) which\n"
+          "caused minilzip to panic.\n"
+          "\nThe ideas embodied in lzlib are due to (at least) the following people:\n"
+          "Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the\n"
+          "definition of Markov chains), G.N.N. Martin (for the definition of range\n"
+          "encoding), Igor Pavlov (for putting all the above together in LZMA), and\n"
+          "Julian Seward (for bzip2's CLI).\n"
+          "\nReport bugs to lzip-bug@nongnu.org\n"
+          "Lzlib home page: http://www.nongnu.org/lzip/lzlib.html\n" );
+  }
+
+
+/* Print program name and version, copyright year, the version string
+   reported by the linked library (LZ_version), and the license notice. */
+static void show_version( void )
+  {
+  printf( "%s %s\n", program_name, PROGVERSION );
+  printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year );
+  printf( "Using lzlib %s\n", LZ_version() );
+  printf( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n"
+          "This is free software: you are free to change and redistribute it.\n"
+          "There is NO WARRANTY, to the extent permitted by law.\n" );
+  }
+
+
+/* Raise '*retval' to 'new_val'; never lowers it, so the most severe exit
+   status seen so far is kept. */
+static inline void set_retval( int * retval, const int new_val )
+  { if( *retval < new_val ) *retval = new_val; }
+
+
+/* Parse LZ_version_string and compare it with LZ_API_VERSION.
+   Accepted forms: <major>.<minor> or <major>.<minor>[a-z.-]* */
+static int check_lzlib_ver()    /* <major>.<minor> or <major>.<minor>[a-z.-]* */
+  {
+#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012
+  const unsigned char * p = (unsigned char *)LZ_version_string;
+  unsigned major = 0, minor = 0;
+  while( major < 100000 && isdigit( *p ) )
+    { major *= 10; major += *p - '0'; ++p; }
+  if( *p == '.'
) ++p; + else +out: { show_error( "Invalid LZ_version_string in lzlib.h", 0, false ); return 2; } + while( minor < 100 && isdigit( *p ) ) + { minor *= 10; minor += *p - '0'; ++p; } + if( *p && *p != '-' && *p != '.' && !islower( *p ) ) goto out; + const unsigned version = major * 1000 + minor; + if( LZ_API_VERSION != version ) + { + if( verbosity >= 0 ) + fprintf( stderr, "%s: Version mismatch in lzlib.h: " + "LZ_API_VERSION = %u, should be %u.\n", + program_name, LZ_API_VERSION, version ); + return 2; + } +#endif + return 0; + } + + +static int check_lib() + { + int retval = check_lzlib_ver(); + if( strcmp( LZ_version_string, LZ_version() ) != 0 ) + { set_retval( &retval, 1 ); + if( verbosity >= 0 ) + printf( "warning: LZ_version_string != LZ_version() (%s vs %s)\n", + LZ_version_string, LZ_version() ); } +#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012 + if( LZ_API_VERSION != LZ_api_version() ) + { set_retval( &retval, 1 ); + if( verbosity >= 0 ) + printf( "warning: LZ_API_VERSION != LZ_api_version() (%u vs %u)\n", + LZ_API_VERSION, LZ_api_version() ); } +#endif + if( verbosity >= 1 ) + { + printf( "Using lzlib %s\n", LZ_version() ); +#if !defined LZ_API_VERSION + fputs( "LZ_API_VERSION is not defined.\n", stdout ); +#elif LZ_API_VERSION >= 1012 + printf( "Using LZ_API_VERSION = %u\n", LZ_api_version() ); +#else + printf( "Compiled with LZ_API_VERSION = %u. 
" + "Using an unknown LZ_API_VERSION\n", LZ_API_VERSION ); +#endif + } + return retval; + } + + +/* assure at least a minimum size for buffer 'buf' */ +static void * resize_buffer( void * buf, const unsigned min_size ) + { + if( buf ) buf = realloc( buf, min_size ); + else buf = malloc( min_size ); + if( !buf ) { show_error( mem_msg, 0, false ); cleanup_and_fail( 1 ); } + return buf; + } + + +struct Pretty_print + { + const char * name; + char * padded_name; + const char * stdin_name; + unsigned longest_name; + bool first_post; + }; + +static void Pp_init( struct Pretty_print * const pp, + const char * const filenames[], const int num_filenames ) + { + pp->name = 0; + pp->padded_name = 0; + pp->stdin_name = "(stdin)"; + pp->longest_name = 0; + pp->first_post = false; + + if( verbosity <= 0 ) return; + const unsigned stdin_name_len = strlen( pp->stdin_name ); + int i; + for( i = 0; i < num_filenames; ++i ) + { + const char * const s = filenames[i]; + const unsigned len = (strcmp( s, "-" ) == 0) ? 
stdin_name_len : strlen( s ); + if( pp->longest_name < len ) pp->longest_name = len; + } + if( pp->longest_name == 0 ) pp->longest_name = stdin_name_len; + } + +static void Pp_set_name( struct Pretty_print * const pp, + const char * const filename ) + { + unsigned name_len, padded_name_len, i = 0; + + if( filename && filename[0] && strcmp( filename, "-" ) != 0 ) + pp->name = filename; + else pp->name = pp->stdin_name; + name_len = strlen( pp->name ); + padded_name_len = max( name_len, pp->longest_name ) + 4; + pp->padded_name = resize_buffer( pp->padded_name, padded_name_len + 1 ); + while( i < 2 ) pp->padded_name[i++] = ' '; + while( i < name_len + 2 ) { pp->padded_name[i] = pp->name[i-2]; ++i; } + pp->padded_name[i++] = ':'; + while( i < padded_name_len ) pp->padded_name[i++] = ' '; + pp->padded_name[i] = 0; + pp->first_post = true; + } + +static void Pp_reset( struct Pretty_print * const pp ) + { if( pp->name && pp->name[0] ) pp->first_post = true; } + +static void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ) + { + if( verbosity < 0 ) return; + if( pp->first_post ) + { + pp->first_post = false; + fputs( pp->padded_name, stderr ); + if( !msg ) fflush( stderr ); + } + if( msg ) fprintf( stderr, "%s\n", msg ); + } + + +static void show_header( const unsigned dictionary_size ) + { + enum { factor = 1024 }; + const char * const prefix[8] = + { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + const char * p = ""; + const char * np = " "; + unsigned num = dictionary_size; + bool exact = ( num % factor == 0 ); + + int i; for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; + p = prefix[i]; np = ""; } + fprintf( stderr, "dict %s%4u %sB, ", np, num, p ); + } + + +/* separate large numbers >= 100_000 in groups of 3 digits using '_' */ +static const char * format_num3( unsigned long long num ) + { + const char * const si_prefix = "kMGTPEZY"; + const char * const 
binary_prefix = "KMGTPEZY"; + enum { buffers = 8, bufsize = 4 * sizeof (long long) }; + static char buffer[buffers][bufsize]; /* circle of static buffers for printf */ + static int current = 0; + int i; + char * const buf = buffer[current++]; current %= buffers; + char * p = buf + bufsize - 1; /* fill the buffer backwards */ + *p = 0; /* terminator */ + if( num > 1024 ) + { + char prefix = 0; /* try binary first, then si */ + for( i = 0; i < 8 && num >= 1024 && num % 1024 == 0; ++i ) + { num /= 1024; prefix = binary_prefix[i]; } + if( prefix ) *(--p) = 'i'; + else + for( i = 0; i < 8 && num >= 1000 && num % 1000 == 0; ++i ) + { num /= 1000; prefix = si_prefix[i]; } + if( prefix ) *(--p) = prefix; + } + const bool split = num >= 100000; + + for( i = 0; ; ) + { + *(--p) = num % 10 + '0'; num /= 10; if( num == 0 ) break; + if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; } + } + return p; + } + + +static unsigned long long getnum( const char * const arg, + const char * const option_name, + const unsigned long long llimit, + const unsigned long long ulimit ) + { + char * tail; + errno = 0; + unsigned long long result = strtoull( arg, &tail, 0 ); + if( tail == arg ) + { + if( verbosity >= 0 ) + fprintf( stderr, "%s: Bad or missing numerical argument in " + "option '%s'.\n", program_name, option_name ); + exit( 1 ); + } + + if( !errno && tail[0] ) + { + const unsigned factor = ( tail[1] == 'i' ) ? 
1024 : 1000; + int exponent = 0; /* 0 = bad multiplier */ + int i; + switch( tail[0] ) + { + case 'Y': exponent = 8; break; + case 'Z': exponent = 7; break; + case 'E': exponent = 6; break; + case 'P': exponent = 5; break; + case 'T': exponent = 4; break; + case 'G': exponent = 3; break; + case 'M': exponent = 2; break; + case 'K': if( factor == 1024 ) exponent = 1; break; + case 'k': if( factor == 1000 ) exponent = 1; break; + } + if( exponent <= 0 ) + { + if( verbosity >= 0 ) + fprintf( stderr, "%s: Bad multiplier in numerical argument of " + "option '%s'.\n", program_name, option_name ); + exit( 1 ); + } + for( i = 0; i < exponent; ++i ) + { + if( ulimit / factor >= result ) result *= factor; + else { errno = ERANGE; break; } + } + } + if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; + if( errno ) + { + if( verbosity >= 0 ) + fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] " + "in option '%s'.\n", program_name, format_num3( llimit ), + format_num3( ulimit ), option_name ); + exit( 1 ); + } + return result; + } + + +static int get_dict_size( const char * const arg, const char * const option_name ) + { + char * tail; + const long bits = strtol( arg, &tail, 0 ); + if( bits >= LZ_min_dictionary_bits() && + bits <= LZ_max_dictionary_bits() && *tail == 0 ) + return 1 << bits; + int dictionary_size = getnum( arg, option_name, LZ_min_dictionary_size(), + LZ_max_dictionary_size() ); + if( dictionary_size == 65535 ) ++dictionary_size; /* no fast encoder */ + return dictionary_size; + } + + +static void set_mode( enum Mode * const program_modep, const enum Mode new_mode ) + { + if( *program_modep != m_compress && *program_modep != new_mode ) + { + show_error( "Only one operation can be specified.", 0, true ); + exit( 1 ); + } + *program_modep = new_mode; + } + + +static int extension_index( const char * const name ) + { + int eindex; + for( eindex = 0; known_extensions[eindex].from; ++eindex ) + { + const char * const ext = 
known_extensions[eindex].from; + const unsigned name_len = strlen( name ); + const unsigned ext_len = strlen( ext ); + if( name_len > ext_len && + strncmp( name + name_len - ext_len, ext, ext_len ) == 0 ) + return eindex; + } + return -1; + } + + +static void set_c_outname( const char * const name, const bool force_ext, + const bool multifile ) + { + output_filename = resize_buffer( output_filename, strlen( name ) + 5 + + strlen( known_extensions[0].from ) + 1 ); + strcpy( output_filename, name ); + if( multifile ) strcat( output_filename, "00001" ); + if( force_ext || multifile ) + strcat( output_filename, known_extensions[0].from ); + } + + +static void set_d_outname( const char * const name, const int eindex ) + { + const unsigned name_len = strlen( name ); + if( eindex >= 0 ) + { + const char * const from = known_extensions[eindex].from; + const unsigned from_len = strlen( from ); + if( name_len > from_len ) + { + output_filename = resize_buffer( output_filename, name_len + + strlen( known_extensions[eindex].to ) + 1 ); + strcpy( output_filename, name ); + strcpy( output_filename + name_len - from_len, known_extensions[eindex].to ); + return; + } + } + output_filename = resize_buffer( output_filename, name_len + 4 + 1 ); + strcpy( output_filename, name ); + strcat( output_filename, ".out" ); + if( verbosity >= 1 ) + fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n", + program_name, name, output_filename ); + } + + +static int open_instream( const char * const name, struct stat * const in_statsp, + const enum Mode program_mode, const int eindex, + const bool one_to_one, const bool recompress ) + { + if( program_mode == m_compress && !recompress && eindex >= 0 ) + { + if( verbosity >= 0 ) + fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n", + program_name, name, known_extensions[eindex].from ); + return -1; + } + int infd = open( name, O_RDONLY | O_BINARY ); + if( infd < 0 ) + show_file_error( name, "Can't open input file", 
errno ); + else + { + const int i = fstat( infd, in_statsp ); + const mode_t mode = in_statsp->st_mode; + const bool can_read = ( i == 0 && + ( S_ISBLK( mode ) || S_ISCHR( mode ) || + S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); + if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) ) + { + if( verbosity >= 0 ) + fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", + program_name, name, ( can_read && one_to_one ) ? + ",\n and neither '-c' nor '-o' were specified" : "" ); + close( infd ); + infd = -1; + } + } + return infd; + } + + +static bool open_outstream( const bool force, const bool protect ) + { + const mode_t usr_rw = S_IRUSR | S_IWUSR; + const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; + const mode_t outfd_mode = protect ? usr_rw : all_rw; + int flags = O_CREAT | O_WRONLY | O_BINARY; + if( force ) flags |= O_TRUNC; else flags |= O_EXCL; + + outfd = open( output_filename, flags, outfd_mode ); + if( outfd >= 0 ) delete_output_on_interrupt = true; + else if( verbosity >= 0 ) + { + if( errno == EEXIST ) + fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n", + program_name, output_filename ); + else + fprintf( stderr, "%s: Can't create output file '%s': %s\n", + program_name, output_filename, strerror( errno ) ); + } + return ( outfd >= 0 ); + } + + +static void set_signals( void (*action)(int) ) + { + signal( SIGHUP, action ); + signal( SIGINT, action ); + signal( SIGTERM, action ); + } + + +static void cleanup_and_fail( const int retval ) + { + set_signals( SIG_IGN ); /* ignore signals */ + if( delete_output_on_interrupt ) + { + delete_output_on_interrupt = false; + if( verbosity >= 0 ) + fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n", + program_name, output_filename ); + if( outfd >= 0 ) { close( outfd ); outfd = -1; } + if( remove( output_filename ) != 0 && errno != ENOENT ) + show_error( "WARNING: deletion of output file (apparently) failed.", 0, false ); + } + exit( retval ); + 
} + + +static void signal_handler( int sig ) + { + if( sig ) {} /* keep compiler happy */ + show_error( "Control-C or similar caught, quitting.", 0, false ); + cleanup_and_fail( 1 ); + } + + +static bool check_tty_in( const char * const input_filename, const int infd, + const enum Mode program_mode, int * const retval ) + { + if( ( program_mode == m_decompress || program_mode == m_test ) && + isatty( infd ) ) /* for example /dev/tty */ + { show_file_error( input_filename, + "I won't read compressed data from a terminal.", 0 ); + close( infd ); set_retval( retval, 2 ); + if( program_mode != m_test ) cleanup_and_fail( *retval ); + return false; } + return true; + } + +static bool check_tty_out( const enum Mode program_mode ) + { + if( program_mode == m_compress && isatty( outfd ) ) + { show_file_error( output_filename[0] ? + output_filename : "(stdout)", + "I won't write compressed data to a terminal.", 0 ); + return false; } + return true; + } + + +/* Set permissions, owner, and times. */ +static void close_and_set_permissions( const struct stat * const in_statsp ) + { + bool warning = false; + if( in_statsp ) + { + const mode_t mode = in_statsp->st_mode; + /* fchown will in many cases return with EPERM, which can be safely ignored. */ + if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 ) + { if( fchmod( outfd, mode ) != 0 ) warning = true; } + else + if( errno != EPERM || + fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 ) + warning = true; + } + if( close( outfd ) != 0 ) + { + show_error( "Error closing output file", errno, false ); + cleanup_and_fail( 1 ); + } + outfd = -1; + delete_output_on_interrupt = false; + if( in_statsp ) + { + struct utimbuf t; + t.actime = in_statsp->st_atime; + t.modtime = in_statsp->st_mtime; + if( utime( output_filename, &t ) != 0 ) warning = true; + } + if( warning && verbosity >= 1 ) + show_error( "Can't change output file attributes.", 0, false ); + } + + +/* Return the number of bytes really read. 
+ If (value returned < size) and (errno == 0), means EOF was reached. +*/ +static int readblock( const int fd, uint8_t * const buf, const int size ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = read( fd, buf + sz, size - sz ); + if( n > 0 ) sz += n; + else if( n == 0 ) break; /* EOF */ + else if( errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +/* Return the number of bytes really written. + If (value returned < size), it is always an error. +*/ +static int writeblock( const int fd, const uint8_t * const buf, const int size ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = write( fd, buf + sz, size - sz ); + if( n > 0 ) sz += n; + else if( n < 0 && errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +static bool next_filename( void ) + { + const unsigned name_len = strlen( output_filename ); + const unsigned ext_len = strlen( known_extensions[0].from ); + int i, j; + if( name_len >= ext_len + 5 ) /* "*00001.lz" */ + for( i = name_len - ext_len - 1, j = 0; j < 5; --i, ++j ) + { + if( output_filename[i] < '9' ) { ++output_filename[i]; return true; } + else output_filename[i] = '0'; + } + return false; + } + + +static int do_compress( struct LZ_Encoder * const encoder, + const unsigned long long member_size, + const unsigned long long volume_size, const int infd, + struct Pretty_print * const pp, + const struct stat * const in_statsp ) + { + unsigned long long partial_volume_size = 0; + enum { buffer_size = 65536 }; + uint8_t buffer[buffer_size]; /* read/write buffer */ + if( verbosity >= 1 ) Pp_show_msg( pp, 0 ); + + while( true ) + { + int in_size = 0; + while( LZ_compress_write_size( encoder ) > 0 ) + { + const int size = min( LZ_compress_write_size( encoder ), buffer_size ); + const int rd = readblock( infd, buffer, size ); + if( rd != size && errno ) + { + Pp_show_msg( pp, 0 ); show_error( "Read error", errno, false ); + return 1; + } + if( rd > 0 && rd != LZ_compress_write( encoder, 
buffer, rd ) ) + internal_error( "library error (LZ_compress_write)." ); + if( rd < size ) LZ_compress_finish( encoder ); +/* else LZ_compress_sync_flush( encoder ); */ + in_size += rd; + } + const int out_size = LZ_compress_read( encoder, buffer, buffer_size ); + if( out_size < 0 ) + { + Pp_show_msg( pp, 0 ); + if( verbosity >= 0 ) + fprintf( stderr, "%s: LZ_compress_read error: %s\n", + program_name, LZ_strerror( LZ_compress_errno( encoder ) ) ); + return 1; + } + else if( out_size > 0 ) + { + const int wr = writeblock( outfd, buffer, out_size ); + if( wr != out_size ) + { + Pp_show_msg( pp, 0 ); show_error( "Write error", errno, false ); + return 1; + } + } + else if( in_size == 0 ) + internal_error( "library error (LZ_compress_read)." ); + if( LZ_compress_member_finished( encoder ) ) + { + unsigned long long size; + if( LZ_compress_finished( encoder ) == 1 ) break; + if( volume_size > 0 ) + { + partial_volume_size += LZ_compress_member_position( encoder ); + if( partial_volume_size >= volume_size - LZ_min_dictionary_size() ) + { + partial_volume_size = 0; + if( delete_output_on_interrupt ) + { + close_and_set_permissions( in_statsp ); + if( !next_filename() ) + { Pp_show_msg( pp, "Too many volume files." 
); return 1; } + if( !open_outstream( true, in_statsp ) ) return 1; + } + } + size = min( member_size, volume_size - partial_volume_size ); + } + else + size = member_size; + if( LZ_compress_restart_member( encoder, size ) < 0 ) + { + Pp_show_msg( pp, 0 ); + if( verbosity >= 0 ) + fprintf( stderr, "%s: LZ_compress_restart_member error: %s\n", + program_name, LZ_strerror( LZ_compress_errno( encoder ) ) ); + return 1; + } + } + } + + if( verbosity >= 1 ) + { + const unsigned long long in_size = LZ_compress_total_in_size( encoder ); + const unsigned long long out_size = LZ_compress_total_out_size( encoder ); + if( in_size == 0 || out_size == 0 ) + fputs( " no data compressed.\n", stderr ); + else + fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved, " + "%llu in, %llu out.\n", + (double)in_size / out_size, + ( 100.0 * out_size ) / in_size, + 100.0 - ( ( 100.0 * out_size ) / in_size ), + in_size, out_size ); + } + return 0; + } + + +static int compress( const unsigned long long member_size, + const unsigned long long volume_size, const int infd, + const struct Lzma_options * const encoder_options, + struct Pretty_print * const pp, + const struct stat * const in_statsp ) + { + struct LZ_Encoder * const encoder = + LZ_compress_open( encoder_options->dictionary_size, + encoder_options->match_len_limit, ( volume_size > 0 ) ? + min( member_size, volume_size ) : member_size ); + int retval; + + if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) + { + if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error ) + Pp_show_msg( pp, "Not enough memory. Try a smaller dictionary size." ); + else + internal_error( "invalid argument to encoder." 
); + retval = 1; + } + else retval = do_compress( encoder, member_size, volume_size, + infd, pp, in_statsp ); + LZ_compress_close( encoder ); + return retval; + } + + +static int do_decompress( struct LZ_Decoder * const decoder, const int infd, + struct Pretty_print * const pp, const bool ignore_trailing, + const bool loose_trailing, const bool testing ) + { + enum { buffer_size = 65536 }; + uint8_t buffer[buffer_size]; /* read/write buffer */ + unsigned long long total_in = 0; /* to detect library stall */ + bool first_member; + + for( first_member = true; ; ) + { + const int max_in_size = + min( LZ_decompress_write_size( decoder ), buffer_size ); + int in_size = 0, out_size = 0; + if( max_in_size > 0 ) + { + in_size = readblock( infd, buffer, max_in_size ); + if( in_size != max_in_size && errno ) + { + Pp_show_msg( pp, 0 ); show_error( "Read error", errno, false ); + return 1; + } + if( in_size > 0 && in_size != LZ_decompress_write( decoder, buffer, in_size ) ) + internal_error( "library error (LZ_decompress_write)." ); + if( in_size < max_in_size ) LZ_decompress_finish( decoder ); + } + while( true ) + { + const int rd = + LZ_decompress_read( decoder, (outfd >= 0) ? buffer : 0, buffer_size ); + if( rd > 0 ) + { + out_size += rd; + if( outfd >= 0 ) + { + const int wr = writeblock( outfd, buffer, rd ); + if( wr != rd ) + { + Pp_show_msg( pp, 0 ); show_error( "Write error", errno, false ); + return 1; + } + } + } + else if( rd < 0 ) { out_size = rd; break; } + if( LZ_decompress_member_finished( decoder ) == 1 ) + { + if( verbosity >= 1 ) + { + const unsigned long long data_size = LZ_decompress_data_position( decoder ); + const unsigned long long member_size = LZ_decompress_member_position( decoder ); + if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) + Pp_show_msg( pp, 0 ); + if( verbosity >= 2 ) + { + if( verbosity >= 4 ) + show_header( LZ_decompress_dictionary_size( decoder ) ); + if( data_size == 0 || member_size == 0 ) + fputs( "no data compressed. 
", stderr ); + else + fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved. ", + (double)data_size / member_size, + ( 100.0 * member_size ) / data_size, + 100.0 - ( ( 100.0 * member_size ) / data_size ) ); + if( verbosity >= 4 ) + fprintf( stderr, "CRC %08X, ", LZ_decompress_data_crc( decoder ) ); + if( verbosity >= 3 ) + fprintf( stderr, "%9llu out, %8llu in. ", data_size, member_size ); + fputs( testing ? "ok\n" : "done\n", stderr ); Pp_reset( pp ); + } + } + first_member = false; /* member decompressed successfully */ + } + if( rd <= 0 ) break; + } + if( out_size < 0 || ( first_member && out_size == 0 ) ) + { + const unsigned long long member_pos = LZ_decompress_member_position( decoder ); + const enum LZ_Errno lz_errno = LZ_decompress_errno( decoder ); + if( lz_errno == LZ_library_error ) + internal_error( "library error (LZ_decompress_read)." ); + if( member_pos <= 6 ) + { + if( lz_errno == LZ_unexpected_eof ) + { + if( first_member ) + show_file_error( pp->name, "File ends unexpectedly at member header.", 0 ); + else + Pp_show_msg( pp, "Truncated header in multimember file." ); + return 2; + } + else if( lz_errno == LZ_data_error ) + { + if( member_pos == 4 ) + { if( verbosity >= 0 ) + { Pp_show_msg( pp, 0 ); + fprintf( stderr, "Version %d member format not supported.\n", + LZ_decompress_member_version( decoder ) ); } } + else if( member_pos == 5 ) + Pp_show_msg( pp, "Invalid dictionary size in member header." ); + else if( first_member ) /* for lzlib older than 1.10 */ + Pp_show_msg( pp, "Bad version or dictionary size in member header." ); + else if( !loose_trailing ) + Pp_show_msg( pp, "Corrupt header in multimember file." ); + else if( !ignore_trailing ) + Pp_show_msg( pp, "Trailing data not allowed." 
); + else break; /* trailing data */ + return 2; + } + } + if( lz_errno == LZ_header_error ) + { + if( first_member ) + show_file_error( pp->name, + "Bad magic number (file not in lzip format).", 0 ); + else if( !ignore_trailing ) + Pp_show_msg( pp, "Trailing data not allowed." ); + else break; /* trailing data */ + return 2; + } + if( lz_errno == LZ_mem_error ) { Pp_show_msg( pp, mem_msg ); return 1; } + if( verbosity >= 0 ) + { + Pp_show_msg( pp, 0 ); + fprintf( stderr, "%s at pos %llu\n", ( lz_errno == LZ_unexpected_eof ) ? + "File ends unexpectedly" : "Decoder error", + LZ_decompress_total_in_size( decoder ) ); + } + return 2; + } + if( LZ_decompress_finished( decoder ) == 1 ) break; + if( in_size == 0 && out_size == 0 ) + { + const unsigned long long size = LZ_decompress_total_in_size( decoder ); + if( total_in == size ) internal_error( "library error (stalled)." ); + total_in = size; + } + } + if( verbosity == 1 ) fputs( testing ? "ok\n" : "done\n", stderr ); + return 0; + } + + +static int decompress( const int infd, struct Pretty_print * const pp, + const bool ignore_trailing, + const bool loose_trailing, const bool testing ) + { + struct LZ_Decoder * const decoder = LZ_decompress_open(); + int retval; + + if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) + { Pp_show_msg( pp, mem_msg ); retval = 1; } + else retval = do_decompress( decoder, infd, pp, ignore_trailing, + loose_trailing, testing ); + LZ_decompress_close( decoder ); + return retval; + } + + +static void show_error( const char * const msg, const int errcode, + const bool help ) + { + if( verbosity < 0 ) return; + if( msg && msg[0] ) + fprintf( stderr, "%s: %s%s%s\n", program_name, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? 
strerror( errcode ) : "" ); + if( help ) + fprintf( stderr, "Try '%s --help' for more information.\n", + invocation_name ); + } + + +static void show_file_error( const char * const filename, + const char * const msg, const int errcode ) + { + if( verbosity >= 0 ) + fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? strerror( errcode ) : "" ); + } + + +static void internal_error( const char * const msg ) + { + if( verbosity >= 0 ) + fprintf( stderr, "%s: internal error: %s\n", program_name, msg ); + exit( 3 ); + } + + +int main( const int argc, const char * const argv[] ) + { + /* Mapping from gzip/bzip2 style 1..9 compression modes + to the corresponding LZMA compression modes. */ + const struct Lzma_options option_mapping[] = + { + { 65535, 16 }, /* -0 (65535,16 chooses fast encoder) */ + { 1 << 20, 5 }, /* -1 */ + { 3 << 19, 6 }, /* -2 */ + { 1 << 21, 8 }, /* -3 */ + { 3 << 20, 12 }, /* -4 */ + { 1 << 22, 20 }, /* -5 */ + { 1 << 23, 36 }, /* -6 */ + { 1 << 24, 68 }, /* -7 */ + { 3 << 23, 132 }, /* -8 */ + { 1 << 25, 273 } }; /* -9 */ + struct Lzma_options encoder_options = option_mapping[6]; /* default = "-6" */ + const unsigned long long max_member_size = 0x0008000000000000ULL; /* 2 PiB */ + const unsigned long long max_volume_size = 0x4000000000000000ULL; /* 4 EiB */ + unsigned long long member_size = max_member_size; + unsigned long long volume_size = 0; + const char * default_output_filename = ""; + enum Mode program_mode = m_compress; + int i; + bool force = false; + bool ignore_trailing = true; + bool keep_input_files = false; + bool loose_trailing = false; + bool recompress = false; + bool to_stdout = false; + if( argc > 0 ) invocation_name = argv[0]; + + enum { opt_chk = 256, opt_lt }; + const struct ap_Option options[] = + { + { '0', "fast", ap_no }, + { '1', 0, ap_no }, + { '2', 0, ap_no }, + { '3', 0, ap_no }, + { '4', 0, ap_no }, + { '5', 0, ap_no }, + { '6', 0, ap_no }, + { '7', 0, 
ap_no }, + { '8', 0, ap_no }, + { '9', "best", ap_no }, + { 'a', "trailing-error", ap_no }, + { 'b', "member-size", ap_yes }, + { 'c', "stdout", ap_no }, + { 'd', "decompress", ap_no }, + { 'f', "force", ap_no }, + { 'F', "recompress", ap_no }, + { 'h', "help", ap_no }, + { 'k', "keep", ap_no }, + { 'm', "match-length", ap_yes }, + { 'n', "threads", ap_yes }, + { 'o', "output", ap_yes }, + { 'q', "quiet", ap_no }, + { 's', "dictionary-size", ap_yes }, + { 'S', "volume-size", ap_yes }, + { 't', "test", ap_no }, + { 'v', "verbose", ap_no }, + { 'V', "version", ap_no }, + { opt_chk, "check-lib", ap_no }, + { opt_lt, "loose-trailing", ap_no }, + { 0, 0, ap_no } }; + + /* static because valgrind complains and memory management in C sucks */ + static struct Arg_parser parser; + if( !ap_init( &parser, argc, argv, options, 0 ) ) + { show_error( mem_msg, 0, false ); return 1; } + if( ap_error( &parser ) ) /* bad option */ + { show_error( ap_error( &parser ), 0, true ); return 1; } + + int argind = 0; + for( ; argind < ap_arguments( &parser ); ++argind ) + { + const int code = ap_code( &parser, argind ); + if( !code ) break; /* no more options */ + const char * const pn = ap_parsed_name( &parser, argind ); + const char * const arg = ap_argument( &parser, argind ); + switch( code ) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + encoder_options = option_mapping[code-'0']; break; + case 'a': ignore_trailing = false; break; + case 'b': member_size = getnum( arg, pn, 100000, max_member_size ); break; + case 'c': to_stdout = true; break; + case 'd': set_mode( &program_mode, m_decompress ); break; + case 'f': force = true; break; + case 'F': recompress = true; break; + case 'h': show_help(); return 0; + case 'k': keep_input_files = true; break; + case 'm': encoder_options.match_len_limit = + getnum( arg, pn, LZ_min_match_len_limit(), + LZ_max_match_len_limit() ); break; + case 'n': break; + case 'o': if( strcmp( arg, 
"-" ) == 0 ) to_stdout = true; + else { default_output_filename = arg; } break; + case 'q': verbosity = -1; break; + case 's': encoder_options.dictionary_size = get_dict_size( arg, pn ); + break; + case 'S': volume_size = getnum( arg, pn, 100000, max_volume_size ); break; + case 't': set_mode( &program_mode, m_test ); break; + case 'v': if( verbosity < 4 ) ++verbosity; break; + case 'V': show_version(); return 0; + case opt_chk: return check_lib(); + case opt_lt: loose_trailing = true; break; + default : internal_error( "uncaught option." ); + } + } /* end process options */ + + if( strcmp( PROGVERSION, LZ_version_string ) != 0 ) + internal_error( "wrong PROGVERSION." ); +#if !defined LZ_API_VERSION || LZ_API_VERSION < 1012 +#error "lzlib 1.12 or newer needed." +#else + if( LZ_api_version() < 1012 ) /* minilzip passes null to LZ_decompress_read */ + { show_error( "lzlib 1.12 or newer needed. Try --check-lib.", 0, false ); + return 1; } + if( LZ_api_version() != LZ_API_VERSION ) show_error( + "warning: wrong library API version. Try --check-lib.", 0, false ); + else +#endif + if( strcmp( LZ_version_string, LZ_version() ) != 0 ) show_error( + "warning: wrong library version_string. 
Try --check-lib.", 0, false ); + +#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + static const char ** filenames = 0; + int num_filenames = max( 1, ap_arguments( &parser ) - argind ); + filenames = resize_buffer( filenames, num_filenames * sizeof filenames[0] ); + filenames[0] = "-"; + + bool filenames_given = false; + for( i = 0; argind + i < ap_arguments( &parser ); ++i ) + { + filenames[i] = ap_argument( &parser, argind + i ); + if( strcmp( filenames[i], "-" ) != 0 ) filenames_given = true; + } + + if( program_mode == m_compress ) + { + if( volume_size > 0 && !to_stdout && default_output_filename[0] && + num_filenames > 1 ) + { show_error( "Only can compress one file when using '-o' and '-S'.", + 0, true ); return 1; } + } + else volume_size = 0; + if( program_mode == m_test ) to_stdout = false; /* apply overrides */ + if( program_mode == m_test || to_stdout ) default_output_filename = ""; + + output_filename = resize_buffer( output_filename, 1 ); + output_filename[0] = 0; + if( to_stdout && program_mode != m_test ) /* check tty only once */ + { outfd = STDOUT_FILENO; if( !check_tty_out( program_mode ) ) return 1; } + else outfd = -1; + + const bool to_file = !to_stdout && program_mode != m_test && + default_output_filename[0]; + if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) ) + set_signals( signal_handler ); + + static struct Pretty_print pp; + Pp_init( &pp, filenames, num_filenames ); + + int failed_tests = 0; + int retval = 0; + const bool one_to_one = !to_stdout && program_mode != m_test && !to_file; + bool stdin_used = false; + for( i = 0; i < num_filenames; ++i ) + { + const char * input_filename = ""; + int infd; + struct stat in_stats; + + Pp_set_name( &pp, filenames[i] ); + if( strcmp( filenames[i], "-" ) == 0 ) + { + if( stdin_used ) continue; else stdin_used = true; + infd = STDIN_FILENO; + if( !check_tty_in( pp.name, 
infd, program_mode, &retval ) ) continue; + if( one_to_one ) { outfd = STDOUT_FILENO; output_filename[0] = 0; } + } + else + { + const int eindex = extension_index( input_filename = filenames[i] ); + infd = open_instream( input_filename, &in_stats, program_mode, + eindex, one_to_one, recompress ); + if( infd < 0 ) { set_retval( &retval, 1 ); continue; } + if( !check_tty_in( pp.name, infd, program_mode, &retval ) ) continue; + if( one_to_one ) /* open outfd after verifying infd */ + { + if( program_mode == m_compress ) + set_c_outname( input_filename, true, volume_size > 0 ); + else set_d_outname( input_filename, eindex ); + if( !open_outstream( force, true ) ) + { close( infd ); set_retval( &retval, 1 ); continue; } + } + } + + if( one_to_one && !check_tty_out( program_mode ) ) + { set_retval( &retval, 1 ); return retval; } /* don't delete a tty */ + + if( to_file && outfd < 0 ) /* open outfd after verifying infd */ + { + if( program_mode == m_compress ) set_c_outname( default_output_filename, + false, volume_size > 0 ); + else + { output_filename = resize_buffer( output_filename, + strlen( default_output_filename ) + 1 ); + strcpy( output_filename, default_output_filename ); } + if( !open_outstream( force, false ) || !check_tty_out( program_mode ) ) + return 1; /* check tty only once and don't try to delete a tty */ + } + + const struct stat * const in_statsp = + ( input_filename[0] && one_to_one ) ? 
&in_stats : 0; + int tmp; + if( program_mode == m_compress ) + tmp = compress( member_size, volume_size, infd, &encoder_options, &pp, + in_statsp ); + else + tmp = decompress( infd, &pp, ignore_trailing, + loose_trailing, program_mode == m_test ); + if( close( infd ) != 0 ) + { show_file_error( pp.name, "Error closing input file", errno ); + set_retval( &tmp, 1 ); } + set_retval( &retval, tmp ); + if( tmp ) + { if( program_mode != m_test ) cleanup_and_fail( retval ); + else ++failed_tests; } + + if( delete_output_on_interrupt && one_to_one ) + close_and_set_permissions( in_statsp ); + if( input_filename[0] && !keep_input_files && one_to_one && + ( program_mode != m_compress || volume_size == 0 ) ) + remove( input_filename ); + } + if( delete_output_on_interrupt ) close_and_set_permissions( 0 ); /* -o */ + else if( outfd >= 0 && close( outfd ) != 0 ) /* -c */ + { + show_error( "Error closing stdout", errno, false ); + set_retval( &retval, 1 ); + } + if( failed_tests > 0 && verbosity >= 1 && num_filenames > 1 ) + fprintf( stderr, "%s: warning: %d %s failed the test.\n", + program_name, failed_tests, + ( failed_tests == 1 ) ? "file" : "files" ); + free( output_filename ); + free( filenames ); + ap_free( &parser ); + return retval; + } diff --git a/testsuite/check.sh b/testsuite/check.sh index 44e2428..e93697e 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lzlib - Compression library for the lzip format -# Copyright (C) 2009-2021 Antonio Diaz Diaz. +# Copyright (C) 2009-2022 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute, and modify it. @@ -39,7 +39,8 @@ fox_lz="${testdir}"/fox.lz fail=0 test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } -"${LZIP}" --check-lib # just print warning +"${LZIP}" --check-lib # just print warning +[ $? != 2 ] || { test_failed $LINENO ; exit 2 ; } # unless bad lzlib.h printf "testing lzlib-%s..." 
"$2" "${LZIP}" -fkqm4 in @@ -99,6 +100,7 @@ done printf "LZIP\001-.............................." | "${LZIP}" -t 2> /dev/null printf "LZIP\002-.............................." | "${LZIP}" -t 2> /dev/null printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null +rm -f out || framework_failure printf "\ntesting decompression..." @@ -118,25 +120,28 @@ done lines=$("${LZIP}" -tvv "${in_em}" 2>&1 | wc -l) || test_failed $LINENO [ "${lines}" -eq 8 ] || test_failed $LINENO "${lines}" +"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO cat "${in_lz}" > copy.lz || framework_failure "${LZIP}" -dk copy.lz || test_failed $LINENO cmp in copy || test_failed $LINENO -printf "to be overwritten" > copy || framework_failure -"${LZIP}" -d copy.lz 2> /dev/null +cat fox > copy || framework_failure +cat "${in_lz}" > out.lz || framework_failure +rm -f out || framework_failure +"${LZIP}" -d copy.lz out.lz 2> /dev/null # skip copy, decompress out [ $? = 1 ] || test_failed $LINENO +cmp fox copy || test_failed $LINENO +cmp in out || test_failed $LINENO "${LZIP}" -df copy.lz || test_failed $LINENO [ ! -e copy.lz ] || test_failed $LINENO cmp in copy || test_failed $LINENO +rm -f copy out || framework_failure -rm -f copy || framework_failure cat "${in_lz}" > copy.lz || framework_failure "${LZIP}" -d -S100k copy.lz || test_failed $LINENO # ignore -S [ ! -e copy.lz ] || test_failed $LINENO cmp in copy || test_failed $LINENO printf "to be overwritten" > copy || framework_failure -"${LZIP}" -d -o copy < "${in_lz}" 2> /dev/null -[ $? = 1 ] || test_failed $LINENO "${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO cmp in copy || test_failed $LINENO rm -f out copy || framework_failure @@ -160,7 +165,7 @@ rm -f copy anyothername.out || framework_failure [ $? = 1 ] || test_failed $LINENO "${LZIP}" -cdq in "${in_lz}" > copy [ $? 
= 2 ] || test_failed $LINENO -cat copy in | cmp in - || test_failed $LINENO +cat copy in | cmp in - || test_failed $LINENO # copy must be empty "${LZIP}" -cdq nx_file.lz "${in_lz}" > copy [ $? = 1 ] || test_failed $LINENO cmp in copy || test_failed $LINENO @@ -381,7 +386,6 @@ for i in fox_v2.lz fox_s11.lz fox_de20.lz \ [ $? = 2 ] || test_failed $LINENO $i done -"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO for i in fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do "${LZIP}" -cdq "${testdir}"/$i > out [ $? = 2 ] || test_failed $LINENO $i -- cgit v1.2.3