diff options
-rw-r--r-- | ChangeLog | 17 | ||||
-rw-r--r-- | INSTALL | 4 | ||||
-rw-r--r-- | Makefile.in | 23 | ||||
-rw-r--r-- | NEWS | 97 | ||||
-rw-r--r-- | README | 14 | ||||
-rw-r--r-- | bbexample.c | 34 | ||||
-rw-r--r-- | carg_parser.c | 110 | ||||
-rw-r--r-- | carg_parser.h | 14 | ||||
-rw-r--r-- | cbuffer.c | 10 | ||||
-rwxr-xr-x | configure | 16 | ||||
-rw-r--r-- | decoder.c | 21 | ||||
-rw-r--r-- | decoder.h | 123 | ||||
-rw-r--r-- | doc/lzlib.info | 243 | ||||
-rw-r--r-- | doc/lzlib.texi | 244 | ||||
-rw-r--r-- | doc/minilzip.1 | 35 | ||||
-rw-r--r-- | encoder.c | 127 | ||||
-rw-r--r-- | encoder.h | 5 | ||||
-rw-r--r-- | encoder_base.c | 23 | ||||
-rw-r--r-- | encoder_base.h | 22 | ||||
-rw-r--r-- | fast_encoder.c | 35 | ||||
-rw-r--r-- | fast_encoder.h | 2 | ||||
-rw-r--r-- | ffexample.c | 12 | ||||
-rw-r--r-- | lzcheck.c | 11 | ||||
-rw-r--r-- | lzip.h | 3 | ||||
-rw-r--r-- | lzlib.c | 2 | ||||
-rw-r--r-- | lzlib.h | 6 | ||||
-rw-r--r-- | minilzip.c (renamed from main.c) | 256 | ||||
-rwxr-xr-x | testsuite/check.sh | 22 |
28 files changed, 807 insertions, 724 deletions
@@ -1,3 +1,15 @@ +2022-01-23 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.13 released. + * Set variables AR and ARFLAGS from configure. + (Reported by Hoël Bézier). + * main.c: Rename to minilzip.c. + * minilzip.c (getnum): Show option name and valid range if error. + (check_lib): Check that LZ_API_VERSION and LZ_version_string match. + * Improve several descriptions in manual, '--help', and man page. + * lzlib.texi: Change GNU Texinfo category to 'Compression'. + (Reported by Alfred M. Szmidt). + 2021-01-02 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.12 released. @@ -48,7 +60,7 @@ * main.c: Compile on DOS with DJGPP. * lzlib.texi: Improve descriptions of '-0..-9', '-m', and '-s'. Document that 'LZ_(de)compress_finish' can be called repeatedly. - * configure: Accept appending to CFLAGS, 'CFLAGS+=OPTIONS'. + * configure: Accept appending to CFLAGS; 'CFLAGS+=OPTIONS'. * Makefile.in: Rename targets 'install-bin*' to 'install-lib*'. * Makefile.in: Targets 'install-bin*' now install minilzip. * INSTALL: Document use of CFLAGS+='-D __USE_MINGW_ANSI_STDIO'. @@ -57,6 +69,7 @@ * Version 1.10 released. * LZ_compress_finish now adjusts dictionary size for each member. + (Older versions can adjust dictionary size only once). * lzlib.c (LZ_decompress_read): Detect corrupt header with HD=3. * main.c: New option '--loose-trailing'. * main.c (main): Option '-S, --volume-size' now keeps input files. @@ -235,7 +248,7 @@ * Version 0.1 released. -Copyright (C) 2009-2021 Antonio Diaz Diaz. +Copyright (C) 2009-2022 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute, and @@ -1,7 +1,7 @@ Requirements ------------ You will need a C99 compiler. (gcc 3.3.6 or newer is recommended). -I use gcc 6.1.0 and 4.1.2, but the code should compile with any standards +I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards compliant compiler. 
Gcc is available at http://gcc.gnu.org. @@ -74,7 +74,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2009-2021 Antonio Diaz Diaz. +Copyright (C) 2009-2022 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. diff --git a/Makefile.in b/Makefile.in index 94e3770..81b404b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -1,6 +1,5 @@ DISTNAME = $(pkgname)-$(pkgversion) -AR = ar INSTALL = install INSTALL_PROGRAM = $(INSTALL) -m 755 INSTALL_DATA = $(INSTALL) -m 644 @@ -9,7 +8,7 @@ LDCONFIG = /sbin/ldconfig SHELL = /bin/sh CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 -objs = carg_parser.o main.o +objs = carg_parser.o minilzip.o .PHONY : all install install-bin install-info install-man \ @@ -24,27 +23,27 @@ objs = carg_parser.o main.o all : $(progname_static) $(progname_shared) lib$(libname).a : lzlib.o - $(AR) -rcs $@ $< + $(AR) $(ARFLAGS) $@ $< lib$(libname).so.$(pkgversion) : lzlib_sh.o - $(CC) $(LDFLAGS) $(CFLAGS) -fpic -fPIC -shared -Wl,--soname=lib$(libname).so.$(soversion) -o $@ $< + $(CC) $(CFLAGS) $(LDFLAGS) -fpic -fPIC -shared -Wl,--soname=lib$(libname).so.$(soversion) -o $@ $< $(progname) : $(objs) lib$(libname).a - $(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(objs) lib$(libname).a + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs) lib$(libname).a $(progname)_shared : $(objs) lib$(libname).so.$(pkgversion) - $(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(objs) lib$(libname).so.$(pkgversion) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs) lib$(libname).so.$(pkgversion) bbexample : bbexample.o lib$(libname).a - $(CC) $(LDFLAGS) $(CFLAGS) -o $@ bbexample.o lib$(libname).a + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ bbexample.o lib$(libname).a ffexample : ffexample.o lib$(libname).a - $(CC) $(LDFLAGS) $(CFLAGS) -o $@ ffexample.o lib$(libname).a + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ ffexample.o lib$(libname).a lzcheck : lzcheck.o lib$(libname).a - $(CC) $(LDFLAGS) 
$(CFLAGS) -o $@ lzcheck.o lib$(libname).a + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ lzcheck.o lib$(libname).a -main.o : main.c +minilzip.o : minilzip.c $(CC) $(CPPFLAGS) $(CFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< lzlib_sh.o : lzlib.c @@ -60,7 +59,7 @@ $(objs) : Makefile carg_parser.o : carg_parser.h lzlib.o : Makefile $(lzdeps) lzlib_sh.o : Makefile $(lzdeps) -main.o : carg_parser.h lzlib.h +minilzip.o : carg_parser.h lzlib.h bbexample.o : Makefile lzlib.h ffexample.o : Makefile lzlib.h lzcheck.o : Makefile lzlib.h @@ -76,7 +75,7 @@ $(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi man : $(VPATH)/doc/$(progname).1 $(VPATH)/doc/$(progname).1 : $(progname) - help2man -n 'reduces the size of files' -o $@ --no-info ./$(progname) + help2man -n 'reduces the size of files' -o $@ --info-page=$(pkgname) ./$(progname) Makefile : $(VPATH)/configure $(VPATH)/Makefile.in ./config.status @@ -1,92 +1,15 @@ -Changes in version 1.12: +Changes in version 1.13: -The value of the version test macro 'LZ_API_VERSION' is now defined as -1000 * major + minor. For version 1.12 it is 1012. -This change does not affect the soversion. +The variables AR and ARFLAGS can now be set from configure. (Before you +needed to run 'make AR=<ar_command>'. (Reported by Hoël Bézier). -The new function 'LZ_api_version', which returns the LZ_API_VERSION of the -library object code being used, has been added to lzlib. +In case of error in a numerical argument to a command line option, minilzip +now shows the name of the option and the range of valid values. -If end of file is found at member trailer or EOS marker, -'LZ_decompress_errno' now returns 'LZ_unexpected_eof' instead of -'LZ_data_error'. +'minilzip --check-lib' now checks that LZ_API_VERSION and LZ_version_string +match. -Decompression speed has been slightly increased. +Several descriptions have been improved in manual, '--help', and man page. 
-A bug has been fixed in minilzip that falsely reported a library stall when -decompressing a file with empty members. - -The new option '--check-lib', which compares the version of lzlib used to -compile minilzip with the version actually being used, has been added to -minilzip. - -Minilzip now reports an error if a file name is empty (minilzip -t ""). - -Option '-o, --output' now behaves like '-c, --stdout', but sending the -output unconditionally to a file instead of to standard output. See the new -description of '-o' in the manual. This change is not backwards compatible. -Therefore commands like: - minilzip -o foo.lz - bar < foo -must now be split into: - minilzip -o foo.lz - < foo - minilzip bar -or rewritten as: - minilzip - bar < foo > foo.lz - -When using '-c' or '-o', minilzip now checks whether the output is a -terminal only once. - -Minilzip now does not even open the output file if the input file is a terminal. - -The words 'decompressed' and 'compressed' have been replaced with the -shorter 'out' and 'in' in the verbose output of minilzip when decompressing -or testing. - -It has been documented in the manual that 'LZ_(de)compress_close' and -'LZ_(de)compress_errno' can be called with a null argument. - -It has been documented in the manual that the LZMA marker '3' ("Sync Flush" -marker) is not allowed in lzip files. Marker '3' is a device for interactive -communication between applications using lzlib, but is useless and wasteful -in a file, and is excluded from the media type 'application/lzip'. The LZMA -marker '2' ("End Of Stream" marker) is the only marker allowed in lzip -files. - -It has been documented in the manual that not calling 'LZ_decompress_finish' -prevents lzlib from detecting a truncated member. - -It has been documented in the manual that 'LZ_decompress_read' returns at -least once per member so that 'LZ_decompress_member_finished' can be called -(and trailer data retrieved) for each member, even for empty members. 
-Therefore, 'LZ_decompress_read' returning 0 does not mean that the end of -the stream has been reached. - -It has been documented in the manual that 'LZ_(de)compress_read' can be -called with a null buffer pointer argument. - -Real code examples for the most common uses of the library have been added -to the tutorial. - -'bbexample.c' has been simplified to not use 'LZ_(de)compress_write_size'. - -'lzcheck' now accepts options '-s' (to check LZ_compress_sync_flush) and -'-m' (to check member by member decompression). - -'lzcheck.c' now also tests member by member decompression without -intermediate calls to 'LZ_decompress_finish'. - -The new file 'ffexample.c', containing example functions for file-to-file -compression/decompression, has been added to the distribution. - -The commands needed to extract files from a tar.lz archive have been -documented in the output of 'minilzip --help' and in the man page. - -'make install-bin' no longer installs the minilzip man page. This is to -prevent 'make install-bin install-man-compress' from installing the man page -twice before compressing it. - -The new targets 'install-bin-compress' and 'install-bin-strip-compress', -which install a (stripped) minilzip and a compressed man page, have been -added to the Makefile. - -9 new test files have been added to the testsuite. +The texinfo category of the manual has been changed from 'Data Compression' +to 'Compression' to match that of gzip. (Reported by Alfred M. Szmidt). @@ -31,9 +31,13 @@ the beginning is a thing of the past. The functions and variables forming the interface of the compression library are declared in the file 'lzlib.h'. Usage examples of the library are given -in the files 'bbexample.c', 'ffexample.c', and 'main.c' from the source +in the files 'bbexample.c', 'ffexample.c', and 'minilzip.c' from the source distribution. +All the library functions are thread safe. The library does not install any +signal handler. 
The decoder checks the consistency of the compressed data, +so the library should never crash even in case of corrupted input. + Compression/decompression is done by repeatedly calling a couple of read/write functions until all the data have been processed by the library. This interface is safer and less error prone than the traditional zlib @@ -60,10 +64,6 @@ Lzlib is able to compress and decompress streams of unlimited size by automatically creating multimember output. The members so created are large, about 2 PiB each. -All the library functions are thread safe. The library does not install -any signal handler. The decoder checks the consistency of the compressed -data, so the library should never crash even in case of corrupted input. - In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a concrete algorithm; it is more like "any algorithm using the LZMA coding scheme". For example, the option '-0' of lzip uses the scheme in almost the @@ -73,7 +73,7 @@ finding coding sequences of minimum size than the one currently used by lzip could be developed, and the resulting sequence could also be coded using the LZMA coding scheme. -Lzlib currently implements two variants of the LZMA algorithm; fast (used by +Lzlib currently implements two variants of the LZMA algorithm: fast (used by option '-0' of minilzip) and normal (used by all other compression levels). The high compression of LZMA comes from combining two basic, well-proven @@ -93,7 +93,7 @@ been compressed. Decompressed is used to refer to data which have undergone the process of decompression. -Copyright (C) 2009-2021 Antonio Diaz Diaz. +Copyright (C) 2009-2022 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. 
diff --git a/bbexample.c b/bbexample.c index 96257ee..074f7ae 100644 --- a/bbexample.c +++ b/bbexample.c @@ -1,5 +1,5 @@ /* Buffer to buffer example - Test program for the library lzlib - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. This program is free software: you have unlimited permission to copy, distribute, and modify it. @@ -28,9 +28,9 @@ #endif -/* Returns the address of a malloc'd buffer containing the file data and +/* Return the address of a malloc'd buffer containing the file data and the file size in '*file_sizep'. - In case of error, returns 0 and does not modify '*file_sizep'. + In case of error, return 0 and do not modify '*file_sizep'. */ uint8_t * read_file( const char * const name, long * const file_sizep ) { @@ -73,10 +73,10 @@ uint8_t * read_file( const char * const name, long * const file_sizep ) } -/* Compresses 'insize' bytes from 'inbuf'. - Returns the address of a malloc'd buffer containing the compressed data, +/* Compress 'insize' bytes from 'inbuf'. + Return the address of a malloc'd buffer containing the compressed data, and the size of the data in '*outlenp'. - In case of error, returns 0 and does not modify '*outlenp'. + In case of error, return 0 and do not modify '*outlenp'. */ uint8_t * bbcompressl( const uint8_t * const inbuf, const long insize, const int level, long * const outlenp ) @@ -152,10 +152,10 @@ uint8_t * bbcompressl( const uint8_t * const inbuf, const long insize, } -/* Decompresses 'insize' bytes from 'inbuf'. - Returns the address of a malloc'd buffer containing the decompressed +/* Decompress 'insize' bytes from 'inbuf'. + Return the address of a malloc'd buffer containing the decompressed data, and the size of the data in '*outlenp'. - In case of error, returns 0 and does not modify '*outlenp'. + In case of error, return 0 and do not modify '*outlenp'. 
*/ uint8_t * bbdecompressl( const uint8_t * const inbuf, const long insize, long * const outlenp ) @@ -230,10 +230,10 @@ int full_test( const uint8_t * const inbuf, const long insize ) } -/* Compresses 'insize' bytes from 'inbuf' to 'outbuf'. - Returns the size of the compressed data in '*outlenp'. - In case of error, or if 'outsize' is too small, returns false and does - not modify '*outlenp'. +/* Compress 'insize' bytes from 'inbuf' to 'outbuf'. + Return the size of the compressed data in '*outlenp'. + In case of error, or if 'outsize' is too small, return false and do not + modify '*outlenp'. */ bool bbcompress( const uint8_t * const inbuf, const int insize, const int dictionary_size, const int match_len_limit, @@ -267,10 +267,10 @@ bool bbcompress( const uint8_t * const inbuf, const int insize, } -/* Decompresses 'insize' bytes from 'inbuf' to 'outbuf'. - Returns the size of the decompressed data in '*outlenp'. - In case of error, or if 'outsize' is too small, returns false and does - not modify '*outlenp'. +/* Decompress 'insize' bytes from 'inbuf' to 'outbuf'. + Return the size of the decompressed data in '*outlenp'. + In case of error, or if 'outsize' is too small, return false and do not + modify '*outlenp'. */ bool bbdecompress( const uint8_t * const inbuf, const int insize, uint8_t * const outbuf, const int outsize, diff --git a/carg_parser.c b/carg_parser.c index d0c05d5..181ba23 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2021 Antonio Diaz Diaz. + Copyright (C) 2006-2022 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -32,10 +32,10 @@ static void * ap_resize_buffer( void * buf, const int min_size ) } -static char push_back_record( struct Arg_parser * const ap, - const int code, const char * const argument ) +static char push_back_record( struct Arg_parser * const ap, const int code, + const char * const long_name, + const char * const argument ) { - const int len = strlen( argument ); struct ap_Record * p; void * tmp = ap_resize_buffer( ap->data, ( ap->data_size + 1 ) * sizeof (struct ap_Record) ); @@ -43,11 +43,29 @@ static char push_back_record( struct Arg_parser * const ap, ap->data = (struct ap_Record *)tmp; p = &(ap->data[ap->data_size]); p->code = code; - p->argument = 0; - tmp = ap_resize_buffer( p->argument, len + 1 ); - if( !tmp ) return 0; - p->argument = (char *)tmp; - strncpy( p->argument, argument, len + 1 ); + if( long_name ) + { + const int len = strlen( long_name ); + p->parsed_name = (char *)malloc( len + 2 + 1 ); + if( !p->parsed_name ) return 0; + p->parsed_name[0] = p->parsed_name[1] = '-'; + strncpy( p->parsed_name + 2, long_name, len + 1 ); + } + else if( code > 0 && code < 256 ) + { + p->parsed_name = (char *)malloc( 2 + 1 ); + if( !p->parsed_name ) return 0; + p->parsed_name[0] = '-'; p->parsed_name[1] = code; p->parsed_name[2] = 0; + } + else p->parsed_name = 0; + if( argument ) + { + const int len = strlen( argument ); + p->argument = (char *)malloc( len + 1 ); + if( !p->argument ) { free( p->parsed_name ); return 0; } + strncpy( p->argument, argument, len + 1 ); + } + else p->argument = 0; ++ap->data_size; return 1; } @@ -68,12 +86,14 @@ static char add_error( struct Arg_parser * const ap, const char * const msg ) static void free_data( struct Arg_parser * const ap ) { int i; - for( i = 0; i < ap->data_size; ++i ) free( ap->data[i].argument ); + for( i = 0; i < ap->data_size; ++i ) + { free( ap->data[i].argument ); free( ap->data[i].parsed_name ); 
} if( ap->data ) { free( ap->data ); ap->data = 0; } ap->data_size = 0; } +/* Return 0 only if out of memory. */ static char parse_long_option( struct Arg_parser * const ap, const char * const opt, const char * const arg, const struct ap_Option options[], @@ -87,9 +107,10 @@ static char parse_long_option( struct Arg_parser * const ap, /* Test all long options for either exact match or abbreviated matches. */ for( i = 0; options[i].code != 0; ++i ) - if( options[i].name && strncmp( options[i].name, &opt[2], len ) == 0 ) + if( options[i].long_name && + strncmp( options[i].long_name, &opt[2], len ) == 0 ) { - if( strlen( options[i].name ) == len ) /* Exact match found */ + if( strlen( options[i].long_name ) == len ) /* Exact match found */ { index = i; exact = 1; break; } else if( index < 0 ) index = i; /* First nonexact match found */ else if( options[index].code != options[i].code || @@ -117,35 +138,39 @@ static char parse_long_option( struct Arg_parser * const ap, { if( options[index].has_arg == ap_no ) { - add_error( ap, "option '--" ); add_error( ap, options[index].name ); + add_error( ap, "option '--" ); add_error( ap, options[index].long_name ); add_error( ap, "' doesn't allow an argument" ); return 1; } if( options[index].has_arg == ap_yes && !opt[len+3] ) { - add_error( ap, "option '--" ); add_error( ap, options[index].name ); + add_error( ap, "option '--" ); add_error( ap, options[index].long_name ); add_error( ap, "' requires an argument" ); return 1; } - return push_back_record( ap, options[index].code, &opt[len+3] ); + return push_back_record( ap, options[index].code, + options[index].long_name, &opt[len+3] ); } if( options[index].has_arg == ap_yes ) { if( !arg || !arg[0] ) { - add_error( ap, "option '--" ); add_error( ap, options[index].name ); + add_error( ap, "option '--" ); add_error( ap, options[index].long_name ); add_error( ap, "' requires an argument" ); return 1; } ++*argindp; - return push_back_record( ap, options[index].code, arg ); + return 
push_back_record( ap, options[index].code, + options[index].long_name, arg ); } - return push_back_record( ap, options[index].code, "" ); + return push_back_record( ap, options[index].code, + options[index].long_name, 0 ); } +/* Return 0 only if out of memory. */ static char parse_short_option( struct Arg_parser * const ap, const char * const opt, const char * const arg, const struct ap_Option options[], @@ -156,13 +181,13 @@ static char parse_short_option( struct Arg_parser * const ap, while( cind > 0 ) { int index = -1, i; - const unsigned char code = opt[cind]; + const unsigned char c = opt[cind]; char code_str[2]; - code_str[0] = code; code_str[1] = 0; + code_str[0] = c; code_str[1] = 0; - if( code != 0 ) + if( c != 0 ) for( i = 0; options[i].code; ++i ) - if( code == options[i].code ) + if( c == options[i].code ) { index = i; break; } if( index < 0 ) @@ -176,7 +201,7 @@ static char parse_short_option( struct Arg_parser * const ap, if( options[index].has_arg != ap_no && cind > 0 && opt[cind] ) { - if( !push_back_record( ap, code, &opt[cind] ) ) return 0; + if( !push_back_record( ap, c, 0, &opt[cind] ) ) return 0; ++*argindp; cind = 0; } else if( options[index].has_arg == ap_yes ) @@ -188,9 +213,9 @@ static char parse_short_option( struct Arg_parser * const ap, return 1; } ++*argindp; cind = 0; - if( !push_back_record( ap, code, arg ) ) return 0; + if( !push_back_record( ap, c, 0, arg ) ) return 0; } - else if( !push_back_record( ap, code, "" ) ) return 0; + else if( !push_back_record( ap, c, 0, 0 ) ) return 0; } return 1; } @@ -203,7 +228,7 @@ char ap_init( struct Arg_parser * const ap, const char ** non_options = 0; /* skipped non-options */ int non_options_size = 0; /* number of skipped non-options */ int argind = 1; /* index in argv */ - int i; + char done = 0; /* false until success */ ap->data = 0; ap->error = 0; @@ -223,20 +248,20 @@ char ap_init( struct Arg_parser * const ap, if( ch2 == '-' ) { if( !argv[argind][2] ) { ++argind; break; } /* we found "--" 
*/ - else if( !parse_long_option( ap, opt, arg, options, &argind ) ) return 0; + else if( !parse_long_option( ap, opt, arg, options, &argind ) ) goto out; } - else if( !parse_short_option( ap, opt, arg, options, &argind ) ) return 0; + else if( !parse_short_option( ap, opt, arg, options, &argind ) ) goto out; if( ap->error ) break; } else { if( in_order ) - { if( !push_back_record( ap, 0, argv[argind++] ) ) return 0; } + { if( !push_back_record( ap, 0, 0, argv[argind++] ) ) goto out; } else { void * tmp = ap_resize_buffer( non_options, ( non_options_size + 1 ) * sizeof *non_options ); - if( !tmp ) return 0; + if( !tmp ) goto out; non_options = (const char **)tmp; non_options[non_options_size++] = argv[argind++]; } @@ -245,13 +270,15 @@ char ap_init( struct Arg_parser * const ap, if( ap->error ) free_data( ap ); else { + int i; for( i = 0; i < non_options_size; ++i ) - if( !push_back_record( ap, 0, non_options[i] ) ) return 0; + if( !push_back_record( ap, 0, 0, non_options[i] ) ) goto out; while( argind < argc ) - if( !push_back_record( ap, 0, argv[argind++] ) ) return 0; + if( !push_back_record( ap, 0, 0, argv[argind++] ) ) goto out; } - if( non_options ) free( non_options ); - return 1; + done = 1; +out: if( non_options ) free( non_options ); + return done; } @@ -273,13 +300,20 @@ int ap_arguments( const struct Arg_parser * const ap ) int ap_code( const struct Arg_parser * const ap, const int i ) { - if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].code; - else return 0; + if( i < 0 || i >= ap_arguments( ap ) ) return 0; + return ap->data[i].code; + } + + +const char * ap_parsed_name( const struct Arg_parser * const ap, const int i ) + { + if( i < 0 || i >= ap_arguments( ap ) || !ap->data[i].parsed_name ) return ""; + return ap->data[i].parsed_name; } const char * ap_argument( const struct Arg_parser * const ap, const int i ) { - if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].argument; - else return ""; + if( i < 0 || i >= ap_arguments( ap ) || 
!ap->data[i].argument ) return ""; + return ap->data[i].argument; } diff --git a/carg_parser.h b/carg_parser.h index c5f2352..0c64861 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2021 Antonio Diaz Diaz. + Copyright (C) 2006-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -24,9 +24,9 @@ message. 'options' is an array of 'struct ap_Option' terminated by an element - containing a code which is zero. A null name means a short-only - option. A code value outside the unsigned char range means a - long-only option. + containing a code which is zero. A null long_name means a short-only + option. A code value outside the unsigned char range means a long-only + option. Arg_parser normally makes it appear as if all the option arguments were specified before all the non-option arguments for the purposes @@ -50,7 +50,7 @@ enum ap_Has_arg { ap_no, ap_yes, ap_maybe }; struct ap_Option { int code; /* Short option letter or code ( code != 0 ) */ - const char * name; /* Long option name (maybe null) */ + const char * long_name; /* Long option name (maybe null) */ enum ap_Has_arg has_arg; }; @@ -58,6 +58,7 @@ struct ap_Option struct ap_Record { int code; + char * parsed_name; char * argument; }; @@ -86,6 +87,9 @@ int ap_arguments( const struct Arg_parser * const ap ); Else ap_argument( i ) is the option's argument (or empty). */ int ap_code( const struct Arg_parser * const ap, const int i ); +/* Full name of the option parsed (short or long). */ +const char * ap_parsed_name( const struct Arg_parser * const ap, const int i ); + const char * ap_argument( const struct Arg_parser * const ap, const int i ); #ifdef __cplusplus @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. 
+ Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -76,9 +76,9 @@ static bool Cb_unread_data( struct Circular_buffer * const cb, } -/* Copies up to 'out_size' bytes to 'out_buffer' and updates 'get'. +/* Copy up to 'out_size' bytes to 'out_buffer' and update 'get'. If 'out_buffer' is null, the bytes are discarded. - Returns the number of bytes copied or discarded. + Return the number of bytes copied or discarded. */ static unsigned Cb_read_data( struct Circular_buffer * const cb, uint8_t * const out_buffer, @@ -110,8 +110,8 @@ static unsigned Cb_read_data( struct Circular_buffer * const cb, } -/* Copies up to 'in_size' bytes from 'in_buffer' and updates 'put'. - Returns the number of bytes copied. +/* Copy up to 'in_size' bytes from 'in_buffer' and update 'put'. + Return the number of bytes copied. */ static unsigned Cb_write_data( struct Circular_buffer * const cb, const uint8_t * const in_buffer, @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Lzlib - Compression library for the lzip format -# Copyright (C) 2009-2021 Antonio Diaz Diaz. +# Copyright (C) 2009-2022 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute, and modify it. pkgname=lzlib -pkgversion=1.12 +pkgversion=1.13 soversion=1 progname=minilzip progname_static=${progname} @@ -29,9 +29,11 @@ infodir='$(datarootdir)/info' libdir='$(exec_prefix)/lib' mandir='$(datarootdir)/man' CC=gcc +AR=ar CPPFLAGS= CFLAGS='-Wall -W -O2' LDFLAGS= +ARFLAGS=-rcs # checking whether we are using GNU C. 
/bin/sh -c "${CC} --version" > /dev/null 2>&1 || { CC=cc ; CFLAGS=-O2 ; } @@ -79,10 +81,12 @@ while [ $# != 0 ] ; do echo " --enable-shared build also a shared library [disable]" echo " --disable-ldconfig don't run ldconfig after install" echo " CC=COMPILER C compiler to use [${CC}]" + echo " AR=ARCHIVER library archiver to use [${AR}]" echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]" echo " CFLAGS=OPTIONS command line options for the C compiler [${CFLAGS}]" echo " CFLAGS+=OPTIONS append options to the current value of CFLAGS" echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]" + echo " ARFLAGS=OPTIONS command line options for the library archiver [${ARFLAGS}]" echo exit 0 ;; --version | -V) @@ -118,10 +122,12 @@ while [ $# != 0 ] ; do --disable-ldconfig) disable_ldconfig=yes ;; CC=*) CC=${optarg} ;; + AR=*) AR=${optarg} ;; CPPFLAGS=*) CPPFLAGS=${optarg} ;; CFLAGS=*) CFLAGS=${optarg} ;; CFLAGS+=*) CFLAGS="${CFLAGS} ${optarg}" ;; LDFLAGS=*) LDFLAGS=${optarg} ;; + ARFLAGS=*) ARFLAGS=${optarg} ;; --*) echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;; @@ -189,13 +195,15 @@ echo "infodir = ${infodir}" echo "libdir = ${libdir}" echo "mandir = ${mandir}" echo "CC = ${CC}" +echo "AR = ${AR}" echo "CPPFLAGS = ${CPPFLAGS}" echo "CFLAGS = ${CFLAGS}" echo "LDFLAGS = ${LDFLAGS}" +echo "ARFLAGS = ${ARFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lzlib - Compression library for the lzip format -# Copyright (C) 2009-2021 Antonio Diaz Diaz. +# Copyright (C) 2009-2022 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. 
# # This Makefile is free software: you have unlimited permission @@ -220,9 +228,11 @@ infodir = ${infodir} libdir = ${libdir} mandir = ${mandir} CC = ${CC} +AR = ${AR} CPPFLAGS = ${CPPFLAGS} CFLAGS = ${CFLAGS} LDFLAGS = ${LDFLAGS} +ARFLAGS = ${ARFLAGS} EOF cat "${srcdir}/Makefile.in" >> Makefile @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -49,8 +49,6 @@ static int LZd_decode_member( struct LZ_decoder * const d ) while( !Rd_finished( rdec ) ) { - int len; - const int pos_state = LZd_data_position( d ) & pos_state_mask; /* const unsigned mpos = rdec->member_position; if( mpos - old_mpos > rd_min_available_bytes ) return 5; old_mpos = mpos; */ @@ -58,23 +56,19 @@ static int LZd_decode_member( struct LZ_decoder * const d ) { if( !rdec->at_stream_end ) return 0; if( Cb_empty( &rdec->cb ) ) break; } /* decode until EOF */ if( !LZd_enough_free_bytes( d ) ) return 0; + const int pos_state = LZd_data_position( d ) & pos_state_mask; if( Rd_decode_bit( rdec, &d->bm_match[*state][pos_state] ) == 0 ) /* 1st bit */ { /* literal byte */ Bit_model * const bm = d->bm_literal[get_lit_state(LZd_peek_prev( d ))]; - if( St_is_char( *state ) ) - { - *state -= ( *state < 4 ) ? *state : 3; + if( ( *state = St_set_char( *state ) ) < 4 ) LZd_put_byte( d, Rd_decode_tree8( rdec, bm ) ); - } else - { - *state -= ( *state < 10 ) ? 
3 : 6; LZd_put_byte( d, Rd_decode_matched( rdec, bm, LZd_peek( d, d->rep0 ) ) ); - } continue; } /* match or repeated match */ + int len; if( Rd_decode_bit( rdec, &d->bm_rep[*state] ) != 0 ) /* 2nd bit */ { if( Rd_decode_bit( rdec, &d->bm_rep0[*state] ) == 0 ) /* 3rd bit */ @@ -100,13 +94,12 @@ static int LZd_decode_member( struct LZ_decoder * const d ) d->rep0 = distance; } *state = St_set_rep( *state ); - len = min_match_len + Rd_decode_len( rdec, &d->rep_len_model, pos_state ); + len = Rd_decode_len( rdec, &d->rep_len_model, pos_state ); } else /* match */ { - unsigned distance; - len = min_match_len + Rd_decode_len( rdec, &d->match_len_model, pos_state ); - distance = Rd_decode_tree6( rdec, d->bm_dis_slot[get_len_state(len)] ); + len = Rd_decode_len( rdec, &d->match_len_model, pos_state ); + unsigned distance = Rd_decode_tree6( rdec, d->bm_dis_slot[get_len_state(len)] ); if( distance >= start_dis_model ) { const unsigned dis_slot = distance; @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -72,8 +72,8 @@ static inline void Rd_reset( struct Range_decoder * const rdec ) rdec->member_position = 0; rdec->at_stream_end = false; } -/* Seeks a member header and updates 'get'. '*skippedp' is set to the - number of bytes skipped. Returns true if it finds a valid header. +/* Seek for a member header and update 'get'. Set '*skippedp' to the number + of bytes skipped. Return true if a valid header is found. 
*/ static bool Rd_find_header( struct Range_decoder * const rdec, unsigned * const skippedp ) @@ -140,8 +140,7 @@ static bool Rd_try_reload( struct Range_decoder * const rdec ) int i; rdec->reload_pending = false; rdec->code = 0; - for( i = 0; i < 5; ++i ) - rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); + for( i = 0; i < 5; ++i ) rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); rdec->range = 0xFFFFFFFFU; rdec->code &= rdec->range; /* make sure that first byte is discarded */ } @@ -161,12 +160,11 @@ static inline unsigned Rd_decode( struct Range_decoder * const rdec, int i; for( i = num_bits; i > 0; --i ) { - bool bit; Rd_normalize( rdec ); rdec->range >>= 1; /* symbol <<= 1; */ /* if( rdec->code >= rdec->range ) { rdec->code -= rdec->range; symbol |= 1; } */ - bit = ( rdec->code >= rdec->range ); + const bool bit = ( rdec->code >= rdec->range ); symbol <<= 1; symbol += bit; rdec->code -= rdec->range & ( 0U - bit ); } @@ -176,42 +174,75 @@ static inline unsigned Rd_decode( struct Range_decoder * const rdec, static inline unsigned Rd_decode_bit( struct Range_decoder * const rdec, Bit_model * const probability ) { - uint32_t bound; Rd_normalize( rdec ); - bound = ( rdec->range >> bit_model_total_bits ) * *probability; + const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability; if( rdec->code < bound ) { - *probability += (bit_model_total - *probability) >> bit_model_move_bits; rdec->range = bound; + *probability += ( bit_model_total - *probability ) >> bit_model_move_bits; return 0; } else { - *probability -= *probability >> bit_model_move_bits; rdec->code -= bound; rdec->range -= bound; + *probability -= *probability >> bit_model_move_bits; return 1; } } -static inline unsigned Rd_decode_tree3( struct Range_decoder * const rdec, - Bit_model bm[] ) +static inline void Rd_decode_symbol_bit( struct Range_decoder * const rdec, + Bit_model * const probability, unsigned * symbol ) { - unsigned symbol = 2 | Rd_decode_bit( rdec, &bm[1] ); - 
symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - return symbol & 7; + Rd_normalize( rdec ); + *symbol <<= 1; + const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability; + if( rdec->code < bound ) + { + rdec->range = bound; + *probability += ( bit_model_total - *probability ) >> bit_model_move_bits; + } + else + { + rdec->code -= bound; + rdec->range -= bound; + *probability -= *probability >> bit_model_move_bits; + *symbol |= 1; + } + } + +static inline void Rd_decode_symbol_bit_reversed( struct Range_decoder * const rdec, + Bit_model * const probability, unsigned * model, + unsigned * symbol, const int i ) + { + Rd_normalize( rdec ); + *model <<= 1; + const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability; + if( rdec->code < bound ) + { + rdec->range = bound; + *probability += ( bit_model_total - *probability ) >> bit_model_move_bits; + } + else + { + rdec->code -= bound; + rdec->range -= bound; + *probability -= *probability >> bit_model_move_bits; + *model |= 1; + *symbol |= 1 << i; + } } static inline unsigned Rd_decode_tree6( struct Range_decoder * const rdec, Bit_model bm[] ) { - unsigned symbol = 2 | Rd_decode_bit( rdec, &bm[1] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + unsigned symbol = 1; + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); return symbol & 0x3F; } @@ -219,9 +250,14 @@ 
static inline unsigned Rd_decode_tree8( struct Range_decoder * const rdec, Bit_model bm[] ) { unsigned symbol = 1; - int i; - for( i = 0; i < 8; ++i ) - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); return symbol & 0xFF; } @@ -233,21 +269,19 @@ Rd_decode_tree_reversed( struct Range_decoder * const rdec, unsigned symbol = 0; int i; for( i = 0; i < num_bits; ++i ) - { - const unsigned bit = Rd_decode_bit( rdec, &bm[model] ); - model <<= 1; model += bit; - symbol |= ( bit << i ); - } + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, i ); return symbol; } static inline unsigned Rd_decode_tree_reversed4( struct Range_decoder * const rdec, Bit_model bm[] ) { - unsigned symbol = Rd_decode_bit( rdec, &bm[1] ); - symbol += Rd_decode_bit( rdec, &bm[2+symbol] ) << 1; - symbol += Rd_decode_bit( rdec, &bm[4+symbol] ) << 2; - symbol += Rd_decode_bit( rdec, &bm[8+symbol] ) << 3; + unsigned model = 1; + unsigned symbol = 0; + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 0 ); + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 1 ); + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 2 ); + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 3 ); return symbol; } @@ -270,11 +304,24 @@ static inline unsigned Rd_decode_len( struct Range_decoder * const rdec, struct Len_model * const lm, const int pos_state ) { + Bit_model * bm; + unsigned mask, offset, symbol = 1; + if( Rd_decode_bit( rdec, &lm->choice1 ) == 0 ) - return Rd_decode_tree3( rdec, 
lm->bm_low[pos_state] ); + { bm = lm->bm_low[pos_state]; mask = 7; offset = 0; goto len3; } if( Rd_decode_bit( rdec, &lm->choice2 ) == 0 ) - return len_low_symbols + Rd_decode_tree3( rdec, lm->bm_mid[pos_state] ); - return len_low_symbols + len_mid_symbols + Rd_decode_tree8( rdec, lm->bm_high ); + { bm = lm->bm_mid[pos_state]; mask = 7; offset = len_low_symbols; goto len3; } + bm = lm->bm_high; mask = 0xFF; offset = len_low_symbols + len_mid_symbols; + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); +len3: + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + return ( symbol & mask ) + min_match_len + offset; } diff --git a/doc/lzlib.info b/doc/lzlib.info index bef1859..d81bc88 100644 --- a/doc/lzlib.info +++ b/doc/lzlib.info @@ -1,6 +1,6 @@ This is lzlib.info, produced by makeinfo version 4.13+ from lzlib.texi. -INFO-DIR-SECTION Data Compression +INFO-DIR-SECTION Compression START-INFO-DIR-ENTRY * Lzlib: (lzlib). Compression library for the lzip format END-INFO-DIR-ENTRY @@ -11,7 +11,7 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir) Lzlib Manual ************ -This manual is for Lzlib (version 1.12, 2 January 2021). +This manual is for Lzlib (version 1.13, 23 January 2022). * Menu: @@ -30,7 +30,7 @@ This manual is for Lzlib (version 1.12, 2 January 2021). * Concept index:: Index of concepts - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute, and modify it. @@ -73,8 +73,12 @@ byte near the beginning is a thing of the past. 
The functions and variables forming the interface of the compression library are declared in the file 'lzlib.h'. Usage examples of the library -are given in the files 'bbexample.c', 'ffexample.c', and 'main.c' from the -source distribution. +are given in the files 'bbexample.c', 'ffexample.c', and 'minilzip.c' from +the source distribution. + + All the library functions are thread safe. The library does not install +any signal handler. The decoder checks the consistency of the compressed +data, so the library should never crash even in case of corrupted input. Compression/decompression is done by repeatedly calling a couple of read/write functions until all the data have been processed by the library. @@ -102,20 +106,16 @@ concatenated compressed data streams is also supported. automatically creating multimember output. The members so created are large, about 2 PiB each. - All the library functions are thread safe. The library does not install -any signal handler. The decoder checks the consistency of the compressed -data, so the library should never crash even in case of corrupted input. - In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a concrete algorithm; it is more like "any algorithm using the LZMA coding scheme". For example, the option '-0' of lzip uses the scheme in almost the simplest way possible; issuing the longest match it can find, or a literal byte if it can't find a match. Inversely, a much more elaborated way of -finding coding sequences of minimum size than the one currently used by -lzip could be developed, and the resulting sequence could also be coded -using the LZMA coding scheme. +finding coding sequences of minimum size than the one currently used by lzip +could be developed, and the resulting sequence could also be coded using the +LZMA coding scheme. 
- Lzlib currently implements two variants of the LZMA algorithm; fast + Lzlib currently implements two variants of the LZMA algorithm: fast (used by option '-0' of minilzip) and normal (used by all other compression levels). @@ -145,7 +145,8 @@ One goal of lzlib is to keep perfect backward compatibility with older versions of itself down to 1.0. Any application working with an older lzlib should work with a newer lzlib. Installing a newer lzlib should not break anything. This chapter describes the constants and functions that the -application can use to discover the version of the library being used. +application can use to discover the version of the library being used. All +of them are declared in 'lzlib.h'. -- Constant: LZ_API_VERSION This constant is defined in 'lzlib.h' and works as a version test @@ -325,13 +326,13 @@ except 'LZ_compress_open' whose return value must be verified by calling 'LZ_compress_sync_flush'. Then call 'LZ_compress_read' until it returns 0. - This function writes a LZMA marker '3' ("Sync Flush" marker) to the - compressed output. Note that the sync flush marker is not allowed in - lzip files; it is a device for interactive communication between - applications using lzlib, but is useless and wasteful in a file, and - is excluded from the media type 'application/lzip'. The LZMA marker - '2' ("End Of Stream" marker) is the only marker allowed in lzip files. - *Note Data format::. + This function writes at least one LZMA marker '3' ("Sync Flush" marker) + to the compressed output. Note that the sync flush marker is not + allowed in lzip files; it is a device for interactive communication + between applications using lzlib, but is useless and wasteful in a + file, and is excluded from the media type 'application/lzip'. The LZMA + marker '2' ("End Of Stream" marker) is the only marker allowed in lzip + files. *Note Data format::. Repeated use of 'LZ_compress_sync_flush' may degrade compression ratio, so use it only when needed. 
If the interval between calls to @@ -347,34 +348,30 @@ except 'LZ_compress_open' whose return value must be verified by calling -- Function: int LZ_compress_read ( struct LZ_Encoder * const ENCODER, uint8_t * const BUFFER, const int SIZE ) - The function 'LZ_compress_read' reads up to SIZE bytes from the stream - pointed to by ENCODER, storing the results in BUFFER. If - LZ_API_VERSION >= 1012, BUFFER may be a null pointer, in which case - the bytes read are discarded. - - The return value is the number of bytes actually read. This might be - less than SIZE; for example, if there aren't that many bytes left in - the stream or if more bytes have to be yet written with the function + Reads up to SIZE bytes from the stream pointed to by ENCODER, storing + the results in BUFFER. If LZ_API_VERSION >= 1012, BUFFER may be a null + pointer, in which case the bytes read are discarded. + + Returns the number of bytes actually read. This might be less than + SIZE; for example, if there aren't that many bytes left in the stream + or if more bytes have to be yet written with the function 'LZ_compress_write'. Note that reading less than SIZE bytes is not an error. -- Function: int LZ_compress_write ( struct LZ_Encoder * const ENCODER, uint8_t * const BUFFER, const int SIZE ) - The function 'LZ_compress_write' writes up to SIZE bytes from BUFFER - to the stream pointed to by ENCODER. - - The return value is the number of bytes actually written. This might be + Writes up to SIZE bytes from BUFFER to the stream pointed to by + ENCODER. Returns the number of bytes actually written. This might be less than SIZE. Note that writing less than SIZE bytes is not an error. -- Function: int LZ_compress_write_size ( struct LZ_Encoder * const ENCODER ) - The function 'LZ_compress_write_size' returns the maximum number of - bytes that can be immediately written through 'LZ_compress_write'. 
For - efficiency reasons, once the input buffer is full and - 'LZ_compress_write_size' returns 0, almost all the buffer must be - compressed before a size greater than 0 is returned again. (This is - done to minimize the amount of data that must be copied to the - beginning of the buffer before new data can be accepted). + Returns the maximum number of bytes that can be immediately written + through 'LZ_compress_write'. For efficiency reasons, once the input + buffer is full and 'LZ_compress_write_size' returns 0, almost all the + buffer must be compressed before a size greater than 0 is returned + again. (This is done to minimize the amount of data that must be + copied to the beginning of the buffer before new data can be accepted). It is guaranteed that an immediate call to 'LZ_compress_write' will accept a SIZE up to the returned number of bytes. @@ -472,14 +469,13 @@ except 'LZ_decompress_open' whose return value must be verified by calling -- Function: int LZ_decompress_read ( struct LZ_Decoder * const DECODER, uint8_t * const BUFFER, const int SIZE ) - The function 'LZ_decompress_read' reads up to SIZE bytes from the - stream pointed to by DECODER, storing the results in BUFFER. If - LZ_API_VERSION >= 1012, BUFFER may be a null pointer, in which case - the bytes read are discarded. - - The return value is the number of bytes actually read. This might be - less than SIZE; for example, if there aren't that many bytes left in - the stream or if more bytes have to be yet written with the function + Reads up to SIZE bytes from the stream pointed to by DECODER, storing + the results in BUFFER. If LZ_API_VERSION >= 1012, BUFFER may be a null + pointer, in which case the bytes read are discarded. + + Returns the number of bytes actually read. This might be less than + SIZE; for example, if there aren't that many bytes left in the stream + or if more bytes have to be yet written with the function 'LZ_decompress_write'. 
Note that reading less than SIZE bytes is not an error. @@ -499,18 +495,16 @@ except 'LZ_decompress_open' whose return value must be verified by calling -- Function: int LZ_decompress_write ( struct LZ_Decoder * const DECODER, uint8_t * const BUFFER, const int SIZE ) - The function 'LZ_decompress_write' writes up to SIZE bytes from BUFFER - to the stream pointed to by DECODER. - - The return value is the number of bytes actually written. This might be + Writes up to SIZE bytes from BUFFER to the stream pointed to by + DECODER. Returns the number of bytes actually written. This might be less than SIZE. Note that writing less than SIZE bytes is not an error. -- Function: int LZ_decompress_write_size ( struct LZ_Decoder * const DECODER ) - The function 'LZ_decompress_write_size' returns the maximum number of - bytes that can be immediately written through 'LZ_decompress_write'. - This number varies smoothly; each compressed byte consumed may be - overwritten immediately, increasing by 1 the value returned. + Returns the maximum number of bytes that can be immediately written + through 'LZ_decompress_write'. This number varies smoothly; each + compressed byte consumed may be overwritten immediately, increasing by + 1 the value returned. It is guaranteed that an immediate call to 'LZ_decompress_write' will accept a SIZE up to the returned number of bytes. @@ -530,24 +524,24 @@ except 'LZ_decompress_open' whose return value must be verified by calling -- Function: int LZ_decompress_member_finished ( struct LZ_Decoder * const DECODER ) Returns 1 if the previous call to 'LZ_decompress_read' finished reading - the current member, indicating that final values for member are + the current member, indicating that final values for the member are available through 'LZ_decompress_data_crc', 'LZ_decompress_data_position', and 'LZ_decompress_member_position'. Otherwise it returns 0. 
-- Function: int LZ_decompress_member_version ( struct LZ_Decoder * const DECODER ) - Returns the version of current member from member header. + Returns the version of the current member, read from the member header. -- Function: int LZ_decompress_dictionary_size ( struct LZ_Decoder * const DECODER ) - Returns the dictionary size of the current member, read from the member - header. + Returns the dictionary size of the current member, read from the + member header. -- Function: unsigned LZ_decompress_data_crc ( struct LZ_Decoder * const DECODER ) Returns the 32 bit Cyclic Redundancy Check of the data decompressed - from the current member. The returned value is valid only when + from the current member. The value returned is valid only when 'LZ_decompress_member_finished' returns 1. -- Function: unsigned long long LZ_decompress_data_position ( struct @@ -650,13 +644,14 @@ compatible with lzip 1.4 or newer. Lzip is a lossless data compressor with a user interface similar to the one of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov -chain-Algorithm' (LZMA) stream format, chosen to maximize safety and -interoperability. Lzip can compress about as fast as gzip (lzip -0) or -compress most files more than bzip2 (lzip -9). Decompression speed is -intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 -from a data recovery perspective. Lzip has been designed, written, and -tested with great care to replace gzip and bzip2 as the standard -general-purpose compressed format for unix-like systems. +chain-Algorithm' (LZMA) stream format and provides 3-factor integrity +checking to maximize interoperability and optimize safety. Lzip can compress +about as fast as gzip (lzip -0) or compress most files more than bzip2 +(lzip -9). Decompression speed is intermediate between gzip and bzip2. Lzip +is better than gzip and bzip2 from a data recovery perspective. 
Lzip has +been designed, written, and tested with great care to replace gzip and +bzip2 as the standard general-purpose compressed format for unix-like +systems. The format for running minilzip is: @@ -705,10 +700,13 @@ once, the first time it appears in the command line. '-d' '--decompress' - Decompress the files specified. If a file does not exist or can't be - opened, minilzip continues decompressing the rest of the files. If a - file fails to decompress, or is a terminal, minilzip exits immediately - without decompressing the rest of the files. + Decompress the files specified. If a file does not exist, can't be + opened, or the destination file already exists and '--force' has not + been specified, minilzip continues decompressing the rest of the files + and exits with error status 1. If a file fails to decompress, or is a + terminal, minilzip exits immediately with error status 2 without + decompressing the rest of the files. A terminal is considered an + uncompressed file, and therefore invalid. '-f' '--force' @@ -831,12 +829,14 @@ once, the first time it appears in the command line. '--check-lib' Compare the version of lzlib used to compile minilzip with the version - actually being used and exit. Report any differences found. Exit with - error status 1 if differences are found. A mismatch may indicate that - lzlib is not correctly installed or that a different version of lzlib - has been installed after compiling the shared version of minilzip. - 'minilzip -v --check-lib' shows the version of lzlib being used and - the value of 'LZ_API_VERSION' (if defined). *Note Library version::. + actually being used at run time and exit. Report any differences + found. Exit with error status 1 if differences are found. A mismatch + may indicate that lzlib is not correctly installed or that a different + version of lzlib has been installed after compiling the shared version + of minilzip. Exit with error status 2 if LZ_API_VERSION and + LZ_version_string don't match. 
'minilzip -v --check-lib' shows the + version of lzlib being used and the value of LZ_API_VERSION (if + defined). *Note Library version::. Numbers given as arguments to options may be followed by a multiplier @@ -857,7 +857,7 @@ Y yottabyte (10^24) | Yi yobibyte (2^80) Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid -input file, 3 for an internal consistency error (eg, bug) which caused +input file, 3 for an internal consistency error (e.g., bug) which caused minilzip to panic. @@ -886,9 +886,11 @@ when there is no longer anything to take away. represents a variable number of bytes. - A lzip data stream consists of a series of "members" (compressed data + Lzip data consist of a series of independent "members" (compressed data sets). The members simply appear one after another in the data stream, with -no additional information before, between, or after them. +no additional information before, between, or after them. Each member can +encode in compressed form up to 16 EiB - 1 byte of uncompressed data. The +size of a multimember data stream is unlimited. Each member has the following structure: @@ -916,7 +918,7 @@ no additional information before, between, or after them. Valid values for dictionary size range from 4 KiB to 512 MiB. 'LZMA stream' - The LZMA stream, finished by an end of stream marker. Uses default + The LZMA stream, finished by an "End Of Stream" marker. Uses default values for encoder properties. *Note Stream format: (lzip)Stream format, for a complete description. Lzip only uses the LZMA marker '2' ("End Of Stream" marker). Lzlib @@ -924,16 +926,17 @@ no additional information before, between, or after them. sync_flush::. 'CRC32 (4 bytes)' - Cyclic Redundancy Check (CRC) of the uncompressed original data. + Cyclic Redundancy Check (CRC) of the original uncompressed data. 'Data size (8 bytes)' - Size of the uncompressed original data. 
+ Size of the original uncompressed data. 'Member size (8 bytes)' Total size of the member, including header and trailer. This field acts as a distributed index, allows the verification of stream integrity, - and facilitates safe recovery of undamaged members from multimember - files. + and facilitates the safe recovery of undamaged members from + multimember files. Member size should be limited to 2 PiB to prevent + the data size field from overflowing. @@ -967,10 +970,10 @@ File: lzlib.info, Node: Buffer compression, Next: Buffer decompression, Up: E Buffer-to-buffer single-member compression (MEMBER_SIZE > total output). -/* Compresses 'insize' bytes from 'inbuf' to 'outbuf'. - Returns the size of the compressed data in '*outlenp'. - In case of error, or if 'outsize' is too small, returns false and does - not modify '*outlenp'. +/* Compress 'insize' bytes from 'inbuf' to 'outbuf'. + Return the size of the compressed data in '*outlenp'. + In case of error, or if 'outsize' is too small, return false and do not + modify '*outlenp'. */ bool bbcompress( const uint8_t * const inbuf, const int insize, const int dictionary_size, const int match_len_limit, @@ -1011,10 +1014,10 @@ File: lzlib.info, Node: Buffer decompression, Next: File compression, Prev: B Buffer-to-buffer decompression. -/* Decompresses 'insize' bytes from 'inbuf' to 'outbuf'. - Returns the size of the decompressed data in '*outlenp'. - In case of error, or if 'outsize' is too small, returns false and does - not modify '*outlenp'. +/* Decompress 'insize' bytes from 'inbuf' to 'outbuf'. + Return the size of the decompressed data in '*outlenp'. + In case of error, or if 'outsize' is too small, return false and do not + modify '*outlenp'. */ bool bbdecompress( const uint8_t * const inbuf, const int insize, uint8_t * const outbuf, const int outsize, @@ -1159,9 +1162,9 @@ int ffmmcompress( FILE * const infile, FILE * const outfile ) Example 2: Multimember compression (user-restarted members). 
(Call LZ_compress_open with MEMBER_SIZE > largest member). -/* Compresses 'infile' to 'outfile' as a multimember stream with one member +/* Compress 'infile' to 'outfile' as a multimember stream with one member for each line of text terminated by a newline character or by EOF. - Returns 0 if success, 1 if error. + Return 0 if success, 1 if error. */ int fflfcompress( struct LZ_Encoder * const encoder, FILE * const infile, FILE * const outfile ) @@ -1205,7 +1208,7 @@ File: lzlib.info, Node: Skipping data errors, Prev: File compression mm, Up: 11.6 Skipping data errors ========================= -/* Decompresses 'infile' to 'outfile' with automatic resynchronization to +/* Decompress 'infile' to 'outfile' with automatic resynchronization to next member in case of data error, including the automatic removal of leading garbage. */ @@ -1253,7 +1256,7 @@ eternity, if not longer. If you find a bug in lzlib, please send electronic mail to <lzip-bug@nongnu.org>. Include the version number, which you can find by -running 'minilzip --version' or in 'LZ_version_string' from 'lzlib.h'. +running 'minilzip --version' and 'minilzip -v --check-lib'. 
File: lzlib.info, Node: Concept index, Prev: Problems, Up: Top @@ -1288,29 +1291,29 @@ Concept index Tag Table: -Node: Top220 -Node: Introduction1342 +Node: Top215 +Node: Introduction1338 Node: Library version6413 -Node: Buffering8918 -Node: Parameter limits10143 -Node: Compression functions11097 -Ref: member_size12907 -Ref: sync_flush14673 -Node: Decompression functions19493 -Node: Error codes27187 -Node: Error messages29478 -Node: Invoking minilzip30057 -Node: Data format39651 -Ref: coded-dict-size40957 -Node: Examples42267 -Node: Buffer compression43228 -Node: Buffer decompression44754 -Node: File compression46174 -Node: File decompression47157 -Node: File compression mm48161 -Node: Skipping data errors51193 -Node: Problems52505 -Node: Concept index53077 +Node: Buffering8957 +Node: Parameter limits10182 +Node: Compression functions11136 +Ref: member_size12946 +Ref: sync_flush14712 +Node: Decompression functions19400 +Node: Error codes26968 +Node: Error messages29259 +Node: Invoking minilzip29838 +Node: Data format39786 +Ref: coded-dict-size41232 +Node: Examples42641 +Node: Buffer compression43602 +Node: Buffer decompression45122 +Node: File compression46536 +Node: File decompression47519 +Node: File compression mm48523 +Node: Skipping data errors51552 +Node: Problems52862 +Node: Concept index53423 End Tag Table diff --git a/doc/lzlib.texi b/doc/lzlib.texi index 644a3d7..3caf9dd 100644 --- a/doc/lzlib.texi +++ b/doc/lzlib.texi @@ -6,10 +6,10 @@ @finalout @c %**end of header -@set UPDATED 2 January 2021 -@set VERSION 1.12 +@set UPDATED 23 January 2022 +@set VERSION 1.13 -@dircategory Data Compression +@dircategory Compression @direntry * Lzlib: (lzlib). Compression library for the lzip format @end direntry @@ -52,7 +52,7 @@ This manual is for Lzlib (version @value{VERSION}, @value{UPDATED}). @end menu @sp 1 -Copyright @copyright{} 2009-2021 Antonio Diaz Diaz. +Copyright @copyright{} 2009-2022 Antonio Diaz Diaz. 
This manual is free documentation: you have unlimited permission to copy, distribute, and modify it. @@ -77,9 +77,9 @@ taking into account both data integrity and decoder availability: The lzip format provides very safe integrity checking and some data recovery means. The program @uref{http://www.nongnu.org/lzip/manual/lziprecover_manual.html#Data-safety,,lziprecover} -can repair bit flip errors (one of the most common forms of data -corruption) in lzip files, and provides data recovery capabilities, -including error-checked merging of damaged copies of a file. +can repair bit flip errors (one of the most common forms of data corruption) +in lzip files, and provides data recovery capabilities, including +error-checked merging of damaged copies of a file. @ifnothtml @xref{Data safety,,,lziprecover}. @end ifnothtml @@ -89,8 +89,8 @@ The lzip format is as simple as possible (but not simpler). The lzip manual provides the source code of a simple decompressor along with a detailed explanation of how it works, so that with the only help of the lzip manual it would be possible for a digital archaeologist to extract -the data from a lzip file long after quantum computers eventually render -LZMA obsolete. +the data from a lzip file long after quantum computers eventually +render LZMA obsolete. @item Additionally the lzip reference implementation is copylefted, which @@ -104,8 +104,12 @@ the beginning is a thing of the past. The functions and variables forming the interface of the compression library are declared in the file @samp{lzlib.h}. Usage examples of the library are -given in the files @samp{bbexample.c}, @samp{ffexample.c}, and @samp{main.c} -from the source distribution. +given in the files @samp{bbexample.c}, @samp{ffexample.c}, and +@samp{minilzip.c} from the source distribution. + +All the library functions are thread safe. The library does not install any +signal handler. 
The decoder checks the consistency of the compressed data, +so the library should never crash even in case of corrupted input. Compression/decompression is done by repeatedly calling a couple of read/write functions until all the data have been processed by the library. @@ -134,22 +138,17 @@ Lzlib is able to compress and decompress streams of unlimited size by automatically creating multimember output. The members so created are large, about @w{2 PiB} each. -All the library functions are thread safe. The library does not install -any signal handler. The decoder checks the consistency of the compressed -data, so the library should never crash even in case of corrupted input. - In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a concrete algorithm; it is more like "any algorithm using the LZMA coding -scheme". For example, the option @samp{-0} of lzip uses the scheme in almost -the simplest way possible; issuing the longest match it can find, or a -literal byte if it can't find a match. Inversely, a much more elaborated way -of finding coding sequences of minimum size than the one currently used by -lzip could be developed, and the resulting sequence could also be coded -using the LZMA coding scheme. +scheme". For example, the option @samp{-0} of lzip uses the scheme in almost the +simplest way possible; issuing the longest match it can find, or a literal +byte if it can't find a match. Inversely, a much more elaborated way of +finding coding sequences of minimum size than the one currently used by lzip +could be developed, and the resulting sequence could also be coded using the +LZMA coding scheme. -Lzlib currently implements two variants of the LZMA algorithm; fast (used by -option @samp{-0} of minilzip) and normal (used by all other compression -levels). +Lzlib currently implements two variants of the LZMA algorithm: fast (used by +option @samp{-0} of minilzip) and normal (used by all other compression levels). 
The high compression of LZMA comes from combining two basic, well-proven compression ideas: sliding dictionaries (LZ77/78) and markov models (the @@ -176,7 +175,8 @@ One goal of lzlib is to keep perfect backward compatibility with older versions of itself down to 1.0. Any application working with an older lzlib should work with a newer lzlib. Installing a newer lzlib should not break anything. This chapter describes the constants and functions that the -application can use to discover the version of the library being used. +application can use to discover the version of the library being used. All +of them are declared in @samp{lzlib.h}. @defvr Constant LZ_API_VERSION This constant is defined in @samp{lzlib.h} and works as a version test @@ -372,12 +372,13 @@ already written with the function @samp{LZ_compress_write}. First call @samp{LZ_compress_sync_flush}. Then call @samp{LZ_compress_read} until it returns 0. -This function writes a LZMA marker @samp{3} ("Sync Flush" marker) to the -compressed output. Note that the sync flush marker is not allowed in lzip -files; it is a device for interactive communication between applications -using lzlib, but is useless and wasteful in a file, and is excluded from the -media type @samp{application/lzip}. The LZMA marker @samp{2} ("End Of -Stream" marker) is the only marker allowed in lzip files. @xref{Data format}. +This function writes at least one LZMA marker @samp{3} ("Sync Flush" marker) +to the compressed output. Note that the sync flush marker is not allowed in +lzip files; it is a device for interactive communication between +applications using lzlib, but is useless and wasteful in a file, and is +excluded from the media type @samp{application/lzip}. The LZMA marker +@samp{2} ("End Of Stream" marker) is the only marker allowed in lzip files. +@xref{Data format}. Repeated use of @samp{LZ_compress_sync_flush} may degrade compression ratio, so use it only when needed. 
If the interval between calls to @@ -394,36 +395,33 @@ are more bytes available than those needed to complete @var{member_size}, @deftypefun int LZ_compress_read ( struct LZ_Encoder * const @var{encoder}, uint8_t * const @var{buffer}, const int @var{size} ) -The function @samp{LZ_compress_read} reads up to @var{size} bytes from the -stream pointed to by @var{encoder}, storing the results in @var{buffer}. -If @w{LZ_API_VERSION >= 1012}, @var{buffer} may be a null pointer, in which -case the bytes read are discarded. - -The return value is the number of bytes actually read. This might be less -than @var{size}; for example, if there aren't that many bytes left in the -stream or if more bytes have to be yet written with the function +Reads up to @var{size} bytes from the stream pointed to by @var{encoder}, +storing the results in @var{buffer}. If @w{LZ_API_VERSION >= 1012}, +@var{buffer} may be a null pointer, in which case the bytes read are +discarded. + +Returns the number of bytes actually read. This might be less than +@var{size}; for example, if there aren't that many bytes left in the stream +or if more bytes have to be yet written with the function @samp{LZ_compress_write}. Note that reading less than @var{size} bytes is not an error. @end deftypefun @deftypefun int LZ_compress_write ( struct LZ_Encoder * const @var{encoder}, uint8_t * const @var{buffer}, const int @var{size} ) -The function @samp{LZ_compress_write} writes up to @var{size} bytes from -@var{buffer} to the stream pointed to by @var{encoder}. - -The return value is the number of bytes actually written. This might be -less than @var{size}. Note that writing less than @var{size} bytes is -not an error. +Writes up to @var{size} bytes from @var{buffer} to the stream pointed to by +@var{encoder}. Returns the number of bytes actually written. This might be +less than @var{size}. Note that writing less than @var{size} bytes is not an +error. 
@end deftypefun @deftypefun int LZ_compress_write_size ( struct LZ_Encoder * const @var{encoder} ) -The function @samp{LZ_compress_write_size} returns the maximum number of -bytes that can be immediately written through @samp{LZ_compress_write}. -For efficiency reasons, once the input buffer is full and -@samp{LZ_compress_write_size} returns 0, almost all the buffer must be -compressed before a size greater than 0 is returned again. (This is done to -minimize the amount of data that must be copied to the beginning of the +Returns the maximum number of bytes that can be immediately written through +@samp{LZ_compress_write}. For efficiency reasons, once the input buffer is +full and @samp{LZ_compress_write_size} returns 0, almost all the buffer must +be compressed before a size greater than 0 is returned again. (This is done +to minimize the amount of data that must be copied to the beginning of the buffer before new data can be accepted). It is guaranteed that an immediate call to @samp{LZ_compress_write} will @@ -478,10 +476,10 @@ perhaps not yet read. @chapter Decompression functions @cindex decompression functions -These are the functions used to decompress data. In case of error, all -of them return -1 or 0, for signed and unsigned return values -respectively, except @samp{LZ_decompress_open} whose return value must -be verified by calling @samp{LZ_decompress_errno} before using it. +These are the functions used to decompress data. In case of error, all of +them return -1 or 0, for signed and unsigned return values respectively, +except @samp{LZ_decompress_open} whose return value must be verified by +calling @samp{LZ_decompress_errno} before using it. @deftypefun {struct LZ_Decoder *} LZ_decompress_open ( void ) @@ -539,14 +537,14 @@ function does nothing. 
@deftypefun int LZ_decompress_read ( struct LZ_Decoder * const @var{decoder}, uint8_t * const @var{buffer}, const int @var{size} ) -The function @samp{LZ_decompress_read} reads up to @var{size} bytes from the -stream pointed to by @var{decoder}, storing the results in @var{buffer}. -If @w{LZ_API_VERSION >= 1012}, @var{buffer} may be a null pointer, in which -case the bytes read are discarded. - -The return value is the number of bytes actually read. This might be less -than @var{size}; for example, if there aren't that many bytes left in the -stream or if more bytes have to be yet written with the function +Reads up to @var{size} bytes from the stream pointed to by @var{decoder}, +storing the results in @var{buffer}. If @w{LZ_API_VERSION >= 1012}, +@var{buffer} may be a null pointer, in which case the bytes read are +discarded. + +Returns the number of bytes actually read. This might be less than +@var{size}; for example, if there aren't that many bytes left in the stream +or if more bytes have to be yet written with the function @samp{LZ_decompress_write}. Note that reading less than @var{size} bytes is not an error. @@ -571,20 +569,18 @@ recover as much data as possible from each damaged member. @deftypefun int LZ_decompress_write ( struct LZ_Decoder * const @var{decoder}, uint8_t * const @var{buffer}, const int @var{size} ) -The function @samp{LZ_decompress_write} writes up to @var{size} bytes from -@var{buffer} to the stream pointed to by @var{decoder}. - -The return value is the number of bytes actually written. This might be -less than @var{size}. Note that writing less than @var{size} bytes is -not an error. +Writes up to @var{size} bytes from @var{buffer} to the stream pointed to by +@var{decoder}. Returns the number of bytes actually written. This might be +less than @var{size}. Note that writing less than @var{size} bytes is not an +error. 
@end deftypefun @deftypefun int LZ_decompress_write_size ( struct LZ_Decoder * const @var{decoder} ) -The function @samp{LZ_decompress_write_size} returns the maximum number of -bytes that can be immediately written through @samp{LZ_decompress_write}. -This number varies smoothly; each compressed byte consumed may be -overwritten immediately, increasing by 1 the value returned. +Returns the maximum number of bytes that can be immediately written through +@samp{LZ_decompress_write}. This number varies smoothly; each compressed +byte consumed may be overwritten immediately, increasing by 1 the value +returned. It is guaranteed that an immediate call to @samp{LZ_decompress_write} will accept a @var{size} up to the returned number of bytes. @@ -607,26 +603,25 @@ does not imply @samp{LZ_decompress_member_finished}. @deftypefun int LZ_decompress_member_finished ( struct LZ_Decoder * const @var{decoder} ) Returns 1 if the previous call to @samp{LZ_decompress_read} finished reading -the current member, indicating that final values for member are available +the current member, indicating that final values for the member are available through @samp{LZ_decompress_data_crc}, @samp{LZ_decompress_data_position}, and @samp{LZ_decompress_member_position}. Otherwise it returns 0. @end deftypefun @deftypefun int LZ_decompress_member_version ( struct LZ_Decoder * const @var{decoder} ) -Returns the version of current member from member header. +Returns the version of the current member, read from the member header. @end deftypefun @deftypefun int LZ_decompress_dictionary_size ( struct LZ_Decoder * const @var{decoder} ) -Returns the dictionary size of the current member, read from the member -header. +Returns the dictionary size of the current member, read from the member header. @end deftypefun @deftypefun {unsigned} LZ_decompress_data_crc ( struct LZ_Decoder * const @var{decoder} ) Returns the 32 bit Cyclic Redundancy Check of the data decompressed from -the current member. 
The returned value is valid only when +the current member. The value returned is valid only when @samp{LZ_decompress_member_finished} returns 1. @end deftypefun @@ -672,8 +667,7 @@ examine @samp{LZ_(de)compress_errno}. The error codes are defined in the header file @samp{lzlib.h}. @deftypevr Constant {enum LZ_Errno} LZ_ok -The value of this constant is 0 and is used to indicate that there is no -error. +The value of this constant is 0 and is used to indicate that there is no error. @end deftypevr @deftypevr Constant {enum LZ_Errno} LZ_bad_argument @@ -737,16 +731,17 @@ The value of @var{lz_errno} normally comes from a call to Minilzip is a test program for the compression library lzlib, fully compatible with lzip 1.4 or newer. -@uref{http://www.nongnu.org/lzip/lzip.html,,Lzip} is a lossless data -compressor with a user interface similar to the one of gzip or bzip2. Lzip -uses a simplified form of the 'Lempel-Ziv-Markov chain-Algorithm' (LZMA) -stream format, chosen to maximize safety and interoperability. Lzip can -compress about as fast as gzip @w{(lzip -0)} or compress most files more -than bzip2 @w{(lzip -9)}. Decompression speed is intermediate between gzip -and bzip2. Lzip is better than gzip and bzip2 from a data recovery -perspective. Lzip has been designed, written, and tested with great care to -replace gzip and bzip2 as the standard general-purpose compressed format for -unix-like systems. +@uref{http://www.nongnu.org/lzip/lzip.html,,Lzip} +is a lossless data compressor with a user interface similar to the one +of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov +chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity +checking to maximize interoperability and optimize safety. Lzip can compress +about as fast as gzip @w{(lzip -0)} or compress most files more than bzip2 +@w{(lzip -9)}. Decompression speed is intermediate between gzip and bzip2. +Lzip is better than gzip and bzip2 from a data recovery perspective. 
Lzip +has been designed, written, and tested with great care to replace gzip and +bzip2 as the standard general-purpose compressed format for unix-like +systems. @noindent The format for running minilzip is: @@ -803,10 +798,12 @@ and @samp{-S}. @samp{-c} has no effect when testing or listing. @item -d @itemx --decompress -Decompress the files specified. If a file does not exist or can't be -opened, minilzip continues decompressing the rest of the files. If a file -fails to decompress, or is a terminal, minilzip exits immediately without -decompressing the rest of the files. +Decompress the files specified. If a file does not exist, can't be opened, +or the destination file already exists and @samp{--force} has not been +specified, minilzip continues decompressing the rest of the files and exits with +error status 1. If a file fails to decompress, or is a terminal, minilzip exits +immediately with error status 2 without decompressing the rest of the files. +A terminal is considered an uncompressed file, and therefore invalid. @item -f @itemx --force @@ -932,12 +929,13 @@ header" error and the cause is not indeed a corrupt header. @item --check-lib Compare the @uref{#Library-version,,version of lzlib} used to compile -minilzip with the version actually being used and exit. Report any -differences found. Exit with error status 1 if differences are found. A +minilzip with the version actually being used at run time and exit. Report +any differences found. Exit with error status 1 if differences are found. A mismatch may indicate that lzlib is not correctly installed or that a different version of lzlib has been installed after compiling the shared -version of minilzip. @w{@samp{minilzip -v --check-lib}} shows the version of -lzlib being used and the value of @samp{LZ_API_VERSION} (if defined). +version of minilzip. Exit with error status 2 if LZ_API_VERSION and +LZ_version_string don't match. 
@w{@samp{minilzip -v --check-lib}} shows the +version of lzlib being used and the value of LZ_API_VERSION (if defined). @ifnothtml @xref{Library version}. @end ifnothtml @@ -963,9 +961,9 @@ Table of SI and binary prefixes (unit multipliers): @sp 1 Exit status: 0 for a normal exit, 1 for environmental problems (file not -found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or -invalid input file, 3 for an internal consistency error (eg, bug) which -caused minilzip to panic. +found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid +input file, 3 for an internal consistency error (e.g., bug) which caused +minilzip to panic. @node Data format @@ -996,9 +994,11 @@ represents one byte; a box like this: represents a variable number of bytes. @sp 1 -A lzip data stream consists of a series of "members" (compressed data sets). -The members simply appear one after another in the data stream, with no -additional information before, between, or after them. +Lzip data consist of a series of independent "members" (compressed data +sets). The members simply appear one after another in the data stream, with +no additional information before, between, or after them. Each member can +encode in compressed form up to @w{16 EiB - 1 byte} of uncompressed data. +The size of a multimember data stream is unlimited. Each member has the following structure: @@ -1029,7 +1029,7 @@ Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@* Valid values for dictionary size range from 4 KiB to 512 MiB. @item LZMA stream -The LZMA stream, finished by an end of stream marker. Uses default values +The LZMA stream, finished by an "End Of Stream" marker. Uses default values for encoder properties. @ifnothtml @xref{Stream format,,,lzip}, @@ -1043,15 +1043,17 @@ Lzip only uses the LZMA marker @samp{2} ("End Of Stream" marker). Lzlib also uses the LZMA marker @samp{3} ("Sync Flush" marker). @xref{sync_flush}. 
@item CRC32 (4 bytes) -Cyclic Redundancy Check (CRC) of the uncompressed original data. +Cyclic Redundancy Check (CRC) of the original uncompressed data. @item Data size (8 bytes) -Size of the uncompressed original data. +Size of the original uncompressed data. @item Member size (8 bytes) Total size of the member, including header and trailer. This field acts as a distributed index, allows the verification of stream integrity, and -facilitates safe recovery of undamaged members from multimember files. +facilitates the safe recovery of undamaged members from multimember files. +Member size should be limited to @w{2 PiB} to prevent the data size field +from overflowing. @end table @@ -1086,10 +1088,10 @@ Buffer-to-buffer single-member compression @w{(@var{member_size} > total output)}. @verbatim -/* Compresses 'insize' bytes from 'inbuf' to 'outbuf'. - Returns the size of the compressed data in '*outlenp'. - In case of error, or if 'outsize' is too small, returns false and does - not modify '*outlenp'. +/* Compress 'insize' bytes from 'inbuf' to 'outbuf'. + Return the size of the compressed data in '*outlenp'. + In case of error, or if 'outsize' is too small, return false and do not + modify '*outlenp'. */ bool bbcompress( const uint8_t * const inbuf, const int insize, const int dictionary_size, const int match_len_limit, @@ -1131,10 +1133,10 @@ bool bbcompress( const uint8_t * const inbuf, const int insize, Buffer-to-buffer decompression. @verbatim -/* Decompresses 'insize' bytes from 'inbuf' to 'outbuf'. - Returns the size of the decompressed data in '*outlenp'. - In case of error, or if 'outsize' is too small, returns false and does - not modify '*outlenp'. +/* Decompress 'insize' bytes from 'inbuf' to 'outbuf'. + Return the size of the decompressed data in '*outlenp'. + In case of error, or if 'outsize' is too small, return false and do not + modify '*outlenp'. 
*/ bool bbdecompress( const uint8_t * const inbuf, const int insize, uint8_t * const outbuf, const int outsize, @@ -1285,9 +1287,9 @@ Example 2: Multimember compression (user-restarted members). (Call LZ_compress_open with @var{member_size} > largest member). @verbatim -/* Compresses 'infile' to 'outfile' as a multimember stream with one member +/* Compress 'infile' to 'outfile' as a multimember stream with one member for each line of text terminated by a newline character or by EOF. - Returns 0 if success, 1 if error. + Return 0 if success, 1 if error. */ int fflfcompress( struct LZ_Encoder * const encoder, FILE * const infile, FILE * const outfile ) @@ -1332,7 +1334,7 @@ int fflfcompress( struct LZ_Encoder * const encoder, @cindex skipping data errors @verbatim -/* Decompresses 'infile' to 'outfile' with automatic resynchronization to +/* Decompress 'infile' to 'outfile' with automatic resynchronization to next member in case of data error, including the automatic removal of leading garbage. */ @@ -1381,8 +1383,8 @@ for all eternity, if not longer. If you find a bug in lzlib, please send electronic mail to @email{lzip-bug@@nongnu.org}. Include the version number, which you can -find by running @w{@samp{minilzip --version}} or in -@samp{LZ_version_string} from @samp{lzlib.h}. +find by running @w{@samp{minilzip --version}} and +@w{@samp{minilzip -v --check-lib}}. @node Concept index diff --git a/doc/minilzip.1 b/doc/minilzip.1 index 13a2d6d..0c4c06d 100644 --- a/doc/minilzip.1 +++ b/doc/minilzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. -.TH MINILZIP "1" "January 2021" "minilzip 1.12" "User Commands" +.TH MINILZIP "1" "January 2022" "minilzip 1.13" "User Commands" .SH NAME minilzip \- reduces the size of files .SH SYNOPSIS @@ -11,13 +11,14 @@ compatible with lzip 1.4 or newer. .PP Lzip is a lossless data compressor with a user interface similar to the one of gzip or bzip2. 
Lzip uses a simplified form of the 'Lempel\-Ziv\-Markov -chain\-Algorithm' (LZMA) stream format, chosen to maximize safety and -interoperability. Lzip can compress about as fast as gzip (lzip \fB\-0\fR) or -compress most files more than bzip2 (lzip \fB\-9\fR). Decompression speed is -intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 from -a data recovery perspective. Lzip has been designed, written, and tested -with great care to replace gzip and bzip2 as the standard general\-purpose -compressed format for unix\-like systems. +chain\-Algorithm' (LZMA) stream format and provides a 3 factor integrity +checking to maximize interoperability and optimize safety. Lzip can compress +about as fast as gzip (lzip \fB\-0\fR) or compress most files more than bzip2 +(lzip \fB\-9\fR). Decompression speed is intermediate between gzip and bzip2. +Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip +has been designed, written, and tested with great care to replace gzip and +bzip2 as the standard general\-purpose compressed format for unix\-like +systems. .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR @@ -100,7 +101,7 @@ To extract all the files from archive 'foo.tar.lz', use the commands .PP Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or -invalid input file, 3 for an internal consistency error (eg, bug) which +invalid input file, 3 for an internal consistency error (e.g., bug) which caused minilzip to panic. .PP The ideas embodied in lzlib are due to (at least) the following people: @@ -113,9 +114,21 @@ Report bugs to lzip\-bug@nongnu.org .br Lzlib home page: http://www.nongnu.org/lzip/lzlib.html .SH COPYRIGHT -Copyright \(co 2021 Antonio Diaz Diaz. -Using lzlib 1.12 +Copyright \(co 2022 Antonio Diaz Diaz. 
+Using lzlib 1.13 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. +.SH "SEE ALSO" +The full documentation for +.B minilzip +is maintained as a Texinfo manual. If the +.B info +and +.B minilzip +programs are properly installed at your site, the command +.IP +.B info lzlib +.PP +should give you access to the complete manual. @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -21,18 +21,7 @@ static int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs { int32_t * ptr0 = e->eb.mb.pos_array + ( e->eb.mb.cyclic_pos << 1 ); int32_t * ptr1 = ptr0 + 1; - int32_t * newptr; - int len = 0, len0 = 0, len1 = 0; - int maxlen = 3; /* only used if pairs != 0 */ - int num_pairs = 0; - const int pos1 = e->eb.mb.pos + 1; - const int min_pos = ( e->eb.mb.pos > e->eb.mb.dictionary_size ) ? - e->eb.mb.pos - e->eb.mb.dictionary_size : 0; - const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb ); - int count, key2, key3, key4, newpos1; - unsigned tmp; int len_limit = e->match_len_limit; - if( len_limit > Mb_available_bytes( &e->eb.mb ) ) { e->been_flushed = true; @@ -40,12 +29,18 @@ static int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs if( len_limit < 4 ) { *ptr0 = *ptr1 = 0; return 0; } } - tmp = crc32[data[0]] ^ data[1]; - key2 = tmp & ( num_prev_positions2 - 1 ); + int maxlen = 3; /* only used if pairs != 0 */ + int num_pairs = 0; + const int min_pos = ( e->eb.mb.pos > e->eb.mb.dictionary_size ) ? 
+ e->eb.mb.pos - e->eb.mb.dictionary_size : 0; + const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb ); + + unsigned tmp = crc32[data[0]] ^ data[1]; + const int key2 = tmp & ( num_prev_positions2 - 1 ); tmp ^= (unsigned)data[2] << 8; - key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) ); - key4 = num_prev_positions2 + num_prev_positions3 + - ( ( tmp ^ ( crc32[data[3]] << 5 ) ) & e->eb.mb.key4_mask ); + const int key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) ); + const int key4 = num_prev_positions2 + num_prev_positions3 + + ( ( tmp ^ ( crc32[data[3]] << 5 ) ) & e->eb.mb.key4_mask ); if( pairs ) { @@ -54,7 +49,7 @@ static int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs if( np2 > min_pos && e->eb.mb.buffer[np2-1] == data[0] ) { pairs[0].dis = e->eb.mb.pos - np2; - pairs[0].len = maxlen = 2; + pairs[0].len = maxlen = 2 + ( np2 == np3 ); num_pairs = 1; } if( np2 != np3 && np3 > min_pos && e->eb.mb.buffer[np3-1] == data[0] ) @@ -73,19 +68,22 @@ static int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs } } + const int pos1 = e->eb.mb.pos + 1; e->eb.mb.prev_positions[key2] = pos1; e->eb.mb.prev_positions[key3] = pos1; - newpos1 = e->eb.mb.prev_positions[key4]; + int newpos1 = e->eb.mb.prev_positions[key4]; e->eb.mb.prev_positions[key4] = pos1; + int len = 0, len0 = 0, len1 = 0; + + int count; for( count = e->cycles; ; ) { - int delta; if( newpos1 <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; } if( e->been_flushed ) len = 0; - delta = pos1 - newpos1; - newptr = e->eb.mb.pos_array + + const int delta = pos1 - newpos1; + int32_t * const newptr = e->eb.mb.pos_array + ( ( e->eb.mb.cyclic_pos - delta + ( (e->eb.mb.cyclic_pos >= delta) ? 
0 : e->eb.mb.dictionary_size + 1 ) ) << 1 ); if( data[len-delta] == data[len] ) @@ -140,7 +138,6 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e ) for( len_state = 0; len_state < len_states; ++len_state ) { int * const dsp = e->dis_slot_prices[len_state]; - int * const dp = e->dis_prices[len_state]; const Bit_model * const bmds = e->eb.bm_dis_slot[len_state]; int slot = 0; for( ; slot < end_dis_model; ++slot ) @@ -149,6 +146,7 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e ) dsp[slot] = price_symbol6( bmds, slot ) + (((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift_bits ); + int * const dp = e->dis_prices[len_state]; for( dis = 0; dis < start_dis_model; ++dis ) dp[dis] = dsp[dis]; for( ; dis < modeled_distances; ++dis ) @@ -157,7 +155,7 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e ) } -/* Returns the number of bytes advanced (ahead). +/* Return the number of bytes advanced (ahead). trials[0]..trials[ahead-1] contain the steps to encode. ( trials[0].dis4 == -1 ) means literal. A match/rep longer or equal than match_len_limit finishes the sequence. @@ -166,9 +164,8 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, const int reps[num_rep_distances], const State state ) { - int main_len, num_pairs, i, rep, num_trials, len; - int rep_index = 0, cur = 0; - int replens[num_rep_distances]; + int num_pairs, num_trials; + int i, rep, len; if( e->pending_num_pairs > 0 ) /* from previous call */ { @@ -177,8 +174,10 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, } else num_pairs = LZe_read_match_distances( e ); - main_len = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0; + const int main_len = ( num_pairs > 0 ) ? 
e->pairs[num_pairs-1].len : 0; + int replens[num_rep_distances]; + int rep_index = 0; for( i = 0; i < num_rep_distances; ++i ) { replens[i] = Mb_true_match_len( &e->eb.mb, 0, reps[i] + 1 ); @@ -200,7 +199,6 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, return main_len; } - { const int pos_state = Mb_data_position( &e->eb.mb ) & pos_state_mask; const int match_price = price1( e->eb.bm_match[state][pos_state] ); const int rep_match_price = match_price + price1( e->eb.bm_rep[state] ); @@ -238,9 +236,8 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, for( rep = 0; rep < num_rep_distances; ++rep ) { - int price; if( replens[rep] < min_match_len ) continue; - price = rep_match_price + LZeb_price_rep( &e->eb, rep, state, pos_state ); + const int price = rep_match_price + LZeb_price_rep( &e->eb, rep, state, pos_state ); for( len = min_match_len; len <= replens[rep]; ++len ) Tr_update( &e->trials[len], price + Lp_price( &e->rep_len_prices, len, pos_state ), rep, 0 ); @@ -260,17 +257,10 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, if( ++len > e->pairs[i].len && ++i >= num_pairs ) break; } } - } + int cur = 0; while( true ) /* price optimization loop */ { - struct Trial *cur_trial, *next_trial; - int newlen, pos_state, triable_bytes, len_limit; - int start_len = min_match_len; - int next_price, match_price, rep_match_price; - State cur_state; - uint8_t prev_byte, cur_byte, match_byte; - if( !Mb_move_pos( &e->eb.mb ) ) return 0; if( ++cur >= num_trials ) /* no more initialized trials */ { @@ -278,8 +268,8 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, return cur; } - num_pairs = LZe_read_match_distances( e ); - newlen = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0; + const int num_pairs = LZe_read_match_distances( e ); + const int newlen = ( num_pairs > 0 ) ? 
e->pairs[num_pairs-1].len : 0; if( newlen >= e->match_len_limit ) { e->pending_num_pairs = num_pairs; @@ -288,7 +278,8 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, } /* give final values to current trial */ - cur_trial = &e->trials[cur]; + struct Trial * cur_trial = &e->trials[cur]; + State cur_state; { const int dis4 = cur_trial->dis4; int prev_index = cur_trial->prev_index; @@ -319,25 +310,25 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, mtf_reps( dis4, cur_trial->reps ); /* literal is ignored */ } - pos_state = Mb_data_position( &e->eb.mb ) & pos_state_mask; - prev_byte = Mb_peek( &e->eb.mb, 1 ); - cur_byte = Mb_peek( &e->eb.mb, 0 ); - match_byte = Mb_peek( &e->eb.mb, cur_trial->reps[0] + 1 ); + const int pos_state = Mb_data_position( &e->eb.mb ) & pos_state_mask; + const uint8_t prev_byte = Mb_peek( &e->eb.mb, 1 ); + const uint8_t cur_byte = Mb_peek( &e->eb.mb, 0 ); + const uint8_t match_byte = Mb_peek( &e->eb.mb, cur_trial->reps[0] + 1 ); - next_price = cur_trial->price + - price0( e->eb.bm_match[cur_state][pos_state] ); + int next_price = cur_trial->price + + price0( e->eb.bm_match[cur_state][pos_state] ); if( St_is_char( cur_state ) ) next_price += LZeb_price_literal( &e->eb, prev_byte, cur_byte ); else next_price += LZeb_price_matched( &e->eb, prev_byte, cur_byte, match_byte ); /* try last updates to next trial */ - next_trial = &e->trials[cur+1]; + struct Trial * next_trial = &e->trials[cur+1]; Tr_update( next_trial, next_price, -1, cur ); /* literal */ - match_price = cur_trial->price + price1( e->eb.bm_match[cur_state][pos_state] ); - rep_match_price = match_price + price1( e->eb.bm_rep[cur_state] ); + const int match_price = cur_trial->price + price1( e->eb.bm_match[cur_state][pos_state] ); + const int rep_match_price = match_price + price1( e->eb.bm_rep[cur_state] ); if( match_byte == cur_byte && next_trial->dis4 != 0 && next_trial->prev_index2 == single_step_trial ) @@ -352,11 +343,11 @@ static int 
LZe_sequence_optimizer( struct LZ_encoder * const e, } } - triable_bytes = + const int triable_bytes = min( Mb_available_bytes( &e->eb.mb ), max_num_trials - 1 - cur ); if( triable_bytes < min_match_len ) continue; - len_limit = min( e->match_len_limit, triable_bytes ); + const int len_limit = min( e->match_len_limit, triable_bytes ); /* try literal + rep0 */ if( match_byte != cur_byte && next_trial->prev_index != cur ) @@ -380,19 +371,20 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, } } + int start_len = min_match_len; + /* try rep distances */ for( rep = 0; rep < num_rep_distances; ++rep ) { const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb ); const int dis = cur_trial->reps[rep] + 1; - int price; if( data[0-dis] != data[0] || data[1-dis] != data[1] ) continue; for( len = min_match_len; len < len_limit; ++len ) if( data[len-dis] != data[len] ) break; while( num_trials < cur + len ) e->trials[++num_trials].price = infinite_price; - price = rep_match_price + LZeb_price_rep( &e->eb, rep, cur_state, pos_state ); + int price = rep_match_price + LZeb_price_rep( &e->eb, rep, cur_state, pos_state ); for( i = min_match_len; i <= len; ++i ) Tr_update( &e->trials[cur+i], price + Lp_price( &e->rep_len_prices, i, pos_state ), rep, cur ); @@ -400,17 +392,14 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, if( rep == 0 ) start_len = len + 1; /* discard shorter matches */ /* try rep + literal + rep0 */ - { int len2 = len + 1; const int limit = min( e->match_len_limit + len2, triable_bytes ); - int pos_state2; - State state2; while( len2 < limit && data[len2-dis] == data[len2] ) ++len2; len2 -= len + 1; if( len2 < min_match_len ) continue; - pos_state2 = ( pos_state + len ) & pos_state_mask; - state2 = St_set_rep( cur_state ); + int pos_state2 = ( pos_state + len ) & pos_state_mask; + State state2 = St_set_rep( cur_state ); price += Lp_price( &e->rep_len_prices, len, pos_state ) + price0( e->eb.bm_match[state2][pos_state2] ) + 
LZeb_price_matched( &e->eb, data[len-1], data[len], data[len-dis] ); @@ -423,21 +412,19 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, e->trials[++num_trials].price = infinite_price; Tr_update3( &e->trials[cur+len+1+len2], price, rep, cur + len + 1, cur ); } - } /* try matches */ if( newlen >= start_len && newlen <= len_limit ) { - int dis; const int normal_match_price = match_price + price0( e->eb.bm_rep[cur_state] ); while( num_trials < cur + newlen ) e->trials[++num_trials].price = infinite_price; - i = 0; + int i = 0; while( e->pairs[i].len < start_len ) ++i; - dis = e->pairs[i].dis; + int dis = e->pairs[i].dis; for( len = start_len; ; ++len ) { int price = normal_match_price + LZe_price_pair( e, dis, len, pos_state ); @@ -484,7 +471,7 @@ static bool LZe_encode_member( struct LZ_encoder * const e ) const int dis_price_count = best ? 1 : 512; const int align_price_count = best ? 1 : dis_align_size; const int price_count = ( e->match_len_limit > 36 ) ? 1013 : 4093; - int ahead, i; + int i; State * const state = &e->eb.state; if( e->eb.member_finished ) return true; @@ -494,11 +481,10 @@ static bool LZe_encode_member( struct LZ_encoder * const e ) if( Mb_data_position( &e->eb.mb ) == 0 && !Mb_data_finished( &e->eb.mb ) ) /* encode first byte */ { - const uint8_t prev_byte = 0; - uint8_t cur_byte; if( !Mb_enough_available_bytes( &e->eb.mb ) || !Re_enough_free_bytes( &e->eb.renc ) ) return true; - cur_byte = Mb_peek( &e->eb.mb, 0 ); + const uint8_t prev_byte = 0; + const uint8_t cur_byte = Mb_peek( &e->eb.mb, 0 ); Re_encode_bit( &e->eb.renc, &e->eb.bm_match[*state][0], 0 ); LZeb_encode_literal( &e->eb, prev_byte, cur_byte ); CRC32_update_byte( &e->eb.crc, cur_byte ); @@ -525,7 +511,7 @@ static bool LZe_encode_member( struct LZ_encoder * const e ) Lp_update_prices( &e->rep_len_prices ); } - ahead = LZe_sequence_optimizer( e, e->eb.reps, *state ); + int ahead = LZe_sequence_optimizer( e, e->eb.reps, *state ); e->price_counter -= ahead; for( i = 0; 
ahead > 0; ) @@ -542,14 +528,13 @@ static bool LZe_encode_member( struct LZ_encoder * const e ) const uint8_t prev_byte = Mb_peek( &e->eb.mb, ahead + 1 ); const uint8_t cur_byte = Mb_peek( &e->eb.mb, ahead ); CRC32_update_byte( &e->eb.crc, cur_byte ); - if( St_is_char( *state ) ) + if( ( *state = St_set_char( *state ) ) < 4 ) LZeb_encode_literal( &e->eb, prev_byte, cur_byte ); else { const uint8_t match_byte = Mb_peek( &e->eb.mb, ahead + e->eb.reps[0] + 1 ); LZeb_encode_matched( &e->eb, prev_byte, cur_byte, match_byte ); } - *state = St_set_char( *state ); } else /* match or repeated match */ { @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -194,10 +194,9 @@ static inline int LZeb_price_rep( const struct LZ_encoder_base * const eb, const int rep, const State state, const int pos_state ) { - int price; if( rep == 0 ) return price0( eb->bm_rep0[state] ) + price1( eb->bm_len[state][pos_state] ); - price = price1( eb->bm_rep0[state] ); + int price = price1( eb->bm_rep0[state] ); if( rep == 1 ) price += price0( eb->bm_rep1[state] ); else diff --git a/encoder_base.c b/encoder_base.c index c1ef9ef..4535352 100644 --- a/encoder_base.c +++ b/encoder_base.c @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -47,7 +47,6 @@ static bool Mb_init( struct Matchfinder_base * const mb, const int before_size, { const int buffer_size_limit = ( dict_factor * dict_size ) + before_size + after_size; - unsigned size; int i; mb->partial_data_pos = 0; @@ -66,9 +65,8 @@ static bool Mb_init( struct Matchfinder_base * const mb, const int before_size, mb->saved_dictionary_size = dict_size; mb->dictionary_size = dict_size; mb->pos_limit = mb->buffer_size - after_size; - size = 1 << max( 16, real_bits( mb->dictionary_size - 1 ) - 2 ); - if( mb->dictionary_size > 1 << 26 ) /* 64 MiB */ - size >>= 1; + unsigned size = 1 << max( 16, real_bits( mb->dictionary_size - 1 ) - 2 ); + if( mb->dictionary_size > 1 << 26 ) size >>= 1; /* 64 MiB */ mb->key4_mask = size - 1; /* increases with dictionary size */ size += num_prev_positions23; mb->num_prev_positions = size; @@ -88,8 +86,7 @@ static bool Mb_init( struct Matchfinder_base * const mb, const int before_size, static void Mb_adjust_array( struct Matchfinder_base * const mb ) { int size = 1 << max( 16, real_bits( mb->dictionary_size - 1 ) - 2 ); - if( mb->dictionary_size > 1 << 26 ) /* 64 MiB */ - size >>= 1; + if( mb->dictionary_size > 1 << 26 ) size >>= 1; /* 64 MiB */ mb->key4_mask = size - 1; size += mb->num_prev_positions23; mb->num_prev_positions = size; @@ -129,21 +126,21 @@ static void Mb_reset( struct Matchfinder_base * const mb ) /* End Of Stream marker => (dis == 0xFFFFFFFFU, len == min_match_len) */ static void LZeb_try_full_flush( struct LZ_encoder_base * const eb ) { - int i; - const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask; - const State state = eb->state; - Lzip_trailer trailer; if( eb->member_finished || Cb_free_bytes( &eb->renc.cb ) < max_marker_size + eb->renc.ff_count + Lt_size ) return; eb->member_finished = true; + const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask; + const State state 
= eb->state; Re_encode_bit( &eb->renc, &eb->bm_match[state][pos_state], 1 ); Re_encode_bit( &eb->renc, &eb->bm_rep[state], 0 ); LZeb_encode_pair( eb, 0xFFFFFFFFU, min_match_len, pos_state ); Re_flush( &eb->renc ); + Lzip_trailer trailer; Lt_set_data_crc( trailer, LZeb_crc( eb ) ); Lt_set_data_size( trailer, Mb_data_position( &eb->mb ) ); Lt_set_member_size( trailer, Re_member_position( &eb->renc ) + Lt_size ); + int i; for( i = 0; i < Lt_size; ++i ) Cb_put_byte( &eb->renc.cb, trailer[i] ); } @@ -152,13 +149,13 @@ static void LZeb_try_full_flush( struct LZ_encoder_base * const eb ) /* Sync Flush marker => (dis == 0xFFFFFFFFU, len == min_match_len + 1) */ static void LZeb_try_sync_flush( struct LZ_encoder_base * const eb ) { - const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask; - const State state = eb->state; const unsigned min_size = eb->renc.ff_count + max_marker_size; if( eb->member_finished || Cb_free_bytes( &eb->renc.cb ) < min_size + max_marker_size ) return; eb->mb.sync_flush_pending = false; const unsigned long long old_mpos = Re_member_position( &eb->renc ); + const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask; + const State state = eb->state; do { /* size of markers must be >= rd_min_available_bytes + 5 */ Re_encode_bit( &eb->renc, &eb->bm_match[state][pos_state], 1 ); Re_encode_bit( &eb->renc, &eb->bm_rep[state], 0 ); diff --git a/encoder_base.h b/encoder_base.h index e727a7d..17ffc93 100644 --- a/encoder_base.h +++ b/encoder_base.h @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -147,10 +147,9 @@ static inline int price_bit( const Bit_model bm, const bool bit ) static inline int price_symbol3( const Bit_model bm[], int symbol ) { - int price; bool bit = symbol & 1; symbol |= 8; symbol >>= 1; - price = price_bit( bm[symbol], bit ); + int price = price_bit( bm[symbol], bit ); bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit ); return price + price_bit( bm[1], symbol & 1 ); } @@ -158,10 +157,9 @@ static inline int price_symbol3( const Bit_model bm[], int symbol ) static inline int price_symbol6( const Bit_model bm[], unsigned symbol ) { - int price; bool bit = symbol & 1; symbol |= 64; symbol >>= 1; - price = price_bit( bm[symbol], bit ); + int price = price_bit( bm[symbol], bit ); bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit ); bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit ); bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit ); @@ -172,10 +170,9 @@ static inline int price_symbol6( const Bit_model bm[], unsigned symbol ) static inline int price_symbol8( const Bit_model bm[], int symbol ) { - int price; bool bit = symbol & 1; symbol |= 0x100; symbol >>= 1; - price = price_bit( bm[symbol], bit ); + int price = price_bit( bm[symbol], bit ); bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit ); bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit ); bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit ); @@ -427,10 +424,9 @@ static inline void Re_encode_bit( struct Range_encoder * const renc, static inline void Re_encode_tree3( struct Range_encoder * const renc, Bit_model bm[], const int symbol ) { - int model; bool bit = ( symbol >> 2 ) & 1; Re_encode_bit( renc, &bm[1], bit ); - model = 2 | bit; + int model = 2 | bit; bit = ( symbol >> 1 ) & 1; Re_encode_bit( renc, &bm[model], bit ); model <<= 1; model |= bit; 
Re_encode_bit( renc, &bm[model], symbol & 1 ); @@ -439,10 +435,9 @@ static inline void Re_encode_tree3( struct Range_encoder * const renc, static inline void Re_encode_tree6( struct Range_encoder * const renc, Bit_model bm[], const unsigned symbol ) { - int model; bool bit = ( symbol >> 5 ) & 1; Re_encode_bit( renc, &bm[1], bit ); - model = 2 | bit; + int model = 2 | bit; bit = ( symbol >> 4 ) & 1; Re_encode_bit( renc, &bm[model], bit ); model <<= 1; model |= bit; bit = ( symbol >> 3 ) & 1; @@ -583,8 +578,7 @@ static inline int LZeb_price_matched( const struct LZ_encoder_base * const eb, static inline void LZeb_encode_literal( struct LZ_encoder_base * const eb, const uint8_t prev_byte, const uint8_t symbol ) - { Re_encode_tree8( &eb->renc, eb->bm_literal[get_lit_state(prev_byte)], - symbol ); } + { Re_encode_tree8( &eb->renc, eb->bm_literal[get_lit_state(prev_byte)], symbol ); } static inline void LZeb_encode_matched( struct LZ_encoder_base * const eb, const uint8_t prev_byte, const uint8_t symbol, const uint8_t match_byte ) @@ -595,8 +589,8 @@ static inline void LZeb_encode_pair( struct LZ_encoder_base * const eb, const unsigned dis, const int len, const int pos_state ) { - const unsigned dis_slot = get_slot( dis ); Re_encode_len( &eb->renc, &eb->match_len_model, len, pos_state ); + const unsigned dis_slot = get_slot( dis ); Re_encode_tree6( &eb->renc, eb->bm_dis_slot[get_len_state(len)], dis_slot ); if( dis_slot >= start_dis_model ) diff --git a/fast_encoder.c b/fast_encoder.c index bdcbb97..618c3d6 100644 --- a/fast_encoder.c +++ b/fast_encoder.c @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -20,25 +20,24 @@ static int FLZe_longest_match_len( struct FLZ_encoder * const fe, int * const distance ) { enum { len_limit = 16 }; - const uint8_t * const data = Mb_ptr_to_current_pos( &fe->eb.mb ); int32_t * ptr0 = fe->eb.mb.pos_array + fe->eb.mb.cyclic_pos; - const int pos1 = fe->eb.mb.pos + 1; - int maxlen = 0, newpos1, count; const int available = min( Mb_available_bytes( &fe->eb.mb ), max_match_len ); if( available < len_limit ) { *ptr0 = 0; return 0; } + const uint8_t * const data = Mb_ptr_to_current_pos( &fe->eb.mb ); fe->key4 = ( ( fe->key4 << 4 ) ^ data[3] ) & fe->eb.mb.key4_mask; - newpos1 = fe->eb.mb.prev_positions[fe->key4]; + const int pos1 = fe->eb.mb.pos + 1; + int newpos1 = fe->eb.mb.prev_positions[fe->key4]; fe->eb.mb.prev_positions[fe->key4] = pos1; + int maxlen = 0, count; for( count = 4; ; ) { - int32_t * newptr; int delta; if( newpos1 <= 0 || --count < 0 || ( delta = pos1 - newpos1 ) > fe->eb.mb.dictionary_size ) { *ptr0 = 0; break; } - newptr = fe->eb.mb.pos_array + + int32_t * const newptr = fe->eb.mb.pos_array + ( fe->eb.mb.cyclic_pos - delta + ( ( fe->eb.mb.cyclic_pos >= delta ) ? 
0 : fe->eb.mb.dictionary_size + 1 ) ); @@ -71,11 +70,10 @@ static bool FLZe_encode_member( struct FLZ_encoder * const fe ) if( Mb_data_position( &fe->eb.mb ) == 0 && !Mb_data_finished( &fe->eb.mb ) ) /* encode first byte */ { - const uint8_t prev_byte = 0; - uint8_t cur_byte; if( !Mb_enough_available_bytes( &fe->eb.mb ) || !Re_enough_free_bytes( &fe->eb.renc ) ) return true; - cur_byte = Mb_peek( &fe->eb.mb, 0 ); + const uint8_t prev_byte = 0; + const uint8_t cur_byte = Mb_peek( &fe->eb.mb, 0 ); Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[*state][0], 0 ); LZeb_encode_literal( &fe->eb, prev_byte, cur_byte ); CRC32_update_byte( &fe->eb.crc, cur_byte ); @@ -86,13 +84,12 @@ static bool FLZe_encode_member( struct FLZ_encoder * const fe ) while( !Mb_data_finished( &fe->eb.mb ) && Re_member_position( &fe->eb.renc ) < fe->eb.member_size_limit ) { - int match_distance = 0; /* avoid warning from gcc 6.1.0 */ - int main_len, pos_state; - int len = 0; if( !Mb_enough_available_bytes( &fe->eb.mb ) || !Re_enough_free_bytes( &fe->eb.renc ) ) return true; - main_len = FLZe_longest_match_len( fe, &match_distance ); - pos_state = Mb_data_position( &fe->eb.mb ) & pos_state_mask; + int match_distance = 0; /* avoid warning from gcc 6.1.0 */ + const int main_len = FLZe_longest_match_len( fe, &match_distance ); + const int pos_state = Mb_data_position( &fe->eb.mb ) & pos_state_mask; + int len = 0; for( i = 0; i < num_rep_distances; ++i ) { @@ -109,11 +106,10 @@ static bool FLZe_encode_member( struct FLZ_encoder * const fe ) Re_encode_bit( &fe->eb.renc, &fe->eb.bm_len[*state][pos_state], 1 ); else { - int distance; Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep1[*state], rep > 1 ); if( rep > 1 ) Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep2[*state], rep > 2 ); - distance = fe->eb.reps[rep]; + const int distance = fe->eb.reps[rep]; for( i = rep; i > 0; --i ) fe->eb.reps[i] = fe->eb.reps[i-1]; fe->eb.reps[0] = distance; } @@ -138,7 +134,6 @@ static bool FLZe_encode_member( struct FLZ_encoder * 
const fe ) continue; } - { const uint8_t prev_byte = Mb_peek( &fe->eb.mb, 1 ); const uint8_t cur_byte = Mb_peek( &fe->eb.mb, 0 ); const uint8_t match_byte = Mb_peek( &fe->eb.mb, fe->eb.reps[0] + 1 ); @@ -169,12 +164,10 @@ static bool FLZe_encode_member( struct FLZ_encoder * const fe ) /* literal byte */ Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[*state][pos_state], 0 ); - if( St_is_char( *state ) ) + if( ( *state = St_set_char( *state ) ) < 4 ) LZeb_encode_literal( &fe->eb, prev_byte, cur_byte ); else LZeb_encode_matched( &fe->eb, prev_byte, cur_byte, match_byte ); - *state = St_set_char( *state ); - } } LZeb_try_full_flush( &fe->eb ); diff --git a/fast_encoder.h b/fast_encoder.h index 1c3a6ff..54756bd 100644 --- a/fast_encoder.h +++ b/fast_encoder.h @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/ffexample.c b/ffexample.c index 2891f02..59345ee 100644 --- a/ffexample.c +++ b/ffexample.c @@ -1,5 +1,5 @@ /* File to file example - Test program for the library lzlib - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. This program is free software: you have unlimited permission to copy, distribute, and modify it. @@ -20,7 +20,7 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> -#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) +#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ #include <fcntl.h> #include <io.h> #endif @@ -178,9 +178,9 @@ int ffmmcompress( FILE * const infile, FILE * const outfile ) } -/* Compresses 'infile' to 'outfile' as a multimember stream with one member +/* Compress 'infile' to 'outfile' as a multimember stream with one member for each line of text terminated by a newline character or by EOF. 
- Returns 0 if success, 1 if error. + Return 0 if success, 1 if error. */ int fflfcompress( struct LZ_Encoder * const encoder, FILE * const infile, FILE * const outfile ) @@ -219,7 +219,7 @@ int fflfcompress( struct LZ_Encoder * const encoder, } -/* Decompresses 'infile' to 'outfile' with automatic resynchronization to +/* Decompress 'infile' to 'outfile' with automatic resynchronization to next member in case of data error, including the automatic removal of leading garbage. */ @@ -257,7 +257,7 @@ int ffrsdecompress( struct LZ_Decoder * const decoder, int main( const int argc, const char * const argv[] ) { -#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) +#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ setmode( STDIN_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY ); #endif @@ -1,5 +1,5 @@ /* Lzcheck - Test program for the library lzlib - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you have unlimited permission to copy, distribute, and modify it. @@ -133,10 +133,11 @@ static void xclose_decoder( struct LZ_Decoder * const decoder, } -/* Returns the next (usually newline-terminated) chunk of data from file. +/* Return the next (usually newline-terminated) chunk of data from file. The size returned in *sizep is always <= buffer_size. - If sizep is a null pointer, rewinds the file, resets state, and returns. - If file is at EOF, returns an empty line. */ + If sizep is a null pointer, rewind the file, reset state, and return. + If file is at EOF, return an empty line. 
+*/ static const uint8_t * next_line( FILE * const file, int * const sizep ) { static int l = 0; @@ -332,7 +333,7 @@ int main( const int argc, const char * const argv[] ) if( argc < 2 ) { - fputs( "Usage: lzcheck filename.txt...\n", stderr ); + fputs( "Usage: lzcheck [-m|-s] filename.txt...\n", stderr ); return 1; } @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -168,6 +168,7 @@ static const uint32_t crc32[256] = static inline void CRC32_update_byte( uint32_t * const crc, const uint8_t byte ) { *crc = crc32[(*crc^byte)&0xFF] ^ ( *crc >> 8 ); } +/* about as fast as it is possible without messing with endianness */ static inline void CRC32_update_buf( uint32_t * const crc, const uint8_t * const buffer, const int size ) @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -24,9 +24,9 @@ extern "C" { /* LZ_API_VERSION was first defined in lzlib 1.8 to 1. Since lzlib 1.12, LZ_API_VERSION is defined as (major * 1000 + minor). 
*/ -#define LZ_API_VERSION 1012 +#define LZ_API_VERSION 1013 -static const char * const LZ_version_string = "1.12"; +static const char * const LZ_version_string = "1.13"; enum LZ_Errno { LZ_ok = 0, LZ_bad_argument, LZ_mem_error, LZ_sequence_error, LZ_header_error, LZ_unexpected_eof, @@ -1,5 +1,5 @@ /* Minilzip - Test program for the library lzlib - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,11 +18,12 @@ Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid input file, 3 for an internal consistency error - (eg, bug) which caused minilzip to panic. + (e.g., bug) which caused minilzip to panic. */ #define _FILE_OFFSET_BITS 64 +#include <ctype.h> #include <errno.h> #include <fcntl.h> #include <limits.h> @@ -35,9 +36,9 @@ #include <unistd.h> #include <utime.h> #include <sys/stat.h> -#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) +#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ #include <io.h> -#if defined(__MSVCRT__) +#if defined __MSVCRT__ #define fchmod(x,y) 0 #define fchown(x,y,z) 0 #define strtoull strtoul @@ -50,7 +51,7 @@ #define S_IWOTH 0 #endif #endif -#if defined(__DJGPP__) +#if defined __DJGPP__ #define S_ISSOCK(x) 0 #define S_ISVTX 0 #endif @@ -67,6 +68,11 @@ #error "Environments where CHAR_BIT != 8 are not supported." #endif +#if ( defined SIZE_MAX && SIZE_MAX < UINT_MAX ) || \ + ( defined SSIZE_MAX && SSIZE_MAX < INT_MAX ) +#error "Environments where 'size_t' is narrower than 'int' are not supported." +#endif + #ifndef max #define max(x,y) ((x) >= (y) ? 
(x) : (y)) #endif @@ -85,7 +91,7 @@ static const char * const mem_msg = "Not enough memory."; int verbosity = 0; static const char * const program_name = "minilzip"; -static const char * const program_year = "2021"; +static const char * const program_year = "2022"; static const char * invocation_name = "minilzip"; /* default value */ static const struct { const char * from; const char * to; } known_extensions[] = { @@ -114,13 +120,14 @@ static void show_help( void ) "compatible with lzip 1.4 or newer.\n" "\nLzip is a lossless data compressor with a user interface similar to the one\n" "of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov\n" - "chain-Algorithm' (LZMA) stream format, chosen to maximize safety and\n" - "interoperability. Lzip can compress about as fast as gzip (lzip -0) or\n" - "compress most files more than bzip2 (lzip -9). Decompression speed is\n" - "intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 from\n" - "a data recovery perspective. Lzip has been designed, written, and tested\n" - "with great care to replace gzip and bzip2 as the standard general-purpose\n" - "compressed format for unix-like systems.\n" + "chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity\n" + "checking to maximize interoperability and optimize safety. Lzip can compress\n" + "about as fast as gzip (lzip -0) or compress most files more than bzip2\n" + "(lzip -9). Decompression speed is intermediate between gzip and bzip2.\n" + "Lzip is better than gzip and bzip2 from a data recovery perspective. 
Lzip\n" + "has been designed, written, and tested with great care to replace gzip and\n" + "bzip2 as the standard general-purpose compressed format for unix-like\n" + "systems.\n" "\nUsage: %s [options] [files]\n", invocation_name ); printf( "\nOptions:\n" " -h, --help display this help and exit\n" @@ -158,7 +165,7 @@ static void show_help( void ) "'tar -xf foo.tar.lz' or 'minilzip -cd foo.tar.lz | tar -xf -'.\n" "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" - "invalid input file, 3 for an internal consistency error (eg, bug) which\n" + "invalid input file, 3 for an internal consistency error (e.g., bug) which\n" "caused minilzip to panic.\n" "\nThe ideas embodied in lzlib are due to (at least) the following people:\n" "Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the\n" @@ -181,17 +188,48 @@ static void show_version( void ) } -int check_lib() +static inline void set_retval( int * retval, const int new_val ) + { if( *retval < new_val ) *retval = new_val; } + + +static int check_lzlib_ver() /* <major>.<minor> or <major>.<minor>[a-z.-]* */ { - bool warning = false; +#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012 + const unsigned char * p = (unsigned char *)LZ_version_string; + unsigned major = 0, minor = 0; + while( major < 100000 && isdigit( *p ) ) + { major *= 10; major += *p - '0'; ++p; } + if( *p == '.' ) ++p; + else +out: { show_error( "Invalid LZ_version_string in lzlib.h", 0, false ); return 2; } + while( minor < 100 && isdigit( *p ) ) + { minor *= 10; minor += *p - '0'; ++p; } + if( *p && *p != '-' && *p != '.' 
&& !islower( *p ) ) goto out; + const unsigned version = major * 1000 + minor; + if( LZ_API_VERSION != version ) + { + if( verbosity >= 0 ) + fprintf( stderr, "%s: Version mismatch in lzlib.h: " + "LZ_API_VERSION = %u, should be %u.\n", + program_name, LZ_API_VERSION, version ); + return 2; + } +#endif + return 0; + } + + +static int check_lib() + { + int retval = check_lzlib_ver(); if( strcmp( LZ_version_string, LZ_version() ) != 0 ) - { warning = true; + { set_retval( &retval, 1 ); if( verbosity >= 0 ) printf( "warning: LZ_version_string != LZ_version() (%s vs %s)\n", LZ_version_string, LZ_version() ); } #if defined LZ_API_VERSION && LZ_API_VERSION >= 1012 if( LZ_API_VERSION != LZ_api_version() ) - { warning = true; + { set_retval( &retval, 1 ); if( verbosity >= 0 ) printf( "warning: LZ_API_VERSION != LZ_api_version() (%u vs %u)\n", LZ_API_VERSION, LZ_api_version() ); } @@ -208,7 +246,7 @@ int check_lib() "Using an unknown LZ_API_VERSION\n", LZ_API_VERSION ); #endif } - return warning; + return retval; } @@ -234,8 +272,6 @@ struct Pretty_print static void Pp_init( struct Pretty_print * const pp, const char * const filenames[], const int num_filenames ) { - unsigned stdin_name_len; - int i; pp->name = 0; pp->padded_name = 0; pp->stdin_name = "(stdin)"; @@ -243,7 +279,8 @@ static void Pp_init( struct Pretty_print * const pp, pp->first_post = false; if( verbosity <= 0 ) return; - stdin_name_len = strlen( pp->stdin_name ); + const unsigned stdin_name_len = strlen( pp->stdin_name ); + int i; for( i = 0; i < num_filenames; ++i ) { const char * const s = filenames[i]; @@ -277,16 +314,14 @@ static void Pp_reset( struct Pretty_print * const pp ) static void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ) { - if( verbosity >= 0 ) + if( verbosity < 0 ) return; + if( pp->first_post ) { - if( pp->first_post ) - { - pp->first_post = false; - fputs( pp->padded_name, stderr ); - if( !msg ) fflush( stderr ); - } - if( msg ) fprintf( stderr, "%s\n", msg ); + 
pp->first_post = false; + fputs( pp->padded_name, stderr ); + if( !msg ) fflush( stderr ); } + if( msg ) fprintf( stderr, "%s\n", msg ); } @@ -307,17 +342,53 @@ static void show_header( const unsigned dictionary_size ) } -static unsigned long long getnum( const char * const ptr, +/* separate large numbers >= 100_000 in groups of 3 digits using '_' */ +static const char * format_num3( unsigned long long num ) + { + const char * const si_prefix = "kMGTPEZY"; + const char * const binary_prefix = "KMGTPEZY"; + enum { buffers = 8, bufsize = 4 * sizeof (long long) }; + static char buffer[buffers][bufsize]; /* circle of static buffers for printf */ + static int current = 0; + int i; + char * const buf = buffer[current++]; current %= buffers; + char * p = buf + bufsize - 1; /* fill the buffer backwards */ + *p = 0; /* terminator */ + if( num > 1024 ) + { + char prefix = 0; /* try binary first, then si */ + for( i = 0; i < 8 && num >= 1024 && num % 1024 == 0; ++i ) + { num /= 1024; prefix = binary_prefix[i]; } + if( prefix ) *(--p) = 'i'; + else + for( i = 0; i < 8 && num >= 1000 && num % 1000 == 0; ++i ) + { num /= 1000; prefix = si_prefix[i]; } + if( prefix ) *(--p) = prefix; + } + const bool split = num >= 100000; + + for( i = 0; ; ) + { + *(--p) = num % 10 + '0'; num /= 10; if( num == 0 ) break; + if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; } + } + return p; + } + + +static unsigned long long getnum( const char * const arg, + const char * const option_name, const unsigned long long llimit, const unsigned long long ulimit ) { - unsigned long long result; char * tail; errno = 0; - result = strtoull( ptr, &tail, 0 ); - if( tail == ptr ) + unsigned long long result = strtoull( arg, &tail, 0 ); + if( tail == arg ) { - show_error( "Bad or missing numerical argument.", 0, true ); + if( verbosity >= 0 ) + fprintf( stderr, "%s: Bad or missing numerical argument in " + "option '%s'.\n", program_name, option_name ); exit( 1 ); } @@ -340,7 +411,9 @@ static unsigned long long 
getnum( const char * const ptr, } if( exponent <= 0 ) { - show_error( "Bad multiplier in numerical argument.", 0, true ); + if( verbosity >= 0 ) + fprintf( stderr, "%s: Bad multiplier in numerical argument of " + "option '%s'.\n", program_name, option_name ); exit( 1 ); } for( i = 0; i < exponent; ++i ) @@ -352,23 +425,25 @@ static unsigned long long getnum( const char * const ptr, if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; if( errno ) { - show_error( "Numerical argument out of limits.", 0, false ); + if( verbosity >= 0 ) + fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] " + "in option '%s'.\n", program_name, format_num3( llimit ), + format_num3( ulimit ), option_name ); exit( 1 ); } return result; } -static int get_dict_size( const char * const arg ) +static int get_dict_size( const char * const arg, const char * const option_name ) { char * tail; - int dictionary_size; const long bits = strtol( arg, &tail, 0 ); if( bits >= LZ_min_dictionary_bits() && bits <= LZ_max_dictionary_bits() && *tail == 0 ) return 1 << bits; - dictionary_size = getnum( arg, LZ_min_dictionary_size(), - LZ_max_dictionary_size() ); + int dictionary_size = getnum( arg, option_name, LZ_min_dictionary_size(), + LZ_max_dictionary_size() ); if( dictionary_size == 65535 ) ++dictionary_size; /* no fast encoder */ return dictionary_size; } @@ -442,34 +517,31 @@ static int open_instream( const char * const name, struct stat * const in_statsp const enum Mode program_mode, const int eindex, const bool one_to_one, const bool recompress ) { - int infd = -1; if( program_mode == m_compress && !recompress && eindex >= 0 ) { if( verbosity >= 0 ) fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n", program_name, name, known_extensions[eindex].from ); + return -1; } + int infd = open( name, O_RDONLY | O_BINARY ); + if( infd < 0 ) + show_file_error( name, "Can't open input file", errno ); else { - infd = open( name, O_RDONLY | O_BINARY ); - if( infd < 0 ) 
- show_file_error( name, "Can't open input file", errno ); - else + const int i = fstat( infd, in_statsp ); + const mode_t mode = in_statsp->st_mode; + const bool can_read = ( i == 0 && + ( S_ISBLK( mode ) || S_ISCHR( mode ) || + S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); + if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) ) { - const int i = fstat( infd, in_statsp ); - const mode_t mode = in_statsp->st_mode; - const bool can_read = ( i == 0 && - ( S_ISBLK( mode ) || S_ISCHR( mode ) || - S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); - if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) ) - { - if( verbosity >= 0 ) - fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", - program_name, name, ( can_read && one_to_one ) ? - ",\n and neither '-c' nor '-o' were specified" : "" ); - close( infd ); - infd = -1; - } + if( verbosity >= 0 ) + fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", + program_name, name, ( can_read && one_to_one ) ? + ",\n and neither '-c' nor '-o' were specified" : "" ); + close( infd ); + infd = -1; } } return infd; @@ -532,10 +604,6 @@ static void signal_handler( int sig ) } -static inline void set_retval( int * retval, const int new_val ) - { if( *retval < new_val ) *retval = new_val; } - - static bool check_tty_in( const char * const input_filename, const int infd, const enum Mode program_mode, int * const retval ) { @@ -543,7 +611,7 @@ static bool check_tty_in( const char * const input_filename, const int infd, isatty( infd ) ) /* for example /dev/tty */ { show_file_error( input_filename, "I won't read compressed data from a terminal.", 0 ); - close( infd ); set_retval( retval, 1 ); + close( infd ); set_retval( retval, 2 ); if( program_mode != m_test ) cleanup_and_fail( *retval ); return false; } return true; @@ -594,8 +662,8 @@ static void close_and_set_permissions( const struct stat * const in_statsp ) } -/* Returns the number of bytes really read. 
- If (returned value < size) and (errno == 0), means EOF was reached. +/* Return the number of bytes really read. + If (value returned < size) and (errno == 0), means EOF was reached. */ static int readblock( const int fd, uint8_t * const buf, const int size ) { @@ -613,8 +681,8 @@ static int readblock( const int fd, uint8_t * const buf, const int size ) } -/* Returns the number of bytes really written. - If (returned value < size), it is always an error. +/* Return the number of bytes really written. + If (value returned < size), it is always an error. */ static int writeblock( const int fd, const uint8_t * const buf, const int size ) { @@ -659,7 +727,7 @@ static int do_compress( struct LZ_Encoder * const encoder, while( true ) { - int in_size = 0, out_size; + int in_size = 0; while( LZ_compress_write_size( encoder ) > 0 ) { const int size = min( LZ_compress_write_size( encoder ), buffer_size ); @@ -675,7 +743,7 @@ static int do_compress( struct LZ_Encoder * const encoder, /* else LZ_compress_sync_flush( encoder ); */ in_size += rd; } - out_size = LZ_compress_read( encoder, buffer, buffer_size ); + const int out_size = LZ_compress_read( encoder, buffer, buffer_size ); if( out_size < 0 ) { Pp_show_msg( pp, 0 ); @@ -843,7 +911,7 @@ static int do_decompress( struct LZ_Decoder * const decoder, const int infd, fputs( testing ? 
"ok\n" : "done\n", stderr ); Pp_reset( pp ); } } - first_member = false; + first_member = false; /* member decompressed successfully */ } if( rd <= 0 ) break; } @@ -985,23 +1053,15 @@ int main( const int argc, const char * const argv[] ) unsigned long long member_size = max_member_size; unsigned long long volume_size = 0; const char * default_output_filename = ""; - static struct Arg_parser parser; /* static because valgrind complains */ - static struct Pretty_print pp; /* and memory management in C sucks */ - static const char ** filenames = 0; - int num_filenames = 0; enum Mode program_mode = m_compress; - int argind = 0; - int failed_tests = 0; - int retval = 0; int i; - bool filenames_given = false; bool force = false; bool ignore_trailing = true; bool keep_input_files = false; bool loose_trailing = false; bool recompress = false; - bool stdin_used = false; bool to_stdout = false; + if( argc > 0 ) invocation_name = argv[0]; enum { opt_chk = 256, opt_lt }; const struct ap_Option options[] = @@ -1037,25 +1097,27 @@ int main( const int argc, const char * const argv[] ) { opt_lt, "loose-trailing", ap_no }, { 0, 0, ap_no } }; - if( argc > 0 ) invocation_name = argv[0]; - + /* static because valgrind complains and memory management in C sucks */ + static struct Arg_parser parser; if( !ap_init( &parser, argc, argv, options, 0 ) ) { show_error( mem_msg, 0, false ); return 1; } if( ap_error( &parser ) ) /* bad option */ { show_error( ap_error( &parser ), 0, true ); return 1; } + int argind = 0; for( ; argind < ap_arguments( &parser ); ++argind ) { const int code = ap_code( &parser, argind ); - const char * const arg = ap_argument( &parser, argind ); if( !code ) break; /* no more options */ + const char * const pn = ap_parsed_name( &parser, argind ); + const char * const arg = ap_argument( &parser, argind ); switch( code ) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': encoder_options = option_mapping[code-'0']; 
break; case 'a': ignore_trailing = false; break; - case 'b': member_size = getnum( arg, 100000, max_member_size ); break; + case 'b': member_size = getnum( arg, pn, 100000, max_member_size ); break; case 'c': to_stdout = true; break; case 'd': set_mode( &program_mode, m_decompress ); break; case 'f': force = true; break; @@ -1063,15 +1125,15 @@ int main( const int argc, const char * const argv[] ) case 'h': show_help(); return 0; case 'k': keep_input_files = true; break; case 'm': encoder_options.match_len_limit = - getnum( arg, LZ_min_match_len_limit(), - LZ_max_match_len_limit() ); break; + getnum( arg, pn, LZ_min_match_len_limit(), + LZ_max_match_len_limit() ); break; case 'n': break; case 'o': if( strcmp( arg, "-" ) == 0 ) to_stdout = true; else { default_output_filename = arg; } break; case 'q': verbosity = -1; break; - case 's': encoder_options.dictionary_size = get_dict_size( arg ); + case 's': encoder_options.dictionary_size = get_dict_size( arg, pn ); break; - case 'S': volume_size = getnum( arg, 100000, max_volume_size ); break; + case 'S': volume_size = getnum( arg, pn, 100000, max_volume_size ); break; case 't': set_mode( &program_mode, m_test ); break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; @@ -1096,15 +1158,17 @@ int main( const int argc, const char * const argv[] ) if( strcmp( LZ_version_string, LZ_version() ) != 0 ) show_error( "warning: wrong library version_string. 
Try --check-lib.", 0, false ); -#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) +#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ setmode( STDIN_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY ); #endif - num_filenames = max( 1, ap_arguments( &parser ) - argind ); + static const char ** filenames = 0; + int num_filenames = max( 1, ap_arguments( &parser ) - argind ); filenames = resize_buffer( filenames, num_filenames * sizeof filenames[0] ); filenames[0] = "-"; + bool filenames_given = false; for( i = 0; argind + i < ap_arguments( &parser ); ++i ) { filenames[i] = ap_argument( &parser, argind + i ); @@ -1133,16 +1197,18 @@ int main( const int argc, const char * const argv[] ) if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) ) set_signals( signal_handler ); + static struct Pretty_print pp; Pp_init( &pp, filenames, num_filenames ); + int failed_tests = 0; + int retval = 0; const bool one_to_one = !to_stdout && program_mode != m_test && !to_file; + bool stdin_used = false; for( i = 0; i < num_filenames; ++i ) { const char * input_filename = ""; int infd; - int tmp; struct stat in_stats; - const struct stat * in_statsp; Pp_set_name( &pp, filenames[i] ); if( strcmp( filenames[i], "-" ) == 0 ) @@ -1184,7 +1250,9 @@ int main( const int argc, const char * const argv[] ) return 1; /* check tty only once and don't try to delete a tty */ } - in_statsp = ( input_filename[0] && one_to_one ) ? &in_stats : 0; + const struct stat * const in_statsp = + ( input_filename[0] && one_to_one ) ? &in_stats : 0; + int tmp; if( program_mode == m_compress ) tmp = compress( member_size, volume_size, infd, &encoder_options, &pp, in_statsp ); diff --git a/testsuite/check.sh b/testsuite/check.sh index 44e2428..e93697e 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lzlib - Compression library for the lzip format -# Copyright (C) 2009-2021 Antonio Diaz Diaz. 
+# Copyright (C) 2009-2022 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute, and modify it. @@ -39,7 +39,8 @@ fox_lz="${testdir}"/fox.lz fail=0 test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } -"${LZIP}" --check-lib # just print warning +"${LZIP}" --check-lib # just print warning +[ $? != 2 ] || { test_failed $LINENO ; exit 2 ; } # unless bad lzlib.h printf "testing lzlib-%s..." "$2" "${LZIP}" -fkqm4 in @@ -99,6 +100,7 @@ done printf "LZIP\001-.............................." | "${LZIP}" -t 2> /dev/null printf "LZIP\002-.............................." | "${LZIP}" -t 2> /dev/null printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null +rm -f out || framework_failure printf "\ntesting decompression..." @@ -118,25 +120,28 @@ done lines=$("${LZIP}" -tvv "${in_em}" 2>&1 | wc -l) || test_failed $LINENO [ "${lines}" -eq 8 ] || test_failed $LINENO "${lines}" +"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO cat "${in_lz}" > copy.lz || framework_failure "${LZIP}" -dk copy.lz || test_failed $LINENO cmp in copy || test_failed $LINENO -printf "to be overwritten" > copy || framework_failure -"${LZIP}" -d copy.lz 2> /dev/null +cat fox > copy || framework_failure +cat "${in_lz}" > out.lz || framework_failure +rm -f out || framework_failure +"${LZIP}" -d copy.lz out.lz 2> /dev/null # skip copy, decompress out [ $? = 1 ] || test_failed $LINENO +cmp fox copy || test_failed $LINENO +cmp in out || test_failed $LINENO "${LZIP}" -df copy.lz || test_failed $LINENO [ ! -e copy.lz ] || test_failed $LINENO cmp in copy || test_failed $LINENO +rm -f copy out || framework_failure -rm -f copy || framework_failure cat "${in_lz}" > copy.lz || framework_failure "${LZIP}" -d -S100k copy.lz || test_failed $LINENO # ignore -S [ ! 
-e copy.lz ] || test_failed $LINENO cmp in copy || test_failed $LINENO printf "to be overwritten" > copy || framework_failure -"${LZIP}" -d -o copy < "${in_lz}" 2> /dev/null -[ $? = 1 ] || test_failed $LINENO "${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO cmp in copy || test_failed $LINENO rm -f out copy || framework_failure @@ -160,7 +165,7 @@ rm -f copy anyothername.out || framework_failure [ $? = 1 ] || test_failed $LINENO "${LZIP}" -cdq in "${in_lz}" > copy [ $? = 2 ] || test_failed $LINENO -cat copy in | cmp in - || test_failed $LINENO +cat copy in | cmp in - || test_failed $LINENO # copy must be empty "${LZIP}" -cdq nx_file.lz "${in_lz}" > copy [ $? = 1 ] || test_failed $LINENO cmp in copy || test_failed $LINENO @@ -381,7 +386,6 @@ for i in fox_v2.lz fox_s11.lz fox_de20.lz \ [ $? = 2 ] || test_failed $LINENO $i done -"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO for i in fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do "${LZIP}" -cdq "${testdir}"/$i > out [ $? = 2 ] || test_failed $LINENO $i |