summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 17
-rw-r--r-- INSTALL 4
-rw-r--r-- Makefile.in 23
-rw-r--r-- NEWS 97
-rw-r--r-- README 14
-rw-r--r-- bbexample.c 34
-rw-r--r-- carg_parser.c 110
-rw-r--r-- carg_parser.h 14
-rw-r--r-- cbuffer.c 10
-rwxr-xr-x configure 16
-rw-r--r-- decoder.c 21
-rw-r--r-- decoder.h 123
-rw-r--r-- doc/lzlib.info 243
-rw-r--r-- doc/lzlib.texi 244
-rw-r--r-- doc/minilzip.1 35
-rw-r--r-- encoder.c 127
-rw-r--r-- encoder.h 5
-rw-r--r-- encoder_base.c 23
-rw-r--r-- encoder_base.h 22
-rw-r--r-- fast_encoder.c 35
-rw-r--r-- fast_encoder.h 2
-rw-r--r-- ffexample.c 12
-rw-r--r-- lzcheck.c 11
-rw-r--r-- lzip.h 3
-rw-r--r-- lzlib.c 2
-rw-r--r-- lzlib.h 6
-rw-r--r-- minilzip.c (renamed from main.c) 256
-rwxr-xr-x testsuite/check.sh 22
28 files changed, 807 insertions, 724 deletions
diff --git a/ChangeLog b/ChangeLog
index e38a855..8d7da96 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2022-01-23 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.13 released.
+ * Set variables AR and ARFLAGS from configure.
+ (Reported by Hoël Bézier).
+ * main.c: Rename to minilzip.c.
+ * minilzip.c (getnum): Show option name and valid range if error.
+ (check_lib): Check that LZ_API_VERSION and LZ_version_string match.
+ * Improve several descriptions in manual, '--help', and man page.
+ * lzlib.texi: Change GNU Texinfo category to 'Compression'.
+ (Reported by Alfred M. Szmidt).
+
2021-01-02 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.12 released.
@@ -48,7 +60,7 @@
* main.c: Compile on DOS with DJGPP.
* lzlib.texi: Improve descriptions of '-0..-9', '-m', and '-s'.
Document that 'LZ_(de)compress_finish' can be called repeatedly.
- * configure: Accept appending to CFLAGS, 'CFLAGS+=OPTIONS'.
+ * configure: Accept appending to CFLAGS; 'CFLAGS+=OPTIONS'.
* Makefile.in: Rename targets 'install-bin*' to 'install-lib*'.
* Makefile.in: Targets 'install-bin*' now install minilzip.
* INSTALL: Document use of CFLAGS+='-D __USE_MINGW_ANSI_STDIO'.
@@ -57,6 +69,7 @@
* Version 1.10 released.
* LZ_compress_finish now adjusts dictionary size for each member.
+ (Older versions can adjust dictionary size only once).
* lzlib.c (LZ_decompress_read): Detect corrupt header with HD=3.
* main.c: New option '--loose-trailing'.
* main.c (main): Option '-S, --volume-size' now keeps input files.
@@ -235,7 +248,7 @@
* Version 0.1 released.
-Copyright (C) 2009-2021 Antonio Diaz Diaz.
+Copyright (C) 2009-2022 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute, and
diff --git a/INSTALL b/INSTALL
index a9a870e..f3b0b94 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,7 +1,7 @@
Requirements
------------
You will need a C99 compiler. (gcc 3.3.6 or newer is recommended).
-I use gcc 6.1.0 and 4.1.2, but the code should compile with any standards
+I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards
compliant compiler.
Gcc is available at http://gcc.gnu.org.
@@ -74,7 +74,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above.
-Copyright (C) 2009-2021 Antonio Diaz Diaz.
+Copyright (C) 2009-2022 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute, and modify it.
diff --git a/Makefile.in b/Makefile.in
index 94e3770..81b404b 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1,6 +1,5 @@
DISTNAME = $(pkgname)-$(pkgversion)
-AR = ar
INSTALL = install
INSTALL_PROGRAM = $(INSTALL) -m 755
INSTALL_DATA = $(INSTALL) -m 644
@@ -9,7 +8,7 @@ LDCONFIG = /sbin/ldconfig
SHELL = /bin/sh
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
-objs = carg_parser.o main.o
+objs = carg_parser.o minilzip.o
.PHONY : all install install-bin install-info install-man \
@@ -24,27 +23,27 @@ objs = carg_parser.o main.o
all : $(progname_static) $(progname_shared)
lib$(libname).a : lzlib.o
- $(AR) -rcs $@ $<
+ $(AR) $(ARFLAGS) $@ $<
lib$(libname).so.$(pkgversion) : lzlib_sh.o
- $(CC) $(LDFLAGS) $(CFLAGS) -fpic -fPIC -shared -Wl,--soname=lib$(libname).so.$(soversion) -o $@ $<
+ $(CC) $(CFLAGS) $(LDFLAGS) -fpic -fPIC -shared -Wl,--soname=lib$(libname).so.$(soversion) -o $@ $<
$(progname) : $(objs) lib$(libname).a
- $(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(objs) lib$(libname).a
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs) lib$(libname).a
$(progname)_shared : $(objs) lib$(libname).so.$(pkgversion)
- $(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(objs) lib$(libname).so.$(pkgversion)
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs) lib$(libname).so.$(pkgversion)
bbexample : bbexample.o lib$(libname).a
- $(CC) $(LDFLAGS) $(CFLAGS) -o $@ bbexample.o lib$(libname).a
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ bbexample.o lib$(libname).a
ffexample : ffexample.o lib$(libname).a
- $(CC) $(LDFLAGS) $(CFLAGS) -o $@ ffexample.o lib$(libname).a
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ ffexample.o lib$(libname).a
lzcheck : lzcheck.o lib$(libname).a
- $(CC) $(LDFLAGS) $(CFLAGS) -o $@ lzcheck.o lib$(libname).a
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ lzcheck.o lib$(libname).a
-main.o : main.c
+minilzip.o : minilzip.c
$(CC) $(CPPFLAGS) $(CFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
lzlib_sh.o : lzlib.c
@@ -60,7 +59,7 @@ $(objs) : Makefile
carg_parser.o : carg_parser.h
lzlib.o : Makefile $(lzdeps)
lzlib_sh.o : Makefile $(lzdeps)
-main.o : carg_parser.h lzlib.h
+minilzip.o : carg_parser.h lzlib.h
bbexample.o : Makefile lzlib.h
ffexample.o : Makefile lzlib.h
lzcheck.o : Makefile lzlib.h
@@ -76,7 +75,7 @@ $(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi
man : $(VPATH)/doc/$(progname).1
$(VPATH)/doc/$(progname).1 : $(progname)
- help2man -n 'reduces the size of files' -o $@ --no-info ./$(progname)
+ help2man -n 'reduces the size of files' -o $@ --info-page=$(pkgname) ./$(progname)
Makefile : $(VPATH)/configure $(VPATH)/Makefile.in
./config.status
diff --git a/NEWS b/NEWS
index 7b01d52..e2ceb38 100644
--- a/NEWS
+++ b/NEWS
@@ -1,92 +1,15 @@
-Changes in version 1.12:
+Changes in version 1.13:
-The value of the version test macro 'LZ_API_VERSION' is now defined as
-1000 * major + minor. For version 1.12 it is 1012.
-This change does not affect the soversion.
+The variables AR and ARFLAGS can now be set from configure. (Before you
+needed to run 'make AR=<ar_command>'). (Reported by Hoël Bézier).
-The new function 'LZ_api_version', which returns the LZ_API_VERSION of the
-library object code being used, has been added to lzlib.
+In case of error in a numerical argument to a command line option, minilzip
+now shows the name of the option and the range of valid values.
-If end of file is found at member trailer or EOS marker,
-'LZ_decompress_errno' now returns 'LZ_unexpected_eof' instead of
-'LZ_data_error'.
+'minilzip --check-lib' now checks that LZ_API_VERSION and LZ_version_string
+match.
-Decompression speed has been slightly increased.
+Several descriptions have been improved in manual, '--help', and man page.
-A bug has been fixed in minilzip that falsely reported a library stall when
-decompressing a file with empty members.
-
-The new option '--check-lib', which compares the version of lzlib used to
-compile minilzip with the version actually being used, has been added to
-minilzip.
-
-Minilzip now reports an error if a file name is empty (minilzip -t "").
-
-Option '-o, --output' now behaves like '-c, --stdout', but sending the
-output unconditionally to a file instead of to standard output. See the new
-description of '-o' in the manual. This change is not backwards compatible.
-Therefore commands like:
- minilzip -o foo.lz - bar < foo
-must now be split into:
- minilzip -o foo.lz - < foo
- minilzip bar
-or rewritten as:
- minilzip - bar < foo > foo.lz
-
-When using '-c' or '-o', minilzip now checks whether the output is a
-terminal only once.
-
-Minilzip now does not even open the output file if the input file is a terminal.
-
-The words 'decompressed' and 'compressed' have been replaced with the
-shorter 'out' and 'in' in the verbose output of minilzip when decompressing
-or testing.
-
-It has been documented in the manual that 'LZ_(de)compress_close' and
-'LZ_(de)compress_errno' can be called with a null argument.
-
-It has been documented in the manual that the LZMA marker '3' ("Sync Flush"
-marker) is not allowed in lzip files. Marker '3' is a device for interactive
-communication between applications using lzlib, but is useless and wasteful
-in a file, and is excluded from the media type 'application/lzip'. The LZMA
-marker '2' ("End Of Stream" marker) is the only marker allowed in lzip
-files.
-
-It has been documented in the manual that not calling 'LZ_decompress_finish'
-prevents lzlib from detecting a truncated member.
-
-It has been documented in the manual that 'LZ_decompress_read' returns at
-least once per member so that 'LZ_decompress_member_finished' can be called
-(and trailer data retrieved) for each member, even for empty members.
-Therefore, 'LZ_decompress_read' returning 0 does not mean that the end of
-the stream has been reached.
-
-It has been documented in the manual that 'LZ_(de)compress_read' can be
-called with a null buffer pointer argument.
-
-Real code examples for the most common uses of the library have been added
-to the tutorial.
-
-'bbexample.c' has been simplified to not use 'LZ_(de)compress_write_size'.
-
-'lzcheck' now accepts options '-s' (to check LZ_compress_sync_flush) and
-'-m' (to check member by member decompression).
-
-'lzcheck.c' now also tests member by member decompression without
-intermediate calls to 'LZ_decompress_finish'.
-
-The new file 'ffexample.c', containing example functions for file-to-file
-compression/decompression, has been added to the distribution.
-
-The commands needed to extract files from a tar.lz archive have been
-documented in the output of 'minilzip --help' and in the man page.
-
-'make install-bin' no longer installs the minilzip man page. This is to
-prevent 'make install-bin install-man-compress' from installing the man page
-twice before compressing it.
-
-The new targets 'install-bin-compress' and 'install-bin-strip-compress',
-which install a (stripped) minilzip and a compressed man page, have been
-added to the Makefile.
-
-9 new test files have been added to the testsuite.
+The texinfo category of the manual has been changed from 'Data Compression'
+to 'Compression' to match that of gzip. (Reported by Alfred M. Szmidt).
diff --git a/README b/README
index ee6e6d7..86a0cb5 100644
--- a/README
+++ b/README
@@ -31,9 +31,13 @@ the beginning is a thing of the past.
The functions and variables forming the interface of the compression library
are declared in the file 'lzlib.h'. Usage examples of the library are given
-in the files 'bbexample.c', 'ffexample.c', and 'main.c' from the source
+in the files 'bbexample.c', 'ffexample.c', and 'minilzip.c' from the source
distribution.
+All the library functions are thread safe. The library does not install any
+signal handler. The decoder checks the consistency of the compressed data,
+so the library should never crash even in case of corrupted input.
+
Compression/decompression is done by repeatedly calling a couple of
read/write functions until all the data have been processed by the library.
This interface is safer and less error prone than the traditional zlib
@@ -60,10 +64,6 @@ Lzlib is able to compress and decompress streams of unlimited size by
automatically creating multimember output. The members so created are large,
about 2 PiB each.
-All the library functions are thread safe. The library does not install
-any signal handler. The decoder checks the consistency of the compressed
-data, so the library should never crash even in case of corrupted input.
-
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
concrete algorithm; it is more like "any algorithm using the LZMA coding
scheme". For example, the option '-0' of lzip uses the scheme in almost the
@@ -73,7 +73,7 @@ finding coding sequences of minimum size than the one currently used by lzip
could be developed, and the resulting sequence could also be coded using the
LZMA coding scheme.
-Lzlib currently implements two variants of the LZMA algorithm; fast (used by
+Lzlib currently implements two variants of the LZMA algorithm: fast (used by
option '-0' of minilzip) and normal (used by all other compression levels).
The high compression of LZMA comes from combining two basic, well-proven
@@ -93,7 +93,7 @@ been compressed. Decompressed is used to refer to data which have undergone
the process of decompression.
-Copyright (C) 2009-2021 Antonio Diaz Diaz.
+Copyright (C) 2009-2022 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute, and modify it.
diff --git a/bbexample.c b/bbexample.c
index 96257ee..074f7ae 100644
--- a/bbexample.c
+++ b/bbexample.c
@@ -1,5 +1,5 @@
/* Buffer to buffer example - Test program for the library lzlib
- Copyright (C) 2010-2021 Antonio Diaz Diaz.
+ Copyright (C) 2010-2022 Antonio Diaz Diaz.
This program is free software: you have unlimited permission
to copy, distribute, and modify it.
@@ -28,9 +28,9 @@
#endif
-/* Returns the address of a malloc'd buffer containing the file data and
+/* Return the address of a malloc'd buffer containing the file data and
the file size in '*file_sizep'.
- In case of error, returns 0 and does not modify '*file_sizep'.
+ In case of error, return 0 and do not modify '*file_sizep'.
*/
uint8_t * read_file( const char * const name, long * const file_sizep )
{
@@ -73,10 +73,10 @@ uint8_t * read_file( const char * const name, long * const file_sizep )
}
-/* Compresses 'insize' bytes from 'inbuf'.
- Returns the address of a malloc'd buffer containing the compressed data,
+/* Compress 'insize' bytes from 'inbuf'.
+ Return the address of a malloc'd buffer containing the compressed data,
and the size of the data in '*outlenp'.
- In case of error, returns 0 and does not modify '*outlenp'.
+ In case of error, return 0 and do not modify '*outlenp'.
*/
uint8_t * bbcompressl( const uint8_t * const inbuf, const long insize,
const int level, long * const outlenp )
@@ -152,10 +152,10 @@ uint8_t * bbcompressl( const uint8_t * const inbuf, const long insize,
}
-/* Decompresses 'insize' bytes from 'inbuf'.
- Returns the address of a malloc'd buffer containing the decompressed
+/* Decompress 'insize' bytes from 'inbuf'.
+ Return the address of a malloc'd buffer containing the decompressed
data, and the size of the data in '*outlenp'.
- In case of error, returns 0 and does not modify '*outlenp'.
+ In case of error, return 0 and do not modify '*outlenp'.
*/
uint8_t * bbdecompressl( const uint8_t * const inbuf, const long insize,
long * const outlenp )
@@ -230,10 +230,10 @@ int full_test( const uint8_t * const inbuf, const long insize )
}
-/* Compresses 'insize' bytes from 'inbuf' to 'outbuf'.
- Returns the size of the compressed data in '*outlenp'.
- In case of error, or if 'outsize' is too small, returns false and does
- not modify '*outlenp'.
+/* Compress 'insize' bytes from 'inbuf' to 'outbuf'.
+ Return the size of the compressed data in '*outlenp'.
+ In case of error, or if 'outsize' is too small, return false and do not
+ modify '*outlenp'.
*/
bool bbcompress( const uint8_t * const inbuf, const int insize,
const int dictionary_size, const int match_len_limit,
@@ -267,10 +267,10 @@ bool bbcompress( const uint8_t * const inbuf, const int insize,
}
-/* Decompresses 'insize' bytes from 'inbuf' to 'outbuf'.
- Returns the size of the decompressed data in '*outlenp'.
- In case of error, or if 'outsize' is too small, returns false and does
- not modify '*outlenp'.
+/* Decompress 'insize' bytes from 'inbuf' to 'outbuf'.
+ Return the size of the decompressed data in '*outlenp'.
+ In case of error, or if 'outsize' is too small, return false and do not
+ modify '*outlenp'.
*/
bool bbdecompress( const uint8_t * const inbuf, const int insize,
uint8_t * const outbuf, const int outsize,
diff --git a/carg_parser.c b/carg_parser.c
index d0c05d5..181ba23 100644
--- a/carg_parser.c
+++ b/carg_parser.c
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
- Copyright (C) 2006-2021 Antonio Diaz Diaz.
+ Copyright (C) 2006-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -32,10 +32,10 @@ static void * ap_resize_buffer( void * buf, const int min_size )
}
-static char push_back_record( struct Arg_parser * const ap,
- const int code, const char * const argument )
+static char push_back_record( struct Arg_parser * const ap, const int code,
+ const char * const long_name,
+ const char * const argument )
{
- const int len = strlen( argument );
struct ap_Record * p;
void * tmp = ap_resize_buffer( ap->data,
( ap->data_size + 1 ) * sizeof (struct ap_Record) );
@@ -43,11 +43,29 @@ static char push_back_record( struct Arg_parser * const ap,
ap->data = (struct ap_Record *)tmp;
p = &(ap->data[ap->data_size]);
p->code = code;
- p->argument = 0;
- tmp = ap_resize_buffer( p->argument, len + 1 );
- if( !tmp ) return 0;
- p->argument = (char *)tmp;
- strncpy( p->argument, argument, len + 1 );
+ if( long_name )
+ {
+ const int len = strlen( long_name );
+ p->parsed_name = (char *)malloc( len + 2 + 1 );
+ if( !p->parsed_name ) return 0;
+ p->parsed_name[0] = p->parsed_name[1] = '-';
+ strncpy( p->parsed_name + 2, long_name, len + 1 );
+ }
+ else if( code > 0 && code < 256 )
+ {
+ p->parsed_name = (char *)malloc( 2 + 1 );
+ if( !p->parsed_name ) return 0;
+ p->parsed_name[0] = '-'; p->parsed_name[1] = code; p->parsed_name[2] = 0;
+ }
+ else p->parsed_name = 0;
+ if( argument )
+ {
+ const int len = strlen( argument );
+ p->argument = (char *)malloc( len + 1 );
+ if( !p->argument ) { free( p->parsed_name ); return 0; }
+ strncpy( p->argument, argument, len + 1 );
+ }
+ else p->argument = 0;
++ap->data_size;
return 1;
}
@@ -68,12 +86,14 @@ static char add_error( struct Arg_parser * const ap, const char * const msg )
static void free_data( struct Arg_parser * const ap )
{
int i;
- for( i = 0; i < ap->data_size; ++i ) free( ap->data[i].argument );
+ for( i = 0; i < ap->data_size; ++i )
+ { free( ap->data[i].argument ); free( ap->data[i].parsed_name ); }
if( ap->data ) { free( ap->data ); ap->data = 0; }
ap->data_size = 0;
}
+/* Return 0 only if out of memory. */
static char parse_long_option( struct Arg_parser * const ap,
const char * const opt, const char * const arg,
const struct ap_Option options[],
@@ -87,9 +107,10 @@ static char parse_long_option( struct Arg_parser * const ap,
/* Test all long options for either exact match or abbreviated matches. */
for( i = 0; options[i].code != 0; ++i )
- if( options[i].name && strncmp( options[i].name, &opt[2], len ) == 0 )
+ if( options[i].long_name &&
+ strncmp( options[i].long_name, &opt[2], len ) == 0 )
{
- if( strlen( options[i].name ) == len ) /* Exact match found */
+ if( strlen( options[i].long_name ) == len ) /* Exact match found */
{ index = i; exact = 1; break; }
else if( index < 0 ) index = i; /* First nonexact match found */
else if( options[index].code != options[i].code ||
@@ -117,35 +138,39 @@ static char parse_long_option( struct Arg_parser * const ap,
{
if( options[index].has_arg == ap_no )
{
- add_error( ap, "option '--" ); add_error( ap, options[index].name );
+ add_error( ap, "option '--" ); add_error( ap, options[index].long_name );
add_error( ap, "' doesn't allow an argument" );
return 1;
}
if( options[index].has_arg == ap_yes && !opt[len+3] )
{
- add_error( ap, "option '--" ); add_error( ap, options[index].name );
+ add_error( ap, "option '--" ); add_error( ap, options[index].long_name );
add_error( ap, "' requires an argument" );
return 1;
}
- return push_back_record( ap, options[index].code, &opt[len+3] );
+ return push_back_record( ap, options[index].code,
+ options[index].long_name, &opt[len+3] );
}
if( options[index].has_arg == ap_yes )
{
if( !arg || !arg[0] )
{
- add_error( ap, "option '--" ); add_error( ap, options[index].name );
+ add_error( ap, "option '--" ); add_error( ap, options[index].long_name );
add_error( ap, "' requires an argument" );
return 1;
}
++*argindp;
- return push_back_record( ap, options[index].code, arg );
+ return push_back_record( ap, options[index].code,
+ options[index].long_name, arg );
}
- return push_back_record( ap, options[index].code, "" );
+ return push_back_record( ap, options[index].code,
+ options[index].long_name, 0 );
}
+/* Return 0 only if out of memory. */
static char parse_short_option( struct Arg_parser * const ap,
const char * const opt, const char * const arg,
const struct ap_Option options[],
@@ -156,13 +181,13 @@ static char parse_short_option( struct Arg_parser * const ap,
while( cind > 0 )
{
int index = -1, i;
- const unsigned char code = opt[cind];
+ const unsigned char c = opt[cind];
char code_str[2];
- code_str[0] = code; code_str[1] = 0;
+ code_str[0] = c; code_str[1] = 0;
- if( code != 0 )
+ if( c != 0 )
for( i = 0; options[i].code; ++i )
- if( code == options[i].code )
+ if( c == options[i].code )
{ index = i; break; }
if( index < 0 )
@@ -176,7 +201,7 @@ static char parse_short_option( struct Arg_parser * const ap,
if( options[index].has_arg != ap_no && cind > 0 && opt[cind] )
{
- if( !push_back_record( ap, code, &opt[cind] ) ) return 0;
+ if( !push_back_record( ap, c, 0, &opt[cind] ) ) return 0;
++*argindp; cind = 0;
}
else if( options[index].has_arg == ap_yes )
@@ -188,9 +213,9 @@ static char parse_short_option( struct Arg_parser * const ap,
return 1;
}
++*argindp; cind = 0;
- if( !push_back_record( ap, code, arg ) ) return 0;
+ if( !push_back_record( ap, c, 0, arg ) ) return 0;
}
- else if( !push_back_record( ap, code, "" ) ) return 0;
+ else if( !push_back_record( ap, c, 0, 0 ) ) return 0;
}
return 1;
}
@@ -203,7 +228,7 @@ char ap_init( struct Arg_parser * const ap,
const char ** non_options = 0; /* skipped non-options */
int non_options_size = 0; /* number of skipped non-options */
int argind = 1; /* index in argv */
- int i;
+ char done = 0; /* false until success */
ap->data = 0;
ap->error = 0;
@@ -223,20 +248,20 @@ char ap_init( struct Arg_parser * const ap,
if( ch2 == '-' )
{
if( !argv[argind][2] ) { ++argind; break; } /* we found "--" */
- else if( !parse_long_option( ap, opt, arg, options, &argind ) ) return 0;
+ else if( !parse_long_option( ap, opt, arg, options, &argind ) ) goto out;
}
- else if( !parse_short_option( ap, opt, arg, options, &argind ) ) return 0;
+ else if( !parse_short_option( ap, opt, arg, options, &argind ) ) goto out;
if( ap->error ) break;
}
else
{
if( in_order )
- { if( !push_back_record( ap, 0, argv[argind++] ) ) return 0; }
+ { if( !push_back_record( ap, 0, 0, argv[argind++] ) ) goto out; }
else
{
void * tmp = ap_resize_buffer( non_options,
( non_options_size + 1 ) * sizeof *non_options );
- if( !tmp ) return 0;
+ if( !tmp ) goto out;
non_options = (const char **)tmp;
non_options[non_options_size++] = argv[argind++];
}
@@ -245,13 +270,15 @@ char ap_init( struct Arg_parser * const ap,
if( ap->error ) free_data( ap );
else
{
+ int i;
for( i = 0; i < non_options_size; ++i )
- if( !push_back_record( ap, 0, non_options[i] ) ) return 0;
+ if( !push_back_record( ap, 0, 0, non_options[i] ) ) goto out;
while( argind < argc )
- if( !push_back_record( ap, 0, argv[argind++] ) ) return 0;
+ if( !push_back_record( ap, 0, 0, argv[argind++] ) ) goto out;
}
- if( non_options ) free( non_options );
- return 1;
+ done = 1;
+out: if( non_options ) free( non_options );
+ return done;
}
@@ -273,13 +300,20 @@ int ap_arguments( const struct Arg_parser * const ap )
int ap_code( const struct Arg_parser * const ap, const int i )
{
- if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].code;
- else return 0;
+ if( i < 0 || i >= ap_arguments( ap ) ) return 0;
+ return ap->data[i].code;
+ }
+
+
+const char * ap_parsed_name( const struct Arg_parser * const ap, const int i )
+ {
+ if( i < 0 || i >= ap_arguments( ap ) || !ap->data[i].parsed_name ) return "";
+ return ap->data[i].parsed_name;
}
const char * ap_argument( const struct Arg_parser * const ap, const int i )
{
- if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].argument;
- else return "";
+ if( i < 0 || i >= ap_arguments( ap ) || !ap->data[i].argument ) return "";
+ return ap->data[i].argument;
}
diff --git a/carg_parser.h b/carg_parser.h
index c5f2352..0c64861 100644
--- a/carg_parser.h
+++ b/carg_parser.h
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
- Copyright (C) 2006-2021 Antonio Diaz Diaz.
+ Copyright (C) 2006-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -24,9 +24,9 @@
message.
'options' is an array of 'struct ap_Option' terminated by an element
- containing a code which is zero. A null name means a short-only
- option. A code value outside the unsigned char range means a
- long-only option.
+ containing a code which is zero. A null long_name means a short-only
+ option. A code value outside the unsigned char range means a long-only
+ option.
Arg_parser normally makes it appear as if all the option arguments
were specified before all the non-option arguments for the purposes
@@ -50,7 +50,7 @@ enum ap_Has_arg { ap_no, ap_yes, ap_maybe };
struct ap_Option
{
int code; /* Short option letter or code ( code != 0 ) */
- const char * name; /* Long option name (maybe null) */
+ const char * long_name; /* Long option name (maybe null) */
enum ap_Has_arg has_arg;
};
@@ -58,6 +58,7 @@ struct ap_Option
struct ap_Record
{
int code;
+ char * parsed_name;
char * argument;
};
@@ -86,6 +87,9 @@ int ap_arguments( const struct Arg_parser * const ap );
Else ap_argument( i ) is the option's argument (or empty). */
int ap_code( const struct Arg_parser * const ap, const int i );
+/* Full name of the option parsed (short or long). */
+const char * ap_parsed_name( const struct Arg_parser * const ap, const int i );
+
const char * ap_argument( const struct Arg_parser * const ap, const int i );
#ifdef __cplusplus
diff --git a/cbuffer.c b/cbuffer.c
index ee54131..812de42 100644
--- a/cbuffer.c
+++ b/cbuffer.c
@@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
- Copyright (C) 2009-2021 Antonio Diaz Diaz.
+ Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -76,9 +76,9 @@ static bool Cb_unread_data( struct Circular_buffer * const cb,
}
-/* Copies up to 'out_size' bytes to 'out_buffer' and updates 'get'.
+/* Copy up to 'out_size' bytes to 'out_buffer' and update 'get'.
If 'out_buffer' is null, the bytes are discarded.
- Returns the number of bytes copied or discarded.
+ Return the number of bytes copied or discarded.
*/
static unsigned Cb_read_data( struct Circular_buffer * const cb,
uint8_t * const out_buffer,
@@ -110,8 +110,8 @@ static unsigned Cb_read_data( struct Circular_buffer * const cb,
}
-/* Copies up to 'in_size' bytes from 'in_buffer' and updates 'put'.
- Returns the number of bytes copied.
+/* Copy up to 'in_size' bytes from 'in_buffer' and update 'put'.
+ Return the number of bytes copied.
*/
static unsigned Cb_write_data( struct Circular_buffer * const cb,
const uint8_t * const in_buffer,
diff --git a/configure b/configure
index d74cf5d..4060472 100755
--- a/configure
+++ b/configure
@@ -1,12 +1,12 @@
#! /bin/sh
# configure script for Lzlib - Compression library for the lzip format
-# Copyright (C) 2009-2021 Antonio Diaz Diaz.
+# Copyright (C) 2009-2022 Antonio Diaz Diaz.
#
# This configure script is free software: you have unlimited permission
# to copy, distribute, and modify it.
pkgname=lzlib
-pkgversion=1.12
+pkgversion=1.13
soversion=1
progname=minilzip
progname_static=${progname}
@@ -29,9 +29,11 @@ infodir='$(datarootdir)/info'
libdir='$(exec_prefix)/lib'
mandir='$(datarootdir)/man'
CC=gcc
+AR=ar
CPPFLAGS=
CFLAGS='-Wall -W -O2'
LDFLAGS=
+ARFLAGS=-rcs
# checking whether we are using GNU C.
/bin/sh -c "${CC} --version" > /dev/null 2>&1 || { CC=cc ; CFLAGS=-O2 ; }
@@ -79,10 +81,12 @@ while [ $# != 0 ] ; do
echo " --enable-shared build also a shared library [disable]"
echo " --disable-ldconfig don't run ldconfig after install"
echo " CC=COMPILER C compiler to use [${CC}]"
+ echo " AR=ARCHIVER library archiver to use [${AR}]"
echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]"
echo " CFLAGS=OPTIONS command line options for the C compiler [${CFLAGS}]"
echo " CFLAGS+=OPTIONS append options to the current value of CFLAGS"
echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]"
+ echo " ARFLAGS=OPTIONS command line options for the library archiver [${ARFLAGS}]"
echo
exit 0 ;;
--version | -V)
@@ -118,10 +122,12 @@ while [ $# != 0 ] ; do
--disable-ldconfig) disable_ldconfig=yes ;;
CC=*) CC=${optarg} ;;
+ AR=*) AR=${optarg} ;;
CPPFLAGS=*) CPPFLAGS=${optarg} ;;
CFLAGS=*) CFLAGS=${optarg} ;;
CFLAGS+=*) CFLAGS="${CFLAGS} ${optarg}" ;;
LDFLAGS=*) LDFLAGS=${optarg} ;;
+ ARFLAGS=*) ARFLAGS=${optarg} ;;
--*)
echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;;
@@ -189,13 +195,15 @@ echo "infodir = ${infodir}"
echo "libdir = ${libdir}"
echo "mandir = ${mandir}"
echo "CC = ${CC}"
+echo "AR = ${AR}"
echo "CPPFLAGS = ${CPPFLAGS}"
echo "CFLAGS = ${CFLAGS}"
echo "LDFLAGS = ${LDFLAGS}"
+echo "ARFLAGS = ${ARFLAGS}"
rm -f Makefile
cat > Makefile << EOF
# Makefile for Lzlib - Compression library for the lzip format
-# Copyright (C) 2009-2021 Antonio Diaz Diaz.
+# Copyright (C) 2009-2022 Antonio Diaz Diaz.
# This file was generated automatically by configure. Don't edit.
#
# This Makefile is free software: you have unlimited permission
@@ -220,9 +228,11 @@ infodir = ${infodir}
libdir = ${libdir}
mandir = ${mandir}
CC = ${CC}
+AR = ${AR}
CPPFLAGS = ${CPPFLAGS}
CFLAGS = ${CFLAGS}
LDFLAGS = ${LDFLAGS}
+ARFLAGS = ${ARFLAGS}
EOF
cat "${srcdir}/Makefile.in" >> Makefile
diff --git a/decoder.c b/decoder.c
index 82472b3..16f6532 100644
--- a/decoder.c
+++ b/decoder.c
@@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
- Copyright (C) 2009-2021 Antonio Diaz Diaz.
+ Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -49,8 +49,6 @@ static int LZd_decode_member( struct LZ_decoder * const d )
while( !Rd_finished( rdec ) )
{
- int len;
- const int pos_state = LZd_data_position( d ) & pos_state_mask;
/* const unsigned mpos = rdec->member_position;
if( mpos - old_mpos > rd_min_available_bytes ) return 5;
old_mpos = mpos; */
@@ -58,23 +56,19 @@ static int LZd_decode_member( struct LZ_decoder * const d )
{ if( !rdec->at_stream_end ) return 0;
if( Cb_empty( &rdec->cb ) ) break; } /* decode until EOF */
if( !LZd_enough_free_bytes( d ) ) return 0;
+ const int pos_state = LZd_data_position( d ) & pos_state_mask;
if( Rd_decode_bit( rdec, &d->bm_match[*state][pos_state] ) == 0 ) /* 1st bit */
{
/* literal byte */
Bit_model * const bm = d->bm_literal[get_lit_state(LZd_peek_prev( d ))];
- if( St_is_char( *state ) )
- {
- *state -= ( *state < 4 ) ? *state : 3;
+ if( ( *state = St_set_char( *state ) ) < 4 )
LZd_put_byte( d, Rd_decode_tree8( rdec, bm ) );
- }
else
- {
- *state -= ( *state < 10 ) ? 3 : 6;
LZd_put_byte( d, Rd_decode_matched( rdec, bm, LZd_peek( d, d->rep0 ) ) );
- }
continue;
}
/* match or repeated match */
+ int len;
if( Rd_decode_bit( rdec, &d->bm_rep[*state] ) != 0 ) /* 2nd bit */
{
if( Rd_decode_bit( rdec, &d->bm_rep0[*state] ) == 0 ) /* 3rd bit */
@@ -100,13 +94,12 @@ static int LZd_decode_member( struct LZ_decoder * const d )
d->rep0 = distance;
}
*state = St_set_rep( *state );
- len = min_match_len + Rd_decode_len( rdec, &d->rep_len_model, pos_state );
+ len = Rd_decode_len( rdec, &d->rep_len_model, pos_state );
}
else /* match */
{
- unsigned distance;
- len = min_match_len + Rd_decode_len( rdec, &d->match_len_model, pos_state );
- distance = Rd_decode_tree6( rdec, d->bm_dis_slot[get_len_state(len)] );
+ len = Rd_decode_len( rdec, &d->match_len_model, pos_state );
+ unsigned distance = Rd_decode_tree6( rdec, d->bm_dis_slot[get_len_state(len)] );
if( distance >= start_dis_model )
{
const unsigned dis_slot = distance;
diff --git a/decoder.h b/decoder.h
index 15d5dfb..27de9cb 100644
--- a/decoder.h
+++ b/decoder.h
@@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
- Copyright (C) 2009-2021 Antonio Diaz Diaz.
+ Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -72,8 +72,8 @@ static inline void Rd_reset( struct Range_decoder * const rdec )
rdec->member_position = 0; rdec->at_stream_end = false; }
-/* Seeks a member header and updates 'get'. '*skippedp' is set to the
- number of bytes skipped. Returns true if it finds a valid header.
+/* Seek a member header and update 'get'. Set '*skippedp' to the number
+ of bytes skipped. Return true if a valid header is found.
*/
static bool Rd_find_header( struct Range_decoder * const rdec,
unsigned * const skippedp )
@@ -140,8 +140,7 @@ static bool Rd_try_reload( struct Range_decoder * const rdec )
int i;
rdec->reload_pending = false;
rdec->code = 0;
- for( i = 0; i < 5; ++i )
- rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
+ for( i = 0; i < 5; ++i ) rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
rdec->range = 0xFFFFFFFFU;
rdec->code &= rdec->range; /* make sure that first byte is discarded */
}
@@ -161,12 +160,11 @@ static inline unsigned Rd_decode( struct Range_decoder * const rdec,
int i;
for( i = num_bits; i > 0; --i )
{
- bool bit;
Rd_normalize( rdec );
rdec->range >>= 1;
/* symbol <<= 1; */
/* if( rdec->code >= rdec->range ) { rdec->code -= rdec->range; symbol |= 1; } */
- bit = ( rdec->code >= rdec->range );
+ const bool bit = ( rdec->code >= rdec->range );
symbol <<= 1; symbol += bit;
rdec->code -= rdec->range & ( 0U - bit );
}
@@ -176,42 +174,75 @@ static inline unsigned Rd_decode( struct Range_decoder * const rdec,
static inline unsigned Rd_decode_bit( struct Range_decoder * const rdec,
Bit_model * const probability )
{
- uint32_t bound;
Rd_normalize( rdec );
- bound = ( rdec->range >> bit_model_total_bits ) * *probability;
+ const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability;
if( rdec->code < bound )
{
- *probability += (bit_model_total - *probability) >> bit_model_move_bits;
rdec->range = bound;
+ *probability += ( bit_model_total - *probability ) >> bit_model_move_bits;
return 0;
}
else
{
- *probability -= *probability >> bit_model_move_bits;
rdec->code -= bound;
rdec->range -= bound;
+ *probability -= *probability >> bit_model_move_bits;
return 1;
}
}
-static inline unsigned Rd_decode_tree3( struct Range_decoder * const rdec,
- Bit_model bm[] )
+static inline void Rd_decode_symbol_bit( struct Range_decoder * const rdec,
+ Bit_model * const probability, unsigned * symbol )
{
- unsigned symbol = 2 | Rd_decode_bit( rdec, &bm[1] );
- symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
- symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
- return symbol & 7;
+ Rd_normalize( rdec );
+ *symbol <<= 1;
+ const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability;
+ if( rdec->code < bound )
+ {
+ rdec->range = bound;
+ *probability += ( bit_model_total - *probability ) >> bit_model_move_bits;
+ }
+ else
+ {
+ rdec->code -= bound;
+ rdec->range -= bound;
+ *probability -= *probability >> bit_model_move_bits;
+ *symbol |= 1;
+ }
+ }
+
+static inline void Rd_decode_symbol_bit_reversed( struct Range_decoder * const rdec,
+ Bit_model * const probability, unsigned * model,
+ unsigned * symbol, const int i )
+ {
+ Rd_normalize( rdec );
+ *model <<= 1;
+ const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability;
+ if( rdec->code < bound )
+ {
+ rdec->range = bound;
+ *probability += ( bit_model_total - *probability ) >> bit_model_move_bits;
+ }
+ else
+ {
+ rdec->code -= bound;
+ rdec->range -= bound;
+ *probability -= *probability >> bit_model_move_bits;
+ *model |= 1;
+ *symbol |= 1 << i;
+ }
}
static inline unsigned Rd_decode_tree6( struct Range_decoder * const rdec,
Bit_model bm[] )
{
- unsigned symbol = 2 | Rd_decode_bit( rdec, &bm[1] );
- symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
- symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
- symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
- symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
- symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
+ unsigned symbol = 1;
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
return symbol & 0x3F;
}
@@ -219,9 +250,14 @@ static inline unsigned Rd_decode_tree8( struct Range_decoder * const rdec,
Bit_model bm[] )
{
unsigned symbol = 1;
- int i;
- for( i = 0; i < 8; ++i )
- symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
return symbol & 0xFF;
}
@@ -233,21 +269,19 @@ Rd_decode_tree_reversed( struct Range_decoder * const rdec,
unsigned symbol = 0;
int i;
for( i = 0; i < num_bits; ++i )
- {
- const unsigned bit = Rd_decode_bit( rdec, &bm[model] );
- model <<= 1; model += bit;
- symbol |= ( bit << i );
- }
+ Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, i );
return symbol;
}
static inline unsigned
Rd_decode_tree_reversed4( struct Range_decoder * const rdec, Bit_model bm[] )
{
- unsigned symbol = Rd_decode_bit( rdec, &bm[1] );
- symbol += Rd_decode_bit( rdec, &bm[2+symbol] ) << 1;
- symbol += Rd_decode_bit( rdec, &bm[4+symbol] ) << 2;
- symbol += Rd_decode_bit( rdec, &bm[8+symbol] ) << 3;
+ unsigned model = 1;
+ unsigned symbol = 0;
+ Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 0 );
+ Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 1 );
+ Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 2 );
+ Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 3 );
return symbol;
}
@@ -270,11 +304,24 @@ static inline unsigned Rd_decode_len( struct Range_decoder * const rdec,
struct Len_model * const lm,
const int pos_state )
{
+ Bit_model * bm;
+ unsigned mask, offset, symbol = 1;
+
if( Rd_decode_bit( rdec, &lm->choice1 ) == 0 )
- return Rd_decode_tree3( rdec, lm->bm_low[pos_state] );
+ { bm = lm->bm_low[pos_state]; mask = 7; offset = 0; goto len3; }
if( Rd_decode_bit( rdec, &lm->choice2 ) == 0 )
- return len_low_symbols + Rd_decode_tree3( rdec, lm->bm_mid[pos_state] );
- return len_low_symbols + len_mid_symbols + Rd_decode_tree8( rdec, lm->bm_high );
+ { bm = lm->bm_mid[pos_state]; mask = 7; offset = len_low_symbols; goto len3; }
+ bm = lm->bm_high; mask = 0xFF; offset = len_low_symbols + len_mid_symbols;
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+len3:
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ return ( symbol & mask ) + min_match_len + offset;
}
diff --git a/doc/lzlib.info b/doc/lzlib.info
index bef1859..d81bc88 100644
--- a/doc/lzlib.info
+++ b/doc/lzlib.info
@@ -1,6 +1,6 @@
This is lzlib.info, produced by makeinfo version 4.13+ from lzlib.texi.
-INFO-DIR-SECTION Data Compression
+INFO-DIR-SECTION Compression
START-INFO-DIR-ENTRY
* Lzlib: (lzlib). Compression library for the lzip format
END-INFO-DIR-ENTRY
@@ -11,7 +11,7 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir)
Lzlib Manual
************
-This manual is for Lzlib (version 1.12, 2 January 2021).
+This manual is for Lzlib (version 1.13, 23 January 2022).
* Menu:
@@ -30,7 +30,7 @@ This manual is for Lzlib (version 1.12, 2 January 2021).
* Concept index:: Index of concepts
- Copyright (C) 2009-2021 Antonio Diaz Diaz.
+ Copyright (C) 2009-2022 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to copy,
distribute, and modify it.
@@ -73,8 +73,12 @@ byte near the beginning is a thing of the past.
The functions and variables forming the interface of the compression
library are declared in the file 'lzlib.h'. Usage examples of the library
-are given in the files 'bbexample.c', 'ffexample.c', and 'main.c' from the
-source distribution.
+are given in the files 'bbexample.c', 'ffexample.c', and 'minilzip.c' from
+the source distribution.
+
+ All the library functions are thread safe. The library does not install
+any signal handler. The decoder checks the consistency of the compressed
+data, so the library should never crash even in case of corrupted input.
Compression/decompression is done by repeatedly calling a couple of
read/write functions until all the data have been processed by the library.
@@ -102,20 +106,16 @@ concatenated compressed data streams is also supported.
automatically creating multimember output. The members so created are large,
about 2 PiB each.
- All the library functions are thread safe. The library does not install
-any signal handler. The decoder checks the consistency of the compressed
-data, so the library should never crash even in case of corrupted input.
-
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
concrete algorithm; it is more like "any algorithm using the LZMA coding
scheme". For example, the option '-0' of lzip uses the scheme in almost the
simplest way possible; issuing the longest match it can find, or a literal
byte if it can't find a match. Inversely, a much more elaborated way of
-finding coding sequences of minimum size than the one currently used by
-lzip could be developed, and the resulting sequence could also be coded
-using the LZMA coding scheme.
+finding coding sequences of minimum size than the one currently used by lzip
+could be developed, and the resulting sequence could also be coded using the
+LZMA coding scheme.
- Lzlib currently implements two variants of the LZMA algorithm; fast
+ Lzlib currently implements two variants of the LZMA algorithm: fast
(used by option '-0' of minilzip) and normal (used by all other compression
levels).
@@ -145,7 +145,8 @@ One goal of lzlib is to keep perfect backward compatibility with older
versions of itself down to 1.0. Any application working with an older lzlib
should work with a newer lzlib. Installing a newer lzlib should not break
anything. This chapter describes the constants and functions that the
-application can use to discover the version of the library being used.
+application can use to discover the version of the library being used. All
+of them are declared in 'lzlib.h'.
-- Constant: LZ_API_VERSION
This constant is defined in 'lzlib.h' and works as a version test
@@ -325,13 +326,13 @@ except 'LZ_compress_open' whose return value must be verified by calling
'LZ_compress_sync_flush'. Then call 'LZ_compress_read' until it
returns 0.
- This function writes a LZMA marker '3' ("Sync Flush" marker) to the
- compressed output. Note that the sync flush marker is not allowed in
- lzip files; it is a device for interactive communication between
- applications using lzlib, but is useless and wasteful in a file, and
- is excluded from the media type 'application/lzip'. The LZMA marker
- '2' ("End Of Stream" marker) is the only marker allowed in lzip files.
- *Note Data format::.
+ This function writes at least one LZMA marker '3' ("Sync Flush" marker)
+ to the compressed output. Note that the sync flush marker is not
+ allowed in lzip files; it is a device for interactive communication
+ between applications using lzlib, but is useless and wasteful in a
+ file, and is excluded from the media type 'application/lzip'. The LZMA
+ marker '2' ("End Of Stream" marker) is the only marker allowed in lzip
+ files. *Note Data format::.
Repeated use of 'LZ_compress_sync_flush' may degrade compression
ratio, so use it only when needed. If the interval between calls to
@@ -347,34 +348,30 @@ except 'LZ_compress_open' whose return value must be verified by calling
-- Function: int LZ_compress_read ( struct LZ_Encoder * const ENCODER,
uint8_t * const BUFFER, const int SIZE )
- The function 'LZ_compress_read' reads up to SIZE bytes from the stream
- pointed to by ENCODER, storing the results in BUFFER. If
- LZ_API_VERSION >= 1012, BUFFER may be a null pointer, in which case
- the bytes read are discarded.
-
- The return value is the number of bytes actually read. This might be
- less than SIZE; for example, if there aren't that many bytes left in
- the stream or if more bytes have to be yet written with the function
+ Reads up to SIZE bytes from the stream pointed to by ENCODER, storing
+ the results in BUFFER. If LZ_API_VERSION >= 1012, BUFFER may be a null
+ pointer, in which case the bytes read are discarded.
+
+ Returns the number of bytes actually read. This might be less than
+ SIZE; for example, if there aren't that many bytes left in the stream
+ or if more bytes have yet to be written with the function
'LZ_compress_write'. Note that reading less than SIZE bytes is not an
error.
-- Function: int LZ_compress_write ( struct LZ_Encoder * const ENCODER,
uint8_t * const BUFFER, const int SIZE )
- The function 'LZ_compress_write' writes up to SIZE bytes from BUFFER
- to the stream pointed to by ENCODER.
-
- The return value is the number of bytes actually written. This might be
+ Writes up to SIZE bytes from BUFFER to the stream pointed to by
+ ENCODER. Returns the number of bytes actually written. This might be
less than SIZE. Note that writing less than SIZE bytes is not an error.
-- Function: int LZ_compress_write_size ( struct LZ_Encoder * const
ENCODER )
- The function 'LZ_compress_write_size' returns the maximum number of
- bytes that can be immediately written through 'LZ_compress_write'. For
- efficiency reasons, once the input buffer is full and
- 'LZ_compress_write_size' returns 0, almost all the buffer must be
- compressed before a size greater than 0 is returned again. (This is
- done to minimize the amount of data that must be copied to the
- beginning of the buffer before new data can be accepted).
+ Returns the maximum number of bytes that can be immediately written
+ through 'LZ_compress_write'. For efficiency reasons, once the input
+ buffer is full and 'LZ_compress_write_size' returns 0, almost all the
+ buffer must be compressed before a size greater than 0 is returned
+ again. (This is done to minimize the amount of data that must be
+ copied to the beginning of the buffer before new data can be accepted).
It is guaranteed that an immediate call to 'LZ_compress_write' will
accept a SIZE up to the returned number of bytes.
@@ -472,14 +469,13 @@ except 'LZ_decompress_open' whose return value must be verified by calling
-- Function: int LZ_decompress_read ( struct LZ_Decoder * const DECODER,
uint8_t * const BUFFER, const int SIZE )
- The function 'LZ_decompress_read' reads up to SIZE bytes from the
- stream pointed to by DECODER, storing the results in BUFFER. If
- LZ_API_VERSION >= 1012, BUFFER may be a null pointer, in which case
- the bytes read are discarded.
-
- The return value is the number of bytes actually read. This might be
- less than SIZE; for example, if there aren't that many bytes left in
- the stream or if more bytes have to be yet written with the function
+ Reads up to SIZE bytes from the stream pointed to by DECODER, storing
+ the results in BUFFER. If LZ_API_VERSION >= 1012, BUFFER may be a null
+ pointer, in which case the bytes read are discarded.
+
+ Returns the number of bytes actually read. This might be less than
+ SIZE; for example, if there aren't that many bytes left in the stream
+ or if more bytes have yet to be written with the function
'LZ_decompress_write'. Note that reading less than SIZE bytes is not
an error.
@@ -499,18 +495,16 @@ except 'LZ_decompress_open' whose return value must be verified by calling
-- Function: int LZ_decompress_write ( struct LZ_Decoder * const DECODER,
uint8_t * const BUFFER, const int SIZE )
- The function 'LZ_decompress_write' writes up to SIZE bytes from BUFFER
- to the stream pointed to by DECODER.
-
- The return value is the number of bytes actually written. This might be
+ Writes up to SIZE bytes from BUFFER to the stream pointed to by
+ DECODER. Returns the number of bytes actually written. This might be
less than SIZE. Note that writing less than SIZE bytes is not an error.
-- Function: int LZ_decompress_write_size ( struct LZ_Decoder * const
DECODER )
- The function 'LZ_decompress_write_size' returns the maximum number of
- bytes that can be immediately written through 'LZ_decompress_write'.
- This number varies smoothly; each compressed byte consumed may be
- overwritten immediately, increasing by 1 the value returned.
+ Returns the maximum number of bytes that can be immediately written
+ through 'LZ_decompress_write'. This number varies smoothly; each
+ compressed byte consumed may be overwritten immediately, increasing by
+ 1 the value returned.
It is guaranteed that an immediate call to 'LZ_decompress_write' will
accept a SIZE up to the returned number of bytes.
@@ -530,24 +524,24 @@ except 'LZ_decompress_open' whose return value must be verified by calling
-- Function: int LZ_decompress_member_finished ( struct LZ_Decoder * const
DECODER )
Returns 1 if the previous call to 'LZ_decompress_read' finished reading
- the current member, indicating that final values for member are
+ the current member, indicating that final values for the member are
available through 'LZ_decompress_data_crc',
'LZ_decompress_data_position', and 'LZ_decompress_member_position'.
Otherwise it returns 0.
-- Function: int LZ_decompress_member_version ( struct LZ_Decoder * const
DECODER )
- Returns the version of current member from member header.
+ Returns the version of the current member, read from the member header.
-- Function: int LZ_decompress_dictionary_size ( struct LZ_Decoder * const
DECODER )
- Returns the dictionary size of the current member, read from the member
- header.
+ Returns the dictionary size of the current member, read from the
+ member header.
-- Function: unsigned LZ_decompress_data_crc ( struct LZ_Decoder * const
DECODER )
Returns the 32 bit Cyclic Redundancy Check of the data decompressed
- from the current member. The returned value is valid only when
+ from the current member. The value returned is valid only when
'LZ_decompress_member_finished' returns 1.
-- Function: unsigned long long LZ_decompress_data_position ( struct
@@ -650,13 +644,14 @@ compatible with lzip 1.4 or newer.
Lzip is a lossless data compressor with a user interface similar to the
one of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov
-chain-Algorithm' (LZMA) stream format, chosen to maximize safety and
-interoperability. Lzip can compress about as fast as gzip (lzip -0) or
-compress most files more than bzip2 (lzip -9). Decompression speed is
-intermediate between gzip and bzip2. Lzip is better than gzip and bzip2
-from a data recovery perspective. Lzip has been designed, written, and
-tested with great care to replace gzip and bzip2 as the standard
-general-purpose compressed format for unix-like systems.
+chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity
+checking to maximize interoperability and optimize safety. Lzip can compress
+about as fast as gzip (lzip -0) or compress most files more than bzip2
+(lzip -9). Decompression speed is intermediate between gzip and bzip2. Lzip
+is better than gzip and bzip2 from a data recovery perspective. Lzip has
+been designed, written, and tested with great care to replace gzip and
+bzip2 as the standard general-purpose compressed format for unix-like
+systems.
The format for running minilzip is:
@@ -705,10 +700,13 @@ once, the first time it appears in the command line.
'-d'
'--decompress'
- Decompress the files specified. If a file does not exist or can't be
- opened, minilzip continues decompressing the rest of the files. If a
- file fails to decompress, or is a terminal, minilzip exits immediately
- without decompressing the rest of the files.
+ Decompress the files specified. If a file does not exist, can't be
+ opened, or the destination file already exists and '--force' has not
+ been specified, minilzip continues decompressing the rest of the files
+ and exits with error status 1. If a file fails to decompress, or is a
+ terminal, minilzip exits immediately with error status 2 without
+ decompressing the rest of the files. A terminal is considered an
+ uncompressed file, and therefore invalid.
'-f'
'--force'
@@ -831,12 +829,14 @@ once, the first time it appears in the command line.
'--check-lib'
Compare the version of lzlib used to compile minilzip with the version
- actually being used and exit. Report any differences found. Exit with
- error status 1 if differences are found. A mismatch may indicate that
- lzlib is not correctly installed or that a different version of lzlib
- has been installed after compiling the shared version of minilzip.
- 'minilzip -v --check-lib' shows the version of lzlib being used and
- the value of 'LZ_API_VERSION' (if defined). *Note Library version::.
+ actually being used at run time and exit. Report any differences
+ found. Exit with error status 1 if differences are found. A mismatch
+ may indicate that lzlib is not correctly installed or that a different
+ version of lzlib has been installed after compiling the shared version
+ of minilzip. Exit with error status 2 if LZ_API_VERSION and
+ LZ_version_string don't match. 'minilzip -v --check-lib' shows the
+ version of lzlib being used and the value of LZ_API_VERSION (if
+ defined). *Note Library version::.
Numbers given as arguments to options may be followed by a multiplier
@@ -857,7 +857,7 @@ Y yottabyte (10^24) | Yi yobibyte (2^80)
Exit status: 0 for a normal exit, 1 for environmental problems (file not
found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid
-input file, 3 for an internal consistency error (eg, bug) which caused
+input file, 3 for an internal consistency error (e.g., bug) which caused
minilzip to panic.

@@ -886,9 +886,11 @@ when there is no longer anything to take away.
represents a variable number of bytes.
- A lzip data stream consists of a series of "members" (compressed data
+ Lzip data consist of a series of independent "members" (compressed data
sets). The members simply appear one after another in the data stream, with
-no additional information before, between, or after them.
+no additional information before, between, or after them. Each member can
+encode in compressed form up to 16 EiB - 1 byte of uncompressed data. The
+size of a multimember data stream is unlimited.
Each member has the following structure:
@@ -916,7 +918,7 @@ no additional information before, between, or after them.
Valid values for dictionary size range from 4 KiB to 512 MiB.
'LZMA stream'
- The LZMA stream, finished by an end of stream marker. Uses default
+ The LZMA stream, finished by an "End Of Stream" marker. Uses default
values for encoder properties. *Note Stream format: (lzip)Stream
format, for a complete description.
Lzip only uses the LZMA marker '2' ("End Of Stream" marker). Lzlib
@@ -924,16 +926,17 @@ no additional information before, between, or after them.
sync_flush::.
'CRC32 (4 bytes)'
- Cyclic Redundancy Check (CRC) of the uncompressed original data.
+ Cyclic Redundancy Check (CRC) of the original uncompressed data.
'Data size (8 bytes)'
- Size of the uncompressed original data.
+ Size of the original uncompressed data.
'Member size (8 bytes)'
Total size of the member, including header and trailer. This field acts
as a distributed index, allows the verification of stream integrity,
- and facilitates safe recovery of undamaged members from multimember
- files.
+ and facilitates the safe recovery of undamaged members from
+ multimember files. Member size should be limited to 2 PiB to prevent
+ the data size field from overflowing.

@@ -967,10 +970,10 @@ File: lzlib.info, Node: Buffer compression, Next: Buffer decompression, Up: E
Buffer-to-buffer single-member compression (MEMBER_SIZE > total output).
-/* Compresses 'insize' bytes from 'inbuf' to 'outbuf'.
- Returns the size of the compressed data in '*outlenp'.
- In case of error, or if 'outsize' is too small, returns false and does
- not modify '*outlenp'.
+/* Compress 'insize' bytes from 'inbuf' to 'outbuf'.
+ Return the size of the compressed data in '*outlenp'.
+ In case of error, or if 'outsize' is too small, return false and do not
+ modify '*outlenp'.
*/
bool bbcompress( const uint8_t * const inbuf, const int insize,
const int dictionary_size, const int match_len_limit,
@@ -1011,10 +1014,10 @@ File: lzlib.info, Node: Buffer decompression, Next: File compression, Prev: B
Buffer-to-buffer decompression.
-/* Decompresses 'insize' bytes from 'inbuf' to 'outbuf'.
- Returns the size of the decompressed data in '*outlenp'.
- In case of error, or if 'outsize' is too small, returns false and does
- not modify '*outlenp'.
+/* Decompress 'insize' bytes from 'inbuf' to 'outbuf'.
+ Return the size of the decompressed data in '*outlenp'.
+ In case of error, or if 'outsize' is too small, return false and do not
+ modify '*outlenp'.
*/
bool bbdecompress( const uint8_t * const inbuf, const int insize,
uint8_t * const outbuf, const int outsize,
@@ -1159,9 +1162,9 @@ int ffmmcompress( FILE * const infile, FILE * const outfile )
Example 2: Multimember compression (user-restarted members). (Call
LZ_compress_open with MEMBER_SIZE > largest member).
-/* Compresses 'infile' to 'outfile' as a multimember stream with one member
+/* Compress 'infile' to 'outfile' as a multimember stream with one member
for each line of text terminated by a newline character or by EOF.
- Returns 0 if success, 1 if error.
+ Return 0 if success, 1 if error.
*/
int fflfcompress( struct LZ_Encoder * const encoder,
FILE * const infile, FILE * const outfile )
@@ -1205,7 +1208,7 @@ File: lzlib.info, Node: Skipping data errors, Prev: File compression mm, Up:
11.6 Skipping data errors
=========================
-/* Decompresses 'infile' to 'outfile' with automatic resynchronization to
+/* Decompress 'infile' to 'outfile' with automatic resynchronization to
next member in case of data error, including the automatic removal of
leading garbage.
*/
@@ -1253,7 +1256,7 @@ eternity, if not longer.
If you find a bug in lzlib, please send electronic mail to
<lzip-bug@nongnu.org>. Include the version number, which you can find by
-running 'minilzip --version' or in 'LZ_version_string' from 'lzlib.h'.
+running 'minilzip --version' and 'minilzip -v --check-lib'.

File: lzlib.info, Node: Concept index, Prev: Problems, Up: Top
@@ -1288,29 +1291,29 @@ Concept index

Tag Table:
-Node: Top220
-Node: Introduction1342
+Node: Top215
+Node: Introduction1338
Node: Library version6413
-Node: Buffering8918
-Node: Parameter limits10143
-Node: Compression functions11097
-Ref: member_size12907
-Ref: sync_flush14673
-Node: Decompression functions19493
-Node: Error codes27187
-Node: Error messages29478
-Node: Invoking minilzip30057
-Node: Data format39651
-Ref: coded-dict-size40957
-Node: Examples42267
-Node: Buffer compression43228
-Node: Buffer decompression44754
-Node: File compression46174
-Node: File decompression47157
-Node: File compression mm48161
-Node: Skipping data errors51193
-Node: Problems52505
-Node: Concept index53077
+Node: Buffering8957
+Node: Parameter limits10182
+Node: Compression functions11136
+Ref: member_size12946
+Ref: sync_flush14712
+Node: Decompression functions19400
+Node: Error codes26968
+Node: Error messages29259
+Node: Invoking minilzip29838
+Node: Data format39786
+Ref: coded-dict-size41232
+Node: Examples42641
+Node: Buffer compression43602
+Node: Buffer decompression45122
+Node: File compression46536
+Node: File decompression47519
+Node: File compression mm48523
+Node: Skipping data errors51552
+Node: Problems52862
+Node: Concept index53423

End Tag Table
diff --git a/doc/lzlib.texi b/doc/lzlib.texi
index 644a3d7..3caf9dd 100644
--- a/doc/lzlib.texi
+++ b/doc/lzlib.texi
@@ -6,10 +6,10 @@
@finalout
@c %**end of header
-@set UPDATED 2 January 2021
-@set VERSION 1.12
+@set UPDATED 23 January 2022
+@set VERSION 1.13
-@dircategory Data Compression
+@dircategory Compression
@direntry
* Lzlib: (lzlib). Compression library for the lzip format
@end direntry
@@ -52,7 +52,7 @@ This manual is for Lzlib (version @value{VERSION}, @value{UPDATED}).
@end menu
@sp 1
-Copyright @copyright{} 2009-2021 Antonio Diaz Diaz.
+Copyright @copyright{} 2009-2022 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to copy,
distribute, and modify it.
@@ -77,9 +77,9 @@ taking into account both data integrity and decoder availability:
The lzip format provides very safe integrity checking and some data
recovery means. The program
@uref{http://www.nongnu.org/lzip/manual/lziprecover_manual.html#Data-safety,,lziprecover}
-can repair bit flip errors (one of the most common forms of data
-corruption) in lzip files, and provides data recovery capabilities,
-including error-checked merging of damaged copies of a file.
+can repair bit flip errors (one of the most common forms of data corruption)
+in lzip files, and provides data recovery capabilities, including
+error-checked merging of damaged copies of a file.
@ifnothtml
@xref{Data safety,,,lziprecover}.
@end ifnothtml
@@ -89,8 +89,8 @@ The lzip format is as simple as possible (but not simpler). The lzip
manual provides the source code of a simple decompressor along with a
detailed explanation of how it works, so that with the only help of the
lzip manual it would be possible for a digital archaeologist to extract
-the data from a lzip file long after quantum computers eventually render
-LZMA obsolete.
+the data from a lzip file long after quantum computers eventually
+render LZMA obsolete.
@item
Additionally the lzip reference implementation is copylefted, which
@@ -104,8 +104,12 @@ the beginning is a thing of the past.
The functions and variables forming the interface of the compression library
are declared in the file @samp{lzlib.h}. Usage examples of the library are
-given in the files @samp{bbexample.c}, @samp{ffexample.c}, and @samp{main.c}
-from the source distribution.
+given in the files @samp{bbexample.c}, @samp{ffexample.c}, and
+@samp{minilzip.c} from the source distribution.
+
+All the library functions are thread safe. The library does not install any
+signal handler. The decoder checks the consistency of the compressed data,
+so the library should never crash even in case of corrupted input.
Compression/decompression is done by repeatedly calling a couple of
read/write functions until all the data have been processed by the library.
@@ -134,22 +138,17 @@ Lzlib is able to compress and decompress streams of unlimited size by
automatically creating multimember output. The members so created are large,
about @w{2 PiB} each.
-All the library functions are thread safe. The library does not install
-any signal handler. The decoder checks the consistency of the compressed
-data, so the library should never crash even in case of corrupted input.
-
In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
concrete algorithm; it is more like "any algorithm using the LZMA coding
-scheme". For example, the option @samp{-0} of lzip uses the scheme in almost
-the simplest way possible; issuing the longest match it can find, or a
-literal byte if it can't find a match. Inversely, a much more elaborated way
-of finding coding sequences of minimum size than the one currently used by
-lzip could be developed, and the resulting sequence could also be coded
-using the LZMA coding scheme.
+scheme". For example, the option @samp{-0} of lzip uses the scheme in almost the
+simplest way possible; issuing the longest match it can find, or a literal
+byte if it can't find a match. Conversely, a much more elaborate way of
+finding coding sequences of minimum size than the one currently used by lzip
+could be developed, and the resulting sequence could also be coded using the
+LZMA coding scheme.
-Lzlib currently implements two variants of the LZMA algorithm; fast (used by
-option @samp{-0} of minilzip) and normal (used by all other compression
-levels).
+Lzlib currently implements two variants of the LZMA algorithm: fast (used by
+option @samp{-0} of minilzip) and normal (used by all other compression levels).
The high compression of LZMA comes from combining two basic, well-proven
compression ideas: sliding dictionaries (LZ77/78) and markov models (the
@@ -176,7 +175,8 @@ One goal of lzlib is to keep perfect backward compatibility with older
versions of itself down to 1.0. Any application working with an older lzlib
should work with a newer lzlib. Installing a newer lzlib should not break
anything. This chapter describes the constants and functions that the
-application can use to discover the version of the library being used.
+application can use to discover the version of the library being used. All
+of them are declared in @samp{lzlib.h}.
@defvr Constant LZ_API_VERSION
This constant is defined in @samp{lzlib.h} and works as a version test
@@ -372,12 +372,13 @@ already written with the function @samp{LZ_compress_write}. First call
@samp{LZ_compress_sync_flush}. Then call @samp{LZ_compress_read} until it
returns 0.
-This function writes a LZMA marker @samp{3} ("Sync Flush" marker) to the
-compressed output. Note that the sync flush marker is not allowed in lzip
-files; it is a device for interactive communication between applications
-using lzlib, but is useless and wasteful in a file, and is excluded from the
-media type @samp{application/lzip}. The LZMA marker @samp{2} ("End Of
-Stream" marker) is the only marker allowed in lzip files. @xref{Data format}.
+This function writes at least one LZMA marker @samp{3} ("Sync Flush" marker)
+to the compressed output. Note that the sync flush marker is not allowed in
+lzip files; it is a device for interactive communication between
+applications using lzlib, but is useless and wasteful in a file, and is
+excluded from the media type @samp{application/lzip}. The LZMA marker
+@samp{2} ("End Of Stream" marker) is the only marker allowed in lzip files.
+@xref{Data format}.
Repeated use of @samp{LZ_compress_sync_flush} may degrade compression
ratio, so use it only when needed. If the interval between calls to
@@ -394,36 +395,33 @@ are more bytes available than those needed to complete @var{member_size},
@deftypefun int LZ_compress_read ( struct LZ_Encoder * const @var{encoder}, uint8_t * const @var{buffer}, const int @var{size} )
-The function @samp{LZ_compress_read} reads up to @var{size} bytes from the
-stream pointed to by @var{encoder}, storing the results in @var{buffer}.
-If @w{LZ_API_VERSION >= 1012}, @var{buffer} may be a null pointer, in which
-case the bytes read are discarded.
-
-The return value is the number of bytes actually read. This might be less
-than @var{size}; for example, if there aren't that many bytes left in the
-stream or if more bytes have to be yet written with the function
+Reads up to @var{size} bytes from the stream pointed to by @var{encoder},
+storing the results in @var{buffer}. If @w{LZ_API_VERSION >= 1012},
+@var{buffer} may be a null pointer, in which case the bytes read are
+discarded.
+
+Returns the number of bytes actually read. This might be less than
+@var{size}; for example, if there aren't that many bytes left in the stream
+or if more bytes have yet to be written with the function
@samp{LZ_compress_write}. Note that reading less than @var{size} bytes is
not an error.
@end deftypefun
@deftypefun int LZ_compress_write ( struct LZ_Encoder * const @var{encoder}, uint8_t * const @var{buffer}, const int @var{size} )
-The function @samp{LZ_compress_write} writes up to @var{size} bytes from
-@var{buffer} to the stream pointed to by @var{encoder}.
-
-The return value is the number of bytes actually written. This might be
-less than @var{size}. Note that writing less than @var{size} bytes is
-not an error.
+Writes up to @var{size} bytes from @var{buffer} to the stream pointed to by
+@var{encoder}. Returns the number of bytes actually written. This might be
+less than @var{size}. Note that writing less than @var{size} bytes is not an
+error.
@end deftypefun
@deftypefun int LZ_compress_write_size ( struct LZ_Encoder * const @var{encoder} )
-The function @samp{LZ_compress_write_size} returns the maximum number of
-bytes that can be immediately written through @samp{LZ_compress_write}.
-For efficiency reasons, once the input buffer is full and
-@samp{LZ_compress_write_size} returns 0, almost all the buffer must be
-compressed before a size greater than 0 is returned again. (This is done to
-minimize the amount of data that must be copied to the beginning of the
+Returns the maximum number of bytes that can be immediately written through
+@samp{LZ_compress_write}. For efficiency reasons, once the input buffer is
+full and @samp{LZ_compress_write_size} returns 0, almost all the buffer must
+be compressed before a size greater than 0 is returned again. (This is done
+to minimize the amount of data that must be copied to the beginning of the
buffer before new data can be accepted).
It is guaranteed that an immediate call to @samp{LZ_compress_write} will
@@ -478,10 +476,10 @@ perhaps not yet read.
@chapter Decompression functions
@cindex decompression functions
-These are the functions used to decompress data. In case of error, all
-of them return -1 or 0, for signed and unsigned return values
-respectively, except @samp{LZ_decompress_open} whose return value must
-be verified by calling @samp{LZ_decompress_errno} before using it.
+These are the functions used to decompress data. In case of error, all of
+them return -1 or 0, for signed and unsigned return values respectively,
+except @samp{LZ_decompress_open} whose return value must be verified by
+calling @samp{LZ_decompress_errno} before using it.
@deftypefun {struct LZ_Decoder *} LZ_decompress_open ( void )
@@ -539,14 +537,14 @@ function does nothing.
@deftypefun int LZ_decompress_read ( struct LZ_Decoder * const @var{decoder}, uint8_t * const @var{buffer}, const int @var{size} )
-The function @samp{LZ_decompress_read} reads up to @var{size} bytes from the
-stream pointed to by @var{decoder}, storing the results in @var{buffer}.
-If @w{LZ_API_VERSION >= 1012}, @var{buffer} may be a null pointer, in which
-case the bytes read are discarded.
-
-The return value is the number of bytes actually read. This might be less
-than @var{size}; for example, if there aren't that many bytes left in the
-stream or if more bytes have to be yet written with the function
+Reads up to @var{size} bytes from the stream pointed to by @var{decoder},
+storing the results in @var{buffer}. If @w{LZ_API_VERSION >= 1012},
+@var{buffer} may be a null pointer, in which case the bytes read are
+discarded.
+
+Returns the number of bytes actually read. This might be less than
+@var{size}; for example, if there aren't that many bytes left in the stream
+or if more bytes have yet to be written with the function
@samp{LZ_decompress_write}. Note that reading less than @var{size} bytes is
not an error.
@@ -571,20 +569,18 @@ recover as much data as possible from each damaged member.
@deftypefun int LZ_decompress_write ( struct LZ_Decoder * const @var{decoder}, uint8_t * const @var{buffer}, const int @var{size} )
-The function @samp{LZ_decompress_write} writes up to @var{size} bytes from
-@var{buffer} to the stream pointed to by @var{decoder}.
-
-The return value is the number of bytes actually written. This might be
-less than @var{size}. Note that writing less than @var{size} bytes is
-not an error.
+Writes up to @var{size} bytes from @var{buffer} to the stream pointed to by
+@var{decoder}. Returns the number of bytes actually written. This might be
+less than @var{size}. Note that writing less than @var{size} bytes is not an
+error.
@end deftypefun
@deftypefun int LZ_decompress_write_size ( struct LZ_Decoder * const @var{decoder} )
-The function @samp{LZ_decompress_write_size} returns the maximum number of
-bytes that can be immediately written through @samp{LZ_decompress_write}.
-This number varies smoothly; each compressed byte consumed may be
-overwritten immediately, increasing by 1 the value returned.
+Returns the maximum number of bytes that can be immediately written through
+@samp{LZ_decompress_write}. This number varies smoothly; each compressed
+byte consumed may be overwritten immediately, increasing by 1 the value
+returned.
It is guaranteed that an immediate call to @samp{LZ_decompress_write} will
accept a @var{size} up to the returned number of bytes.
@@ -607,26 +603,25 @@ does not imply @samp{LZ_decompress_member_finished}.
@deftypefun int LZ_decompress_member_finished ( struct LZ_Decoder * const @var{decoder} )
Returns 1 if the previous call to @samp{LZ_decompress_read} finished reading
-the current member, indicating that final values for member are available
+the current member, indicating that final values for the member are available
through @samp{LZ_decompress_data_crc}, @samp{LZ_decompress_data_position},
and @samp{LZ_decompress_member_position}. Otherwise it returns 0.
@end deftypefun
@deftypefun int LZ_decompress_member_version ( struct LZ_Decoder * const @var{decoder} )
-Returns the version of current member from member header.
+Returns the version of the current member, read from the member header.
@end deftypefun
@deftypefun int LZ_decompress_dictionary_size ( struct LZ_Decoder * const @var{decoder} )
-Returns the dictionary size of the current member, read from the member
-header.
+Returns the dictionary size of the current member, read from the member header.
@end deftypefun
@deftypefun {unsigned} LZ_decompress_data_crc ( struct LZ_Decoder * const @var{decoder} )
Returns the 32 bit Cyclic Redundancy Check of the data decompressed from
-the current member. The returned value is valid only when
+the current member. The value returned is valid only when
@samp{LZ_decompress_member_finished} returns 1.
@end deftypefun
@@ -672,8 +667,7 @@ examine @samp{LZ_(de)compress_errno}.
The error codes are defined in the header file @samp{lzlib.h}.
@deftypevr Constant {enum LZ_Errno} LZ_ok
-The value of this constant is 0 and is used to indicate that there is no
-error.
+The value of this constant is 0 and is used to indicate that there is no error.
@end deftypevr
@deftypevr Constant {enum LZ_Errno} LZ_bad_argument
@@ -737,16 +731,17 @@ The value of @var{lz_errno} normally comes from a call to
Minilzip is a test program for the compression library lzlib, fully
compatible with lzip 1.4 or newer.
-@uref{http://www.nongnu.org/lzip/lzip.html,,Lzip} is a lossless data
-compressor with a user interface similar to the one of gzip or bzip2. Lzip
-uses a simplified form of the 'Lempel-Ziv-Markov chain-Algorithm' (LZMA)
-stream format, chosen to maximize safety and interoperability. Lzip can
-compress about as fast as gzip @w{(lzip -0)} or compress most files more
-than bzip2 @w{(lzip -9)}. Decompression speed is intermediate between gzip
-and bzip2. Lzip is better than gzip and bzip2 from a data recovery
-perspective. Lzip has been designed, written, and tested with great care to
-replace gzip and bzip2 as the standard general-purpose compressed format for
-unix-like systems.
+@uref{http://www.nongnu.org/lzip/lzip.html,,Lzip}
+is a lossless data compressor with a user interface similar to the one
+of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov
+chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity
+checking to maximize interoperability and optimize safety. Lzip can compress
+about as fast as gzip @w{(lzip -0)} or compress most files more than bzip2
+@w{(lzip -9)}. Decompression speed is intermediate between gzip and bzip2.
+Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip
+has been designed, written, and tested with great care to replace gzip and
+bzip2 as the standard general-purpose compressed format for unix-like
+systems.
@noindent
The format for running minilzip is:
@@ -803,10 +798,12 @@ and @samp{-S}. @samp{-c} has no effect when testing or listing.
@item -d
@itemx --decompress
-Decompress the files specified. If a file does not exist or can't be
-opened, minilzip continues decompressing the rest of the files. If a file
-fails to decompress, or is a terminal, minilzip exits immediately without
-decompressing the rest of the files.
+Decompress the files specified. If a file does not exist, can't be opened,
+or the destination file already exists and @samp{--force} has not been
+specified, minilzip continues decompressing the rest of the files and exits with
+error status 1. If a file fails to decompress, or is a terminal, minilzip exits
+immediately with error status 2 without decompressing the rest of the files.
+A terminal is considered an uncompressed file, and therefore invalid.
@item -f
@itemx --force
@@ -932,12 +929,13 @@ header" error and the cause is not indeed a corrupt header.
@item --check-lib
Compare the @uref{#Library-version,,version of lzlib} used to compile
-minilzip with the version actually being used and exit. Report any
-differences found. Exit with error status 1 if differences are found. A
+minilzip with the version actually being used at run time and exit. Report
+any differences found. Exit with error status 1 if differences are found. A
mismatch may indicate that lzlib is not correctly installed or that a
different version of lzlib has been installed after compiling the shared
-version of minilzip. @w{@samp{minilzip -v --check-lib}} shows the version of
-lzlib being used and the value of @samp{LZ_API_VERSION} (if defined).
+version of minilzip. Exit with error status 2 if LZ_API_VERSION and
+LZ_version_string don't match. @w{@samp{minilzip -v --check-lib}} shows the
+version of lzlib being used and the value of LZ_API_VERSION (if defined).
@ifnothtml
@xref{Library version}.
@end ifnothtml
@@ -963,9 +961,9 @@ Table of SI and binary prefixes (unit multipliers):
@sp 1
Exit status: 0 for a normal exit, 1 for environmental problems (file not
-found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
-invalid input file, 3 for an internal consistency error (eg, bug) which
-caused minilzip to panic.
+found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid
+input file, 3 for an internal consistency error (e.g., bug) which caused
+minilzip to panic.
@node Data format
@@ -996,9 +994,11 @@ represents one byte; a box like this:
represents a variable number of bytes.
@sp 1
-A lzip data stream consists of a series of "members" (compressed data sets).
-The members simply appear one after another in the data stream, with no
-additional information before, between, or after them.
+Lzip data consist of a series of independent "members" (compressed data
+sets). The members simply appear one after another in the data stream, with
+no additional information before, between, or after them. Each member can
+encode in compressed form up to @w{16 EiB - 1 byte} of uncompressed data.
+The size of a multimember data stream is unlimited.
Each member has the following structure:
@@ -1029,7 +1029,7 @@ Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@*
Valid values for dictionary size range from 4 KiB to 512 MiB.
@item LZMA stream
-The LZMA stream, finished by an end of stream marker. Uses default values
+The LZMA stream, finished by an "End Of Stream" marker. Uses default values
for encoder properties.
@ifnothtml
@xref{Stream format,,,lzip},
@@ -1043,15 +1043,17 @@ Lzip only uses the LZMA marker @samp{2} ("End Of Stream" marker). Lzlib
also uses the LZMA marker @samp{3} ("Sync Flush" marker). @xref{sync_flush}.
@item CRC32 (4 bytes)
-Cyclic Redundancy Check (CRC) of the uncompressed original data.
+Cyclic Redundancy Check (CRC) of the original uncompressed data.
@item Data size (8 bytes)
-Size of the uncompressed original data.
+Size of the original uncompressed data.
@item Member size (8 bytes)
Total size of the member, including header and trailer. This field acts
as a distributed index, allows the verification of stream integrity, and
-facilitates safe recovery of undamaged members from multimember files.
+facilitates the safe recovery of undamaged members from multimember files.
+Member size should be limited to @w{2 PiB} to prevent the data size field
+from overflowing.
@end table
@@ -1086,10 +1088,10 @@ Buffer-to-buffer single-member compression
@w{(@var{member_size} > total output)}.
@verbatim
-/* Compresses 'insize' bytes from 'inbuf' to 'outbuf'.
- Returns the size of the compressed data in '*outlenp'.
- In case of error, or if 'outsize' is too small, returns false and does
- not modify '*outlenp'.
+/* Compress 'insize' bytes from 'inbuf' to 'outbuf'.
+ Return the size of the compressed data in '*outlenp'.
+ In case of error, or if 'outsize' is too small, return false and do not
+ modify '*outlenp'.
*/
bool bbcompress( const uint8_t * const inbuf, const int insize,
const int dictionary_size, const int match_len_limit,
@@ -1131,10 +1133,10 @@ bool bbcompress( const uint8_t * const inbuf, const int insize,
Buffer-to-buffer decompression.
@verbatim
-/* Decompresses 'insize' bytes from 'inbuf' to 'outbuf'.
- Returns the size of the decompressed data in '*outlenp'.
- In case of error, or if 'outsize' is too small, returns false and does
- not modify '*outlenp'.
+/* Decompress 'insize' bytes from 'inbuf' to 'outbuf'.
+ Return the size of the decompressed data in '*outlenp'.
+ In case of error, or if 'outsize' is too small, return false and do not
+ modify '*outlenp'.
*/
bool bbdecompress( const uint8_t * const inbuf, const int insize,
uint8_t * const outbuf, const int outsize,
@@ -1285,9 +1287,9 @@ Example 2: Multimember compression (user-restarted members).
(Call LZ_compress_open with @var{member_size} > largest member).
@verbatim
-/* Compresses 'infile' to 'outfile' as a multimember stream with one member
+/* Compress 'infile' to 'outfile' as a multimember stream with one member
for each line of text terminated by a newline character or by EOF.
- Returns 0 if success, 1 if error.
+ Return 0 if success, 1 if error.
*/
int fflfcompress( struct LZ_Encoder * const encoder,
FILE * const infile, FILE * const outfile )
@@ -1332,7 +1334,7 @@ int fflfcompress( struct LZ_Encoder * const encoder,
@cindex skipping data errors
@verbatim
-/* Decompresses 'infile' to 'outfile' with automatic resynchronization to
+/* Decompress 'infile' to 'outfile' with automatic resynchronization to
next member in case of data error, including the automatic removal of
leading garbage.
*/
@@ -1381,8 +1383,8 @@ for all eternity, if not longer.
If you find a bug in lzlib, please send electronic mail to
@email{lzip-bug@@nongnu.org}. Include the version number, which you can
-find by running @w{@samp{minilzip --version}} or in
-@samp{LZ_version_string} from @samp{lzlib.h}.
+find by running @w{@samp{minilzip --version}} and
+@w{@samp{minilzip -v --check-lib}}.
@node Concept index
diff --git a/doc/minilzip.1 b/doc/minilzip.1
index 13a2d6d..0c4c06d 100644
--- a/doc/minilzip.1
+++ b/doc/minilzip.1
@@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16.
-.TH MINILZIP "1" "January 2021" "minilzip 1.12" "User Commands"
+.TH MINILZIP "1" "January 2022" "minilzip 1.13" "User Commands"
.SH NAME
minilzip \- reduces the size of files
.SH SYNOPSIS
@@ -11,13 +11,14 @@ compatible with lzip 1.4 or newer.
.PP
Lzip is a lossless data compressor with a user interface similar to the one
of gzip or bzip2. Lzip uses a simplified form of the 'Lempel\-Ziv\-Markov
-chain\-Algorithm' (LZMA) stream format, chosen to maximize safety and
-interoperability. Lzip can compress about as fast as gzip (lzip \fB\-0\fR) or
-compress most files more than bzip2 (lzip \fB\-9\fR). Decompression speed is
-intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 from
-a data recovery perspective. Lzip has been designed, written, and tested
-with great care to replace gzip and bzip2 as the standard general\-purpose
-compressed format for unix\-like systems.
+chain\-Algorithm' (LZMA) stream format and provides a 3 factor integrity
+checking to maximize interoperability and optimize safety. Lzip can compress
+about as fast as gzip (lzip \fB\-0\fR) or compress most files more than bzip2
+(lzip \fB\-9\fR). Decompression speed is intermediate between gzip and bzip2.
+Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip
+has been designed, written, and tested with great care to replace gzip and
+bzip2 as the standard general\-purpose compressed format for unix\-like
+systems.
.SH OPTIONS
.TP
\fB\-h\fR, \fB\-\-help\fR
@@ -100,7 +101,7 @@ To extract all the files from archive 'foo.tar.lz', use the commands
.PP
Exit status: 0 for a normal exit, 1 for environmental problems (file
not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
-invalid input file, 3 for an internal consistency error (eg, bug) which
+invalid input file, 3 for an internal consistency error (e.g., bug) which
caused minilzip to panic.
.PP
The ideas embodied in lzlib are due to (at least) the following people:
@@ -113,9 +114,21 @@ Report bugs to lzip\-bug@nongnu.org
.br
Lzlib home page: http://www.nongnu.org/lzip/lzlib.html
.SH COPYRIGHT
-Copyright \(co 2021 Antonio Diaz Diaz.
-Using lzlib 1.12
+Copyright \(co 2022 Antonio Diaz Diaz.
+Using lzlib 1.13
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
.br
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
+.SH "SEE ALSO"
+The full documentation for
+.B minilzip
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B minilzip
+programs are properly installed at your site, the command
+.IP
+.B info lzlib
+.PP
+should give you access to the complete manual.
diff --git a/encoder.c b/encoder.c
index 600a444..b76dafa 100644
--- a/encoder.c
+++ b/encoder.c
@@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
- Copyright (C) 2009-2021 Antonio Diaz Diaz.
+ Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -21,18 +21,7 @@ static int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs
{
int32_t * ptr0 = e->eb.mb.pos_array + ( e->eb.mb.cyclic_pos << 1 );
int32_t * ptr1 = ptr0 + 1;
- int32_t * newptr;
- int len = 0, len0 = 0, len1 = 0;
- int maxlen = 3; /* only used if pairs != 0 */
- int num_pairs = 0;
- const int pos1 = e->eb.mb.pos + 1;
- const int min_pos = ( e->eb.mb.pos > e->eb.mb.dictionary_size ) ?
- e->eb.mb.pos - e->eb.mb.dictionary_size : 0;
- const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb );
- int count, key2, key3, key4, newpos1;
- unsigned tmp;
int len_limit = e->match_len_limit;
-
if( len_limit > Mb_available_bytes( &e->eb.mb ) )
{
e->been_flushed = true;
@@ -40,12 +29,18 @@ static int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs
if( len_limit < 4 ) { *ptr0 = *ptr1 = 0; return 0; }
}
- tmp = crc32[data[0]] ^ data[1];
- key2 = tmp & ( num_prev_positions2 - 1 );
+ int maxlen = 3; /* only used if pairs != 0 */
+ int num_pairs = 0;
+ const int min_pos = ( e->eb.mb.pos > e->eb.mb.dictionary_size ) ?
+ e->eb.mb.pos - e->eb.mb.dictionary_size : 0;
+ const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb );
+
+ unsigned tmp = crc32[data[0]] ^ data[1];
+ const int key2 = tmp & ( num_prev_positions2 - 1 );
tmp ^= (unsigned)data[2] << 8;
- key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) );
- key4 = num_prev_positions2 + num_prev_positions3 +
- ( ( tmp ^ ( crc32[data[3]] << 5 ) ) & e->eb.mb.key4_mask );
+ const int key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) );
+ const int key4 = num_prev_positions2 + num_prev_positions3 +
+ ( ( tmp ^ ( crc32[data[3]] << 5 ) ) & e->eb.mb.key4_mask );
if( pairs )
{
@@ -54,7 +49,7 @@ static int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs
if( np2 > min_pos && e->eb.mb.buffer[np2-1] == data[0] )
{
pairs[0].dis = e->eb.mb.pos - np2;
- pairs[0].len = maxlen = 2;
+ pairs[0].len = maxlen = 2 + ( np2 == np3 );
num_pairs = 1;
}
if( np2 != np3 && np3 > min_pos && e->eb.mb.buffer[np3-1] == data[0] )
@@ -73,19 +68,22 @@ static int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs
}
}
+ const int pos1 = e->eb.mb.pos + 1;
e->eb.mb.prev_positions[key2] = pos1;
e->eb.mb.prev_positions[key3] = pos1;
- newpos1 = e->eb.mb.prev_positions[key4];
+ int newpos1 = e->eb.mb.prev_positions[key4];
e->eb.mb.prev_positions[key4] = pos1;
+ int len = 0, len0 = 0, len1 = 0;
+
+ int count;
for( count = e->cycles; ; )
{
- int delta;
if( newpos1 <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; }
if( e->been_flushed ) len = 0;
- delta = pos1 - newpos1;
- newptr = e->eb.mb.pos_array +
+ const int delta = pos1 - newpos1;
+ int32_t * const newptr = e->eb.mb.pos_array +
( ( e->eb.mb.cyclic_pos - delta +
( (e->eb.mb.cyclic_pos >= delta) ? 0 : e->eb.mb.dictionary_size + 1 ) ) << 1 );
if( data[len-delta] == data[len] )
@@ -140,7 +138,6 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e )
for( len_state = 0; len_state < len_states; ++len_state )
{
int * const dsp = e->dis_slot_prices[len_state];
- int * const dp = e->dis_prices[len_state];
const Bit_model * const bmds = e->eb.bm_dis_slot[len_state];
int slot = 0;
for( ; slot < end_dis_model; ++slot )
@@ -149,6 +146,7 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e )
dsp[slot] = price_symbol6( bmds, slot ) +
(((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift_bits );
+ int * const dp = e->dis_prices[len_state];
for( dis = 0; dis < start_dis_model; ++dis )
dp[dis] = dsp[dis];
for( ; dis < modeled_distances; ++dis )
@@ -157,7 +155,7 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e )
}
-/* Returns the number of bytes advanced (ahead).
+/* Return the number of bytes advanced (ahead).
trials[0]..trials[ahead-1] contain the steps to encode.
( trials[0].dis4 == -1 ) means literal.
A match/rep longer or equal than match_len_limit finishes the sequence.
@@ -166,9 +164,8 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
const int reps[num_rep_distances],
const State state )
{
- int main_len, num_pairs, i, rep, num_trials, len;
- int rep_index = 0, cur = 0;
- int replens[num_rep_distances];
+ int num_pairs, num_trials;
+ int i, rep, len;
if( e->pending_num_pairs > 0 ) /* from previous call */
{
@@ -177,8 +174,10 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
}
else
num_pairs = LZe_read_match_distances( e );
- main_len = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0;
+ const int main_len = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0;
+ int replens[num_rep_distances];
+ int rep_index = 0;
for( i = 0; i < num_rep_distances; ++i )
{
replens[i] = Mb_true_match_len( &e->eb.mb, 0, reps[i] + 1 );
@@ -200,7 +199,6 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
return main_len;
}
- {
const int pos_state = Mb_data_position( &e->eb.mb ) & pos_state_mask;
const int match_price = price1( e->eb.bm_match[state][pos_state] );
const int rep_match_price = match_price + price1( e->eb.bm_rep[state] );
@@ -238,9 +236,8 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
for( rep = 0; rep < num_rep_distances; ++rep )
{
- int price;
if( replens[rep] < min_match_len ) continue;
- price = rep_match_price + LZeb_price_rep( &e->eb, rep, state, pos_state );
+ const int price = rep_match_price + LZeb_price_rep( &e->eb, rep, state, pos_state );
for( len = min_match_len; len <= replens[rep]; ++len )
Tr_update( &e->trials[len], price +
Lp_price( &e->rep_len_prices, len, pos_state ), rep, 0 );
@@ -260,17 +257,10 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
if( ++len > e->pairs[i].len && ++i >= num_pairs ) break;
}
}
- }
+ int cur = 0;
while( true ) /* price optimization loop */
{
- struct Trial *cur_trial, *next_trial;
- int newlen, pos_state, triable_bytes, len_limit;
- int start_len = min_match_len;
- int next_price, match_price, rep_match_price;
- State cur_state;
- uint8_t prev_byte, cur_byte, match_byte;
-
if( !Mb_move_pos( &e->eb.mb ) ) return 0;
if( ++cur >= num_trials ) /* no more initialized trials */
{
@@ -278,8 +268,8 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
return cur;
}
- num_pairs = LZe_read_match_distances( e );
- newlen = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0;
+ const int num_pairs = LZe_read_match_distances( e );
+ const int newlen = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0;
if( newlen >= e->match_len_limit )
{
e->pending_num_pairs = num_pairs;
@@ -288,7 +278,8 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
}
/* give final values to current trial */
- cur_trial = &e->trials[cur];
+ struct Trial * cur_trial = &e->trials[cur];
+ State cur_state;
{
const int dis4 = cur_trial->dis4;
int prev_index = cur_trial->prev_index;
@@ -319,25 +310,25 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
mtf_reps( dis4, cur_trial->reps ); /* literal is ignored */
}
- pos_state = Mb_data_position( &e->eb.mb ) & pos_state_mask;
- prev_byte = Mb_peek( &e->eb.mb, 1 );
- cur_byte = Mb_peek( &e->eb.mb, 0 );
- match_byte = Mb_peek( &e->eb.mb, cur_trial->reps[0] + 1 );
+ const int pos_state = Mb_data_position( &e->eb.mb ) & pos_state_mask;
+ const uint8_t prev_byte = Mb_peek( &e->eb.mb, 1 );
+ const uint8_t cur_byte = Mb_peek( &e->eb.mb, 0 );
+ const uint8_t match_byte = Mb_peek( &e->eb.mb, cur_trial->reps[0] + 1 );
- next_price = cur_trial->price +
- price0( e->eb.bm_match[cur_state][pos_state] );
+ int next_price = cur_trial->price +
+ price0( e->eb.bm_match[cur_state][pos_state] );
if( St_is_char( cur_state ) )
next_price += LZeb_price_literal( &e->eb, prev_byte, cur_byte );
else
next_price += LZeb_price_matched( &e->eb, prev_byte, cur_byte, match_byte );
/* try last updates to next trial */
- next_trial = &e->trials[cur+1];
+ struct Trial * next_trial = &e->trials[cur+1];
Tr_update( next_trial, next_price, -1, cur ); /* literal */
- match_price = cur_trial->price + price1( e->eb.bm_match[cur_state][pos_state] );
- rep_match_price = match_price + price1( e->eb.bm_rep[cur_state] );
+ const int match_price = cur_trial->price + price1( e->eb.bm_match[cur_state][pos_state] );
+ const int rep_match_price = match_price + price1( e->eb.bm_rep[cur_state] );
if( match_byte == cur_byte && next_trial->dis4 != 0 &&
next_trial->prev_index2 == single_step_trial )
@@ -352,11 +343,11 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
}
}
- triable_bytes =
+ const int triable_bytes =
min( Mb_available_bytes( &e->eb.mb ), max_num_trials - 1 - cur );
if( triable_bytes < min_match_len ) continue;
- len_limit = min( e->match_len_limit, triable_bytes );
+ const int len_limit = min( e->match_len_limit, triable_bytes );
/* try literal + rep0 */
if( match_byte != cur_byte && next_trial->prev_index != cur )
@@ -380,19 +371,20 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
}
}
+ int start_len = min_match_len;
+
/* try rep distances */
for( rep = 0; rep < num_rep_distances; ++rep )
{
const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb );
const int dis = cur_trial->reps[rep] + 1;
- int price;
if( data[0-dis] != data[0] || data[1-dis] != data[1] ) continue;
for( len = min_match_len; len < len_limit; ++len )
if( data[len-dis] != data[len] ) break;
while( num_trials < cur + len )
e->trials[++num_trials].price = infinite_price;
- price = rep_match_price + LZeb_price_rep( &e->eb, rep, cur_state, pos_state );
+ int price = rep_match_price + LZeb_price_rep( &e->eb, rep, cur_state, pos_state );
for( i = min_match_len; i <= len; ++i )
Tr_update( &e->trials[cur+i], price +
Lp_price( &e->rep_len_prices, i, pos_state ), rep, cur );
@@ -400,17 +392,14 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
if( rep == 0 ) start_len = len + 1; /* discard shorter matches */
/* try rep + literal + rep0 */
- {
int len2 = len + 1;
const int limit = min( e->match_len_limit + len2, triable_bytes );
- int pos_state2;
- State state2;
while( len2 < limit && data[len2-dis] == data[len2] ) ++len2;
len2 -= len + 1;
if( len2 < min_match_len ) continue;
- pos_state2 = ( pos_state + len ) & pos_state_mask;
- state2 = St_set_rep( cur_state );
+ int pos_state2 = ( pos_state + len ) & pos_state_mask;
+ State state2 = St_set_rep( cur_state );
price += Lp_price( &e->rep_len_prices, len, pos_state ) +
price0( e->eb.bm_match[state2][pos_state2] ) +
LZeb_price_matched( &e->eb, data[len-1], data[len], data[len-dis] );
@@ -423,21 +412,19 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
e->trials[++num_trials].price = infinite_price;
Tr_update3( &e->trials[cur+len+1+len2], price, rep, cur + len + 1, cur );
}
- }
/* try matches */
if( newlen >= start_len && newlen <= len_limit )
{
- int dis;
const int normal_match_price = match_price +
price0( e->eb.bm_rep[cur_state] );
while( num_trials < cur + newlen )
e->trials[++num_trials].price = infinite_price;
- i = 0;
+ int i = 0;
while( e->pairs[i].len < start_len ) ++i;
- dis = e->pairs[i].dis;
+ int dis = e->pairs[i].dis;
for( len = start_len; ; ++len )
{
int price = normal_match_price + LZe_price_pair( e, dis, len, pos_state );
@@ -484,7 +471,7 @@ static bool LZe_encode_member( struct LZ_encoder * const e )
const int dis_price_count = best ? 1 : 512;
const int align_price_count = best ? 1 : dis_align_size;
const int price_count = ( e->match_len_limit > 36 ) ? 1013 : 4093;
- int ahead, i;
+ int i;
State * const state = &e->eb.state;
if( e->eb.member_finished ) return true;
@@ -494,11 +481,10 @@ static bool LZe_encode_member( struct LZ_encoder * const e )
if( Mb_data_position( &e->eb.mb ) == 0 &&
!Mb_data_finished( &e->eb.mb ) ) /* encode first byte */
{
- const uint8_t prev_byte = 0;
- uint8_t cur_byte;
if( !Mb_enough_available_bytes( &e->eb.mb ) ||
!Re_enough_free_bytes( &e->eb.renc ) ) return true;
- cur_byte = Mb_peek( &e->eb.mb, 0 );
+ const uint8_t prev_byte = 0;
+ const uint8_t cur_byte = Mb_peek( &e->eb.mb, 0 );
Re_encode_bit( &e->eb.renc, &e->eb.bm_match[*state][0], 0 );
LZeb_encode_literal( &e->eb, prev_byte, cur_byte );
CRC32_update_byte( &e->eb.crc, cur_byte );
@@ -525,7 +511,7 @@ static bool LZe_encode_member( struct LZ_encoder * const e )
Lp_update_prices( &e->rep_len_prices );
}
- ahead = LZe_sequence_optimizer( e, e->eb.reps, *state );
+ int ahead = LZe_sequence_optimizer( e, e->eb.reps, *state );
e->price_counter -= ahead;
for( i = 0; ahead > 0; )
@@ -542,14 +528,13 @@ static bool LZe_encode_member( struct LZ_encoder * const e )
const uint8_t prev_byte = Mb_peek( &e->eb.mb, ahead + 1 );
const uint8_t cur_byte = Mb_peek( &e->eb.mb, ahead );
CRC32_update_byte( &e->eb.crc, cur_byte );
- if( St_is_char( *state ) )
+ if( ( *state = St_set_char( *state ) ) < 4 )
LZeb_encode_literal( &e->eb, prev_byte, cur_byte );
else
{
const uint8_t match_byte = Mb_peek( &e->eb.mb, ahead + e->eb.reps[0] + 1 );
LZeb_encode_matched( &e->eb, prev_byte, cur_byte, match_byte );
}
- *state = St_set_char( *state );
}
else /* match or repeated match */
{
diff --git a/encoder.h b/encoder.h
index 7a7ecaf..f17bb99 100644
--- a/encoder.h
+++ b/encoder.h
@@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
- Copyright (C) 2009-2021 Antonio Diaz Diaz.
+ Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -194,10 +194,9 @@ static inline int LZeb_price_rep( const struct LZ_encoder_base * const eb,
const int rep, const State state,
const int pos_state )
{
- int price;
if( rep == 0 ) return price0( eb->bm_rep0[state] ) +
price1( eb->bm_len[state][pos_state] );
- price = price1( eb->bm_rep0[state] );
+ int price = price1( eb->bm_rep0[state] );
if( rep == 1 )
price += price0( eb->bm_rep1[state] );
else
diff --git a/encoder_base.c b/encoder_base.c
index c1ef9ef..4535352 100644
--- a/encoder_base.c
+++ b/encoder_base.c
@@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
- Copyright (C) 2009-2021 Antonio Diaz Diaz.
+ Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -47,7 +47,6 @@ static bool Mb_init( struct Matchfinder_base * const mb, const int before_size,
{
const int buffer_size_limit =
( dict_factor * dict_size ) + before_size + after_size;
- unsigned size;
int i;
mb->partial_data_pos = 0;
@@ -66,9 +65,8 @@ static bool Mb_init( struct Matchfinder_base * const mb, const int before_size,
mb->saved_dictionary_size = dict_size;
mb->dictionary_size = dict_size;
mb->pos_limit = mb->buffer_size - after_size;
- size = 1 << max( 16, real_bits( mb->dictionary_size - 1 ) - 2 );
- if( mb->dictionary_size > 1 << 26 ) /* 64 MiB */
- size >>= 1;
+ unsigned size = 1 << max( 16, real_bits( mb->dictionary_size - 1 ) - 2 );
+ if( mb->dictionary_size > 1 << 26 ) size >>= 1; /* 64 MiB */
mb->key4_mask = size - 1; /* increases with dictionary size */
size += num_prev_positions23;
mb->num_prev_positions = size;
@@ -88,8 +86,7 @@ static bool Mb_init( struct Matchfinder_base * const mb, const int before_size,
static void Mb_adjust_array( struct Matchfinder_base * const mb )
{
int size = 1 << max( 16, real_bits( mb->dictionary_size - 1 ) - 2 );
- if( mb->dictionary_size > 1 << 26 ) /* 64 MiB */
- size >>= 1;
+ if( mb->dictionary_size > 1 << 26 ) size >>= 1; /* 64 MiB */
mb->key4_mask = size - 1;
size += mb->num_prev_positions23;
mb->num_prev_positions = size;
@@ -129,21 +126,21 @@ static void Mb_reset( struct Matchfinder_base * const mb )
/* End Of Stream marker => (dis == 0xFFFFFFFFU, len == min_match_len) */
static void LZeb_try_full_flush( struct LZ_encoder_base * const eb )
{
- int i;
- const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask;
- const State state = eb->state;
- Lzip_trailer trailer;
if( eb->member_finished ||
Cb_free_bytes( &eb->renc.cb ) < max_marker_size + eb->renc.ff_count + Lt_size )
return;
eb->member_finished = true;
+ const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask;
+ const State state = eb->state;
Re_encode_bit( &eb->renc, &eb->bm_match[state][pos_state], 1 );
Re_encode_bit( &eb->renc, &eb->bm_rep[state], 0 );
LZeb_encode_pair( eb, 0xFFFFFFFFU, min_match_len, pos_state );
Re_flush( &eb->renc );
+ Lzip_trailer trailer;
Lt_set_data_crc( trailer, LZeb_crc( eb ) );
Lt_set_data_size( trailer, Mb_data_position( &eb->mb ) );
Lt_set_member_size( trailer, Re_member_position( &eb->renc ) + Lt_size );
+ int i;
for( i = 0; i < Lt_size; ++i )
Cb_put_byte( &eb->renc.cb, trailer[i] );
}
@@ -152,13 +149,13 @@ static void LZeb_try_full_flush( struct LZ_encoder_base * const eb )
/* Sync Flush marker => (dis == 0xFFFFFFFFU, len == min_match_len + 1) */
static void LZeb_try_sync_flush( struct LZ_encoder_base * const eb )
{
- const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask;
- const State state = eb->state;
const unsigned min_size = eb->renc.ff_count + max_marker_size;
if( eb->member_finished ||
Cb_free_bytes( &eb->renc.cb ) < min_size + max_marker_size ) return;
eb->mb.sync_flush_pending = false;
const unsigned long long old_mpos = Re_member_position( &eb->renc );
+ const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask;
+ const State state = eb->state;
do { /* size of markers must be >= rd_min_available_bytes + 5 */
Re_encode_bit( &eb->renc, &eb->bm_match[state][pos_state], 1 );
Re_encode_bit( &eb->renc, &eb->bm_rep[state], 0 );
diff --git a/encoder_base.h b/encoder_base.h
index e727a7d..17ffc93 100644
--- a/encoder_base.h
+++ b/encoder_base.h
@@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
- Copyright (C) 2009-2021 Antonio Diaz Diaz.
+ Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -147,10 +147,9 @@ static inline int price_bit( const Bit_model bm, const bool bit )
static inline int price_symbol3( const Bit_model bm[], int symbol )
{
- int price;
bool bit = symbol & 1;
symbol |= 8; symbol >>= 1;
- price = price_bit( bm[symbol], bit );
+ int price = price_bit( bm[symbol], bit );
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
return price + price_bit( bm[1], symbol & 1 );
}
@@ -158,10 +157,9 @@ static inline int price_symbol3( const Bit_model bm[], int symbol )
static inline int price_symbol6( const Bit_model bm[], unsigned symbol )
{
- int price;
bool bit = symbol & 1;
symbol |= 64; symbol >>= 1;
- price = price_bit( bm[symbol], bit );
+ int price = price_bit( bm[symbol], bit );
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
@@ -172,10 +170,9 @@ static inline int price_symbol6( const Bit_model bm[], unsigned symbol )
static inline int price_symbol8( const Bit_model bm[], int symbol )
{
- int price;
bool bit = symbol & 1;
symbol |= 0x100; symbol >>= 1;
- price = price_bit( bm[symbol], bit );
+ int price = price_bit( bm[symbol], bit );
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
@@ -427,10 +424,9 @@ static inline void Re_encode_bit( struct Range_encoder * const renc,
static inline void Re_encode_tree3( struct Range_encoder * const renc,
Bit_model bm[], const int symbol )
{
- int model;
bool bit = ( symbol >> 2 ) & 1;
Re_encode_bit( renc, &bm[1], bit );
- model = 2 | bit;
+ int model = 2 | bit;
bit = ( symbol >> 1 ) & 1;
Re_encode_bit( renc, &bm[model], bit ); model <<= 1; model |= bit;
Re_encode_bit( renc, &bm[model], symbol & 1 );
@@ -439,10 +435,9 @@ static inline void Re_encode_tree3( struct Range_encoder * const renc,
static inline void Re_encode_tree6( struct Range_encoder * const renc,
Bit_model bm[], const unsigned symbol )
{
- int model;
bool bit = ( symbol >> 5 ) & 1;
Re_encode_bit( renc, &bm[1], bit );
- model = 2 | bit;
+ int model = 2 | bit;
bit = ( symbol >> 4 ) & 1;
Re_encode_bit( renc, &bm[model], bit ); model <<= 1; model |= bit;
bit = ( symbol >> 3 ) & 1;
@@ -583,8 +578,7 @@ static inline int LZeb_price_matched( const struct LZ_encoder_base * const eb,
static inline void LZeb_encode_literal( struct LZ_encoder_base * const eb,
const uint8_t prev_byte, const uint8_t symbol )
- { Re_encode_tree8( &eb->renc, eb->bm_literal[get_lit_state(prev_byte)],
- symbol ); }
+ { Re_encode_tree8( &eb->renc, eb->bm_literal[get_lit_state(prev_byte)], symbol ); }
static inline void LZeb_encode_matched( struct LZ_encoder_base * const eb,
const uint8_t prev_byte, const uint8_t symbol, const uint8_t match_byte )
@@ -595,8 +589,8 @@ static inline void LZeb_encode_pair( struct LZ_encoder_base * const eb,
const unsigned dis, const int len,
const int pos_state )
{
- const unsigned dis_slot = get_slot( dis );
Re_encode_len( &eb->renc, &eb->match_len_model, len, pos_state );
+ const unsigned dis_slot = get_slot( dis );
Re_encode_tree6( &eb->renc, eb->bm_dis_slot[get_len_state(len)], dis_slot );
if( dis_slot >= start_dis_model )
diff --git a/fast_encoder.c b/fast_encoder.c
index bdcbb97..618c3d6 100644
--- a/fast_encoder.c
+++ b/fast_encoder.c
@@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
- Copyright (C) 2009-2021 Antonio Diaz Diaz.
+ Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -20,25 +20,24 @@
static int FLZe_longest_match_len( struct FLZ_encoder * const fe, int * const distance )
{
enum { len_limit = 16 };
- const uint8_t * const data = Mb_ptr_to_current_pos( &fe->eb.mb );
int32_t * ptr0 = fe->eb.mb.pos_array + fe->eb.mb.cyclic_pos;
- const int pos1 = fe->eb.mb.pos + 1;
- int maxlen = 0, newpos1, count;
const int available = min( Mb_available_bytes( &fe->eb.mb ), max_match_len );
if( available < len_limit ) { *ptr0 = 0; return 0; }
+ const uint8_t * const data = Mb_ptr_to_current_pos( &fe->eb.mb );
fe->key4 = ( ( fe->key4 << 4 ) ^ data[3] ) & fe->eb.mb.key4_mask;
- newpos1 = fe->eb.mb.prev_positions[fe->key4];
+ const int pos1 = fe->eb.mb.pos + 1;
+ int newpos1 = fe->eb.mb.prev_positions[fe->key4];
fe->eb.mb.prev_positions[fe->key4] = pos1;
+ int maxlen = 0, count;
for( count = 4; ; )
{
- int32_t * newptr;
int delta;
if( newpos1 <= 0 || --count < 0 ||
( delta = pos1 - newpos1 ) > fe->eb.mb.dictionary_size )
{ *ptr0 = 0; break; }
- newptr = fe->eb.mb.pos_array +
+ int32_t * const newptr = fe->eb.mb.pos_array +
( fe->eb.mb.cyclic_pos - delta +
( ( fe->eb.mb.cyclic_pos >= delta ) ? 0 : fe->eb.mb.dictionary_size + 1 ) );
@@ -71,11 +70,10 @@ static bool FLZe_encode_member( struct FLZ_encoder * const fe )
if( Mb_data_position( &fe->eb.mb ) == 0 &&
!Mb_data_finished( &fe->eb.mb ) ) /* encode first byte */
{
- const uint8_t prev_byte = 0;
- uint8_t cur_byte;
if( !Mb_enough_available_bytes( &fe->eb.mb ) ||
!Re_enough_free_bytes( &fe->eb.renc ) ) return true;
- cur_byte = Mb_peek( &fe->eb.mb, 0 );
+ const uint8_t prev_byte = 0;
+ const uint8_t cur_byte = Mb_peek( &fe->eb.mb, 0 );
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[*state][0], 0 );
LZeb_encode_literal( &fe->eb, prev_byte, cur_byte );
CRC32_update_byte( &fe->eb.crc, cur_byte );
@@ -86,13 +84,12 @@ static bool FLZe_encode_member( struct FLZ_encoder * const fe )
while( !Mb_data_finished( &fe->eb.mb ) &&
Re_member_position( &fe->eb.renc ) < fe->eb.member_size_limit )
{
- int match_distance = 0; /* avoid warning from gcc 6.1.0 */
- int main_len, pos_state;
- int len = 0;
if( !Mb_enough_available_bytes( &fe->eb.mb ) ||
!Re_enough_free_bytes( &fe->eb.renc ) ) return true;
- main_len = FLZe_longest_match_len( fe, &match_distance );
- pos_state = Mb_data_position( &fe->eb.mb ) & pos_state_mask;
+ int match_distance = 0; /* avoid warning from gcc 6.1.0 */
+ const int main_len = FLZe_longest_match_len( fe, &match_distance );
+ const int pos_state = Mb_data_position( &fe->eb.mb ) & pos_state_mask;
+ int len = 0;
for( i = 0; i < num_rep_distances; ++i )
{
@@ -109,11 +106,10 @@ static bool FLZe_encode_member( struct FLZ_encoder * const fe )
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_len[*state][pos_state], 1 );
else
{
- int distance;
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep1[*state], rep > 1 );
if( rep > 1 )
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep2[*state], rep > 2 );
- distance = fe->eb.reps[rep];
+ const int distance = fe->eb.reps[rep];
for( i = rep; i > 0; --i ) fe->eb.reps[i] = fe->eb.reps[i-1];
fe->eb.reps[0] = distance;
}
@@ -138,7 +134,6 @@ static bool FLZe_encode_member( struct FLZ_encoder * const fe )
continue;
}
- {
const uint8_t prev_byte = Mb_peek( &fe->eb.mb, 1 );
const uint8_t cur_byte = Mb_peek( &fe->eb.mb, 0 );
const uint8_t match_byte = Mb_peek( &fe->eb.mb, fe->eb.reps[0] + 1 );
@@ -169,12 +164,10 @@ static bool FLZe_encode_member( struct FLZ_encoder * const fe )
/* literal byte */
Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[*state][pos_state], 0 );
- if( St_is_char( *state ) )
+ if( ( *state = St_set_char( *state ) ) < 4 )
LZeb_encode_literal( &fe->eb, prev_byte, cur_byte );
else
LZeb_encode_matched( &fe->eb, prev_byte, cur_byte, match_byte );
- *state = St_set_char( *state );
- }
}
LZeb_try_full_flush( &fe->eb );
diff --git a/fast_encoder.h b/fast_encoder.h
index 1c3a6ff..54756bd 100644
--- a/fast_encoder.h
+++ b/fast_encoder.h
@@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
- Copyright (C) 2009-2021 Antonio Diaz Diaz.
+ Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
diff --git a/ffexample.c b/ffexample.c
index 2891f02..59345ee 100644
--- a/ffexample.c
+++ b/ffexample.c
@@ -1,5 +1,5 @@
/* File to file example - Test program for the library lzlib
- Copyright (C) 2010-2021 Antonio Diaz Diaz.
+ Copyright (C) 2010-2022 Antonio Diaz Diaz.
This program is free software: you have unlimited permission
to copy, distribute, and modify it.
@@ -20,7 +20,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
+#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__
#include <fcntl.h>
#include <io.h>
#endif
@@ -178,9 +178,9 @@ int ffmmcompress( FILE * const infile, FILE * const outfile )
}
-/* Compresses 'infile' to 'outfile' as a multimember stream with one member
+/* Compress 'infile' to 'outfile' as a multimember stream with one member
for each line of text terminated by a newline character or by EOF.
- Returns 0 if success, 1 if error.
+ Return 0 if success, 1 if error.
*/
int fflfcompress( struct LZ_Encoder * const encoder,
FILE * const infile, FILE * const outfile )
@@ -219,7 +219,7 @@ int fflfcompress( struct LZ_Encoder * const encoder,
}
-/* Decompresses 'infile' to 'outfile' with automatic resynchronization to
+/* Decompress 'infile' to 'outfile' with automatic resynchronization to
next member in case of data error, including the automatic removal of
leading garbage.
*/
@@ -257,7 +257,7 @@ int ffrsdecompress( struct LZ_Decoder * const decoder,
int main( const int argc, const char * const argv[] )
{
-#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
+#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__
setmode( STDIN_FILENO, O_BINARY );
setmode( STDOUT_FILENO, O_BINARY );
#endif
diff --git a/lzcheck.c b/lzcheck.c
index 7e00e6c..88dd4c9 100644
--- a/lzcheck.c
+++ b/lzcheck.c
@@ -1,5 +1,5 @@
/* Lzcheck - Test program for the library lzlib
- Copyright (C) 2009-2021 Antonio Diaz Diaz.
+ Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you have unlimited permission
to copy, distribute, and modify it.
@@ -133,10 +133,11 @@ static void xclose_decoder( struct LZ_Decoder * const decoder,
}
-/* Returns the next (usually newline-terminated) chunk of data from file.
+/* Return the next (usually newline-terminated) chunk of data from file.
The size returned in *sizep is always <= buffer_size.
- If sizep is a null pointer, rewinds the file, resets state, and returns.
- If file is at EOF, returns an empty line. */
+ If sizep is a null pointer, rewind the file, reset state, and return.
+ If file is at EOF, return an empty line.
+*/
static const uint8_t * next_line( FILE * const file, int * const sizep )
{
static int l = 0;
@@ -332,7 +333,7 @@ int main( const int argc, const char * const argv[] )
if( argc < 2 )
{
- fputs( "Usage: lzcheck filename.txt...\n", stderr );
+ fputs( "Usage: lzcheck [-m|-s] filename.txt...\n", stderr );
return 1;
}
diff --git a/lzip.h b/lzip.h
index 3e3df2c..aad7e00 100644
--- a/lzip.h
+++ b/lzip.h
@@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
- Copyright (C) 2009-2021 Antonio Diaz Diaz.
+ Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -168,6 +168,7 @@ static const uint32_t crc32[256] =
static inline void CRC32_update_byte( uint32_t * const crc, const uint8_t byte )
{ *crc = crc32[(*crc^byte)&0xFF] ^ ( *crc >> 8 ); }
+/* about as fast as it is possible without messing with endianness */
static inline void CRC32_update_buf( uint32_t * const crc,
const uint8_t * const buffer,
const int size )
diff --git a/lzlib.c b/lzlib.c
index b7969f2..3084fbd 100644
--- a/lzlib.c
+++ b/lzlib.c
@@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
- Copyright (C) 2009-2021 Antonio Diaz Diaz.
+ Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
diff --git a/lzlib.h b/lzlib.h
index b6374d8..ba29f97 100644
--- a/lzlib.h
+++ b/lzlib.h
@@ -1,5 +1,5 @@
/* Lzlib - Compression library for the lzip format
- Copyright (C) 2009-2021 Antonio Diaz Diaz.
+ Copyright (C) 2009-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -24,9 +24,9 @@ extern "C" {
/* LZ_API_VERSION was first defined in lzlib 1.8 to 1.
Since lzlib 1.12, LZ_API_VERSION is defined as (major * 1000 + minor). */
-#define LZ_API_VERSION 1012
+#define LZ_API_VERSION 1013
-static const char * const LZ_version_string = "1.12";
+static const char * const LZ_version_string = "1.13";
enum LZ_Errno { LZ_ok = 0, LZ_bad_argument, LZ_mem_error,
LZ_sequence_error, LZ_header_error, LZ_unexpected_eof,
diff --git a/main.c b/minilzip.c
index c623d9b..f9313b2 100644
--- a/main.c
+++ b/minilzip.c
@@ -1,5 +1,5 @@
/* Minilzip - Test program for the library lzlib
- Copyright (C) 2009-2021 Antonio Diaz Diaz.
+ Copyright (C) 2009-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,11 +18,12 @@
Exit status: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file, 3 for an internal consistency error
- (eg, bug) which caused minilzip to panic.
+ (e.g., bug) which caused minilzip to panic.
*/
#define _FILE_OFFSET_BITS 64
+#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
@@ -35,9 +36,9 @@
#include <unistd.h>
#include <utime.h>
#include <sys/stat.h>
-#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
+#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__
#include <io.h>
-#if defined(__MSVCRT__)
+#if defined __MSVCRT__
#define fchmod(x,y) 0
#define fchown(x,y,z) 0
#define strtoull strtoul
@@ -50,7 +51,7 @@
#define S_IWOTH 0
#endif
#endif
-#if defined(__DJGPP__)
+#if defined __DJGPP__
#define S_ISSOCK(x) 0
#define S_ISVTX 0
#endif
@@ -67,6 +68,11 @@
#error "Environments where CHAR_BIT != 8 are not supported."
#endif
+#if ( defined SIZE_MAX && SIZE_MAX < UINT_MAX ) || \
+ ( defined SSIZE_MAX && SSIZE_MAX < INT_MAX )
+#error "Environments where 'size_t' is narrower than 'int' are not supported."
+#endif
+
#ifndef max
#define max(x,y) ((x) >= (y) ? (x) : (y))
#endif
@@ -85,7 +91,7 @@ static const char * const mem_msg = "Not enough memory.";
int verbosity = 0;
static const char * const program_name = "minilzip";
-static const char * const program_year = "2021";
+static const char * const program_year = "2022";
static const char * invocation_name = "minilzip"; /* default value */
static const struct { const char * from; const char * to; } known_extensions[] = {
@@ -114,13 +120,14 @@ static void show_help( void )
"compatible with lzip 1.4 or newer.\n"
"\nLzip is a lossless data compressor with a user interface similar to the one\n"
"of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov\n"
- "chain-Algorithm' (LZMA) stream format, chosen to maximize safety and\n"
- "interoperability. Lzip can compress about as fast as gzip (lzip -0) or\n"
- "compress most files more than bzip2 (lzip -9). Decompression speed is\n"
- "intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 from\n"
- "a data recovery perspective. Lzip has been designed, written, and tested\n"
- "with great care to replace gzip and bzip2 as the standard general-purpose\n"
- "compressed format for unix-like systems.\n"
+ "chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity\n"
+ "checking to maximize interoperability and optimize safety. Lzip can compress\n"
+ "about as fast as gzip (lzip -0) or compress most files more than bzip2\n"
+ "(lzip -9). Decompression speed is intermediate between gzip and bzip2.\n"
+ "Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip\n"
+ "has been designed, written, and tested with great care to replace gzip and\n"
+ "bzip2 as the standard general-purpose compressed format for unix-like\n"
+ "systems.\n"
"\nUsage: %s [options] [files]\n", invocation_name );
printf( "\nOptions:\n"
" -h, --help display this help and exit\n"
@@ -158,7 +165,7 @@ static void show_help( void )
"'tar -xf foo.tar.lz' or 'minilzip -cd foo.tar.lz | tar -xf -'.\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
- "invalid input file, 3 for an internal consistency error (eg, bug) which\n"
+ "invalid input file, 3 for an internal consistency error (e.g., bug) which\n"
"caused minilzip to panic.\n"
"\nThe ideas embodied in lzlib are due to (at least) the following people:\n"
"Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the\n"
@@ -181,17 +188,48 @@ static void show_version( void )
}
-int check_lib()
+static inline void set_retval( int * retval, const int new_val )
+ { if( *retval < new_val ) *retval = new_val; }
+
+
+static int check_lzlib_ver() /* <major>.<minor> or <major>.<minor>[a-z.-]* */
{
- bool warning = false;
+#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012
+ const unsigned char * p = (unsigned char *)LZ_version_string;
+ unsigned major = 0, minor = 0;
+ while( major < 100000 && isdigit( *p ) )
+ { major *= 10; major += *p - '0'; ++p; }
+ if( *p == '.' ) ++p;
+ else
+out: { show_error( "Invalid LZ_version_string in lzlib.h", 0, false ); return 2; }
+ while( minor < 100 && isdigit( *p ) )
+ { minor *= 10; minor += *p - '0'; ++p; }
+ if( *p && *p != '-' && *p != '.' && !islower( *p ) ) goto out;
+ const unsigned version = major * 1000 + minor;
+ if( LZ_API_VERSION != version )
+ {
+ if( verbosity >= 0 )
+ fprintf( stderr, "%s: Version mismatch in lzlib.h: "
+ "LZ_API_VERSION = %u, should be %u.\n",
+ program_name, LZ_API_VERSION, version );
+ return 2;
+ }
+#endif
+ return 0;
+ }
+
+
+static int check_lib()
+ {
+ int retval = check_lzlib_ver();
if( strcmp( LZ_version_string, LZ_version() ) != 0 )
- { warning = true;
+ { set_retval( &retval, 1 );
if( verbosity >= 0 )
printf( "warning: LZ_version_string != LZ_version() (%s vs %s)\n",
LZ_version_string, LZ_version() ); }
#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012
if( LZ_API_VERSION != LZ_api_version() )
- { warning = true;
+ { set_retval( &retval, 1 );
if( verbosity >= 0 )
printf( "warning: LZ_API_VERSION != LZ_api_version() (%u vs %u)\n",
LZ_API_VERSION, LZ_api_version() ); }
@@ -208,7 +246,7 @@ int check_lib()
"Using an unknown LZ_API_VERSION\n", LZ_API_VERSION );
#endif
}
- return warning;
+ return retval;
}
@@ -234,8 +272,6 @@ struct Pretty_print
static void Pp_init( struct Pretty_print * const pp,
const char * const filenames[], const int num_filenames )
{
- unsigned stdin_name_len;
- int i;
pp->name = 0;
pp->padded_name = 0;
pp->stdin_name = "(stdin)";
@@ -243,7 +279,8 @@ static void Pp_init( struct Pretty_print * const pp,
pp->first_post = false;
if( verbosity <= 0 ) return;
- stdin_name_len = strlen( pp->stdin_name );
+ const unsigned stdin_name_len = strlen( pp->stdin_name );
+ int i;
for( i = 0; i < num_filenames; ++i )
{
const char * const s = filenames[i];
@@ -277,16 +314,14 @@ static void Pp_reset( struct Pretty_print * const pp )
static void Pp_show_msg( struct Pretty_print * const pp, const char * const msg )
{
- if( verbosity >= 0 )
+ if( verbosity < 0 ) return;
+ if( pp->first_post )
{
- if( pp->first_post )
- {
- pp->first_post = false;
- fputs( pp->padded_name, stderr );
- if( !msg ) fflush( stderr );
- }
- if( msg ) fprintf( stderr, "%s\n", msg );
+ pp->first_post = false;
+ fputs( pp->padded_name, stderr );
+ if( !msg ) fflush( stderr );
}
+ if( msg ) fprintf( stderr, "%s\n", msg );
}
@@ -307,17 +342,53 @@ static void show_header( const unsigned dictionary_size )
}
-static unsigned long long getnum( const char * const ptr,
+/* separate large numbers >= 100_000 in groups of 3 digits using '_' */
+static const char * format_num3( unsigned long long num )
+ {
+ const char * const si_prefix = "kMGTPEZY";
+ const char * const binary_prefix = "KMGTPEZY";
+ enum { buffers = 8, bufsize = 4 * sizeof (long long) };
+ static char buffer[buffers][bufsize]; /* circle of static buffers for printf */
+ static int current = 0;
+ int i;
+ char * const buf = buffer[current++]; current %= buffers;
+ char * p = buf + bufsize - 1; /* fill the buffer backwards */
+ *p = 0; /* terminator */
+ if( num > 1024 )
+ {
+ char prefix = 0; /* try binary first, then si */
+ for( i = 0; i < 8 && num >= 1024 && num % 1024 == 0; ++i )
+ { num /= 1024; prefix = binary_prefix[i]; }
+ if( prefix ) *(--p) = 'i';
+ else
+ for( i = 0; i < 8 && num >= 1000 && num % 1000 == 0; ++i )
+ { num /= 1000; prefix = si_prefix[i]; }
+ if( prefix ) *(--p) = prefix;
+ }
+ const bool split = num >= 100000;
+
+ for( i = 0; ; )
+ {
+ *(--p) = num % 10 + '0'; num /= 10; if( num == 0 ) break;
+ if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; }
+ }
+ return p;
+ }
+
+
+static unsigned long long getnum( const char * const arg,
+ const char * const option_name,
const unsigned long long llimit,
const unsigned long long ulimit )
{
- unsigned long long result;
char * tail;
errno = 0;
- result = strtoull( ptr, &tail, 0 );
- if( tail == ptr )
+ unsigned long long result = strtoull( arg, &tail, 0 );
+ if( tail == arg )
{
- show_error( "Bad or missing numerical argument.", 0, true );
+ if( verbosity >= 0 )
+ fprintf( stderr, "%s: Bad or missing numerical argument in "
+ "option '%s'.\n", program_name, option_name );
exit( 1 );
}
@@ -340,7 +411,9 @@ static unsigned long long getnum( const char * const ptr,
}
if( exponent <= 0 )
{
- show_error( "Bad multiplier in numerical argument.", 0, true );
+ if( verbosity >= 0 )
+ fprintf( stderr, "%s: Bad multiplier in numerical argument of "
+ "option '%s'.\n", program_name, option_name );
exit( 1 );
}
for( i = 0; i < exponent; ++i )
@@ -352,23 +425,25 @@ static unsigned long long getnum( const char * const ptr,
if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE;
if( errno )
{
- show_error( "Numerical argument out of limits.", 0, false );
+ if( verbosity >= 0 )
+ fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] "
+ "in option '%s'.\n", program_name, format_num3( llimit ),
+ format_num3( ulimit ), option_name );
exit( 1 );
}
return result;
}
-static int get_dict_size( const char * const arg )
+static int get_dict_size( const char * const arg, const char * const option_name )
{
char * tail;
- int dictionary_size;
const long bits = strtol( arg, &tail, 0 );
if( bits >= LZ_min_dictionary_bits() &&
bits <= LZ_max_dictionary_bits() && *tail == 0 )
return 1 << bits;
- dictionary_size = getnum( arg, LZ_min_dictionary_size(),
- LZ_max_dictionary_size() );
+ int dictionary_size = getnum( arg, option_name, LZ_min_dictionary_size(),
+ LZ_max_dictionary_size() );
if( dictionary_size == 65535 ) ++dictionary_size; /* no fast encoder */
return dictionary_size;
}
@@ -442,34 +517,31 @@ static int open_instream( const char * const name, struct stat * const in_statsp
const enum Mode program_mode, const int eindex,
const bool one_to_one, const bool recompress )
{
- int infd = -1;
if( program_mode == m_compress && !recompress && eindex >= 0 )
{
if( verbosity >= 0 )
fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n",
program_name, name, known_extensions[eindex].from );
+ return -1;
}
+ int infd = open( name, O_RDONLY | O_BINARY );
+ if( infd < 0 )
+ show_file_error( name, "Can't open input file", errno );
else
{
- infd = open( name, O_RDONLY | O_BINARY );
- if( infd < 0 )
- show_file_error( name, "Can't open input file", errno );
- else
+ const int i = fstat( infd, in_statsp );
+ const mode_t mode = in_statsp->st_mode;
+ const bool can_read = ( i == 0 &&
+ ( S_ISBLK( mode ) || S_ISCHR( mode ) ||
+ S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
+ if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) )
{
- const int i = fstat( infd, in_statsp );
- const mode_t mode = in_statsp->st_mode;
- const bool can_read = ( i == 0 &&
- ( S_ISBLK( mode ) || S_ISCHR( mode ) ||
- S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
- if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) )
- {
- if( verbosity >= 0 )
- fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
- program_name, name, ( can_read && one_to_one ) ?
- ",\n and neither '-c' nor '-o' were specified" : "" );
- close( infd );
- infd = -1;
- }
+ if( verbosity >= 0 )
+ fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
+ program_name, name, ( can_read && one_to_one ) ?
+ ",\n and neither '-c' nor '-o' were specified" : "" );
+ close( infd );
+ infd = -1;
}
}
return infd;
@@ -532,10 +604,6 @@ static void signal_handler( int sig )
}
-static inline void set_retval( int * retval, const int new_val )
- { if( *retval < new_val ) *retval = new_val; }
-
-
static bool check_tty_in( const char * const input_filename, const int infd,
const enum Mode program_mode, int * const retval )
{
@@ -543,7 +611,7 @@ static bool check_tty_in( const char * const input_filename, const int infd,
isatty( infd ) ) /* for example /dev/tty */
{ show_file_error( input_filename,
"I won't read compressed data from a terminal.", 0 );
- close( infd ); set_retval( retval, 1 );
+ close( infd ); set_retval( retval, 2 );
if( program_mode != m_test ) cleanup_and_fail( *retval );
return false; }
return true;
@@ -594,8 +662,8 @@ static void close_and_set_permissions( const struct stat * const in_statsp )
}
-/* Returns the number of bytes really read.
- If (returned value < size) and (errno == 0), means EOF was reached.
+/* Return the number of bytes really read.
+ If (value returned < size) and (errno == 0), means EOF was reached.
*/
static int readblock( const int fd, uint8_t * const buf, const int size )
{
@@ -613,8 +681,8 @@ static int readblock( const int fd, uint8_t * const buf, const int size )
}
-/* Returns the number of bytes really written.
- If (returned value < size), it is always an error.
+/* Return the number of bytes really written.
+ If (value returned < size), it is always an error.
*/
static int writeblock( const int fd, const uint8_t * const buf, const int size )
{
@@ -659,7 +727,7 @@ static int do_compress( struct LZ_Encoder * const encoder,
while( true )
{
- int in_size = 0, out_size;
+ int in_size = 0;
while( LZ_compress_write_size( encoder ) > 0 )
{
const int size = min( LZ_compress_write_size( encoder ), buffer_size );
@@ -675,7 +743,7 @@ static int do_compress( struct LZ_Encoder * const encoder,
/* else LZ_compress_sync_flush( encoder ); */
in_size += rd;
}
- out_size = LZ_compress_read( encoder, buffer, buffer_size );
+ const int out_size = LZ_compress_read( encoder, buffer, buffer_size );
if( out_size < 0 )
{
Pp_show_msg( pp, 0 );
@@ -843,7 +911,7 @@ static int do_decompress( struct LZ_Decoder * const decoder, const int infd,
fputs( testing ? "ok\n" : "done\n", stderr ); Pp_reset( pp );
}
}
- first_member = false;
+ first_member = false; /* member decompressed successfully */
}
if( rd <= 0 ) break;
}
@@ -985,23 +1053,15 @@ int main( const int argc, const char * const argv[] )
unsigned long long member_size = max_member_size;
unsigned long long volume_size = 0;
const char * default_output_filename = "";
- static struct Arg_parser parser; /* static because valgrind complains */
- static struct Pretty_print pp; /* and memory management in C sucks */
- static const char ** filenames = 0;
- int num_filenames = 0;
enum Mode program_mode = m_compress;
- int argind = 0;
- int failed_tests = 0;
- int retval = 0;
int i;
- bool filenames_given = false;
bool force = false;
bool ignore_trailing = true;
bool keep_input_files = false;
bool loose_trailing = false;
bool recompress = false;
- bool stdin_used = false;
bool to_stdout = false;
+ if( argc > 0 ) invocation_name = argv[0];
enum { opt_chk = 256, opt_lt };
const struct ap_Option options[] =
@@ -1037,25 +1097,27 @@ int main( const int argc, const char * const argv[] )
{ opt_lt, "loose-trailing", ap_no },
{ 0, 0, ap_no } };
- if( argc > 0 ) invocation_name = argv[0];
-
+ /* static because valgrind complains and memory management in C sucks */
+ static struct Arg_parser parser;
if( !ap_init( &parser, argc, argv, options, 0 ) )
{ show_error( mem_msg, 0, false ); return 1; }
if( ap_error( &parser ) ) /* bad option */
{ show_error( ap_error( &parser ), 0, true ); return 1; }
+ int argind = 0;
for( ; argind < ap_arguments( &parser ); ++argind )
{
const int code = ap_code( &parser, argind );
- const char * const arg = ap_argument( &parser, argind );
if( !code ) break; /* no more options */
+ const char * const pn = ap_parsed_name( &parser, argind );
+ const char * const arg = ap_argument( &parser, argind );
switch( code )
{
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
encoder_options = option_mapping[code-'0']; break;
case 'a': ignore_trailing = false; break;
- case 'b': member_size = getnum( arg, 100000, max_member_size ); break;
+ case 'b': member_size = getnum( arg, pn, 100000, max_member_size ); break;
case 'c': to_stdout = true; break;
case 'd': set_mode( &program_mode, m_decompress ); break;
case 'f': force = true; break;
@@ -1063,15 +1125,15 @@ int main( const int argc, const char * const argv[] )
case 'h': show_help(); return 0;
case 'k': keep_input_files = true; break;
case 'm': encoder_options.match_len_limit =
- getnum( arg, LZ_min_match_len_limit(),
- LZ_max_match_len_limit() ); break;
+ getnum( arg, pn, LZ_min_match_len_limit(),
+ LZ_max_match_len_limit() ); break;
case 'n': break;
case 'o': if( strcmp( arg, "-" ) == 0 ) to_stdout = true;
else { default_output_filename = arg; } break;
case 'q': verbosity = -1; break;
- case 's': encoder_options.dictionary_size = get_dict_size( arg );
+ case 's': encoder_options.dictionary_size = get_dict_size( arg, pn );
break;
- case 'S': volume_size = getnum( arg, 100000, max_volume_size ); break;
+ case 'S': volume_size = getnum( arg, pn, 100000, max_volume_size ); break;
case 't': set_mode( &program_mode, m_test ); break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0;
@@ -1096,15 +1158,17 @@ int main( const int argc, const char * const argv[] )
if( strcmp( LZ_version_string, LZ_version() ) != 0 ) show_error(
"warning: wrong library version_string. Try --check-lib.", 0, false );
-#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
+#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__
setmode( STDIN_FILENO, O_BINARY );
setmode( STDOUT_FILENO, O_BINARY );
#endif
- num_filenames = max( 1, ap_arguments( &parser ) - argind );
+ static const char ** filenames = 0;
+ int num_filenames = max( 1, ap_arguments( &parser ) - argind );
filenames = resize_buffer( filenames, num_filenames * sizeof filenames[0] );
filenames[0] = "-";
+ bool filenames_given = false;
for( i = 0; argind + i < ap_arguments( &parser ); ++i )
{
filenames[i] = ap_argument( &parser, argind + i );
@@ -1133,16 +1197,18 @@ int main( const int argc, const char * const argv[] )
if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) )
set_signals( signal_handler );
+ static struct Pretty_print pp;
Pp_init( &pp, filenames, num_filenames );
+ int failed_tests = 0;
+ int retval = 0;
const bool one_to_one = !to_stdout && program_mode != m_test && !to_file;
+ bool stdin_used = false;
for( i = 0; i < num_filenames; ++i )
{
const char * input_filename = "";
int infd;
- int tmp;
struct stat in_stats;
- const struct stat * in_statsp;
Pp_set_name( &pp, filenames[i] );
if( strcmp( filenames[i], "-" ) == 0 )
@@ -1184,7 +1250,9 @@ int main( const int argc, const char * const argv[] )
return 1; /* check tty only once and don't try to delete a tty */
}
- in_statsp = ( input_filename[0] && one_to_one ) ? &in_stats : 0;
+ const struct stat * const in_statsp =
+ ( input_filename[0] && one_to_one ) ? &in_stats : 0;
+ int tmp;
if( program_mode == m_compress )
tmp = compress( member_size, volume_size, infd, &encoder_options, &pp,
in_statsp );
diff --git a/testsuite/check.sh b/testsuite/check.sh
index 44e2428..e93697e 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -1,6 +1,6 @@
#! /bin/sh
# check script for Lzlib - Compression library for the lzip format
-# Copyright (C) 2009-2021 Antonio Diaz Diaz.
+# Copyright (C) 2009-2022 Antonio Diaz Diaz.
#
# This script is free software: you have unlimited permission
# to copy, distribute, and modify it.
@@ -39,7 +39,8 @@ fox_lz="${testdir}"/fox.lz
fail=0
test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; }
-"${LZIP}" --check-lib # just print warning
+"${LZIP}" --check-lib # just print warning
+[ $? != 2 ] || { test_failed $LINENO ; exit 2 ; } # unless bad lzlib.h
printf "testing lzlib-%s..." "$2"
"${LZIP}" -fkqm4 in
@@ -99,6 +100,7 @@ done
printf "LZIP\001-.............................." | "${LZIP}" -t 2> /dev/null
printf "LZIP\002-.............................." | "${LZIP}" -t 2> /dev/null
printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null
+rm -f out || framework_failure
printf "\ntesting decompression..."
@@ -118,25 +120,28 @@ done
lines=$("${LZIP}" -tvv "${in_em}" 2>&1 | wc -l) || test_failed $LINENO
[ "${lines}" -eq 8 ] || test_failed $LINENO "${lines}"
+"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO
cat "${in_lz}" > copy.lz || framework_failure
"${LZIP}" -dk copy.lz || test_failed $LINENO
cmp in copy || test_failed $LINENO
-printf "to be overwritten" > copy || framework_failure
-"${LZIP}" -d copy.lz 2> /dev/null
+cat fox > copy || framework_failure
+cat "${in_lz}" > out.lz || framework_failure
+rm -f out || framework_failure
+"${LZIP}" -d copy.lz out.lz 2> /dev/null # skip copy, decompress out
[ $? = 1 ] || test_failed $LINENO
+cmp fox copy || test_failed $LINENO
+cmp in out || test_failed $LINENO
"${LZIP}" -df copy.lz || test_failed $LINENO
[ ! -e copy.lz ] || test_failed $LINENO
cmp in copy || test_failed $LINENO
+rm -f copy out || framework_failure
-rm -f copy || framework_failure
cat "${in_lz}" > copy.lz || framework_failure
"${LZIP}" -d -S100k copy.lz || test_failed $LINENO # ignore -S
[ ! -e copy.lz ] || test_failed $LINENO
cmp in copy || test_failed $LINENO
printf "to be overwritten" > copy || framework_failure
-"${LZIP}" -d -o copy < "${in_lz}" 2> /dev/null
-[ $? = 1 ] || test_failed $LINENO
"${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO
cmp in copy || test_failed $LINENO
rm -f out copy || framework_failure
@@ -160,7 +165,7 @@ rm -f copy anyothername.out || framework_failure
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" -cdq in "${in_lz}" > copy
[ $? = 2 ] || test_failed $LINENO
-cat copy in | cmp in - || test_failed $LINENO
+cat copy in | cmp in - || test_failed $LINENO # copy must be empty
"${LZIP}" -cdq nx_file.lz "${in_lz}" > copy
[ $? = 1 ] || test_failed $LINENO
cmp in copy || test_failed $LINENO
@@ -381,7 +386,6 @@ for i in fox_v2.lz fox_s11.lz fox_de20.lz \
[ $? = 2 ] || test_failed $LINENO $i
done
-"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO
for i in fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do
"${LZIP}" -cdq "${testdir}"/$i > out
[ $? = 2 ] || test_failed $LINENO $i