diff options
author | Daniel Baumann <mail@daniel-baumann.ch> | 2015-11-07 14:06:17 +0000 |
---|---|---|
committer | Daniel Baumann <mail@daniel-baumann.ch> | 2015-11-07 14:06:17 +0000 |
commit | ed6378d3594ab8317f2e510a3c66391c9b13d3c7 (patch) | |
tree | 8f14fb35f13658ee640e78ce4c6e481cb5c7ebcc | |
parent | Adding upstream version 1.6. (diff) | |
download | lzlib-ed6378d3594ab8317f2e510a3c66391c9b13d3c7.tar.xz lzlib-ed6378d3594ab8317f2e510a3c66391c9b13d3c7.zip |
Adding upstream version 1.7~pre1.upstream/1.7_pre1
Signed-off-by: Daniel Baumann <mail@daniel-baumann.ch>
-rw-r--r-- | ChangeLog | 9 | ||||
-rw-r--r-- | INSTALL | 6 | ||||
-rw-r--r-- | Makefile.in | 33 | ||||
-rw-r--r-- | NEWS | 49 | ||||
-rw-r--r-- | README | 23 | ||||
-rw-r--r-- | bbexample.c | 4 | ||||
-rw-r--r-- | carg_parser.c | 2 | ||||
-rw-r--r-- | carg_parser.h | 2 | ||||
-rw-r--r-- | cbuffer.c | 4 | ||||
-rwxr-xr-x | configure | 6 | ||||
-rw-r--r-- | decoder.c | 4 | ||||
-rw-r--r-- | decoder.h | 3 | ||||
-rw-r--r-- | doc/lzlib.info | 101 | ||||
-rw-r--r-- | doc/lzlib.texi | 85 | ||||
-rw-r--r-- | doc/minilzip.1 | 13 | ||||
-rw-r--r-- | encoder.c | 436 | ||||
-rw-r--r-- | encoder.h | 662 | ||||
-rw-r--r-- | encoder_base.c | 192 | ||||
-rw-r--r-- | encoder_base.h | 587 | ||||
-rw-r--r-- | fast_encoder.c | 203 | ||||
-rw-r--r-- | fast_encoder.h | 82 | ||||
-rw-r--r-- | lzcheck.c | 46 | ||||
-rw-r--r-- | lzip.h | 2 | ||||
-rw-r--r-- | lzlib.c | 152 | ||||
-rw-r--r-- | lzlib.h | 4 | ||||
-rw-r--r-- | main.c | 92 | ||||
-rwxr-xr-x | testsuite/check.sh | 2 |
27 files changed, 1626 insertions, 1178 deletions
@@ -1,3 +1,10 @@ +2015-02-24 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.7-pre1 released. + * Ported fast encoder and option '-0' from lzip. + * If open-->write-->finish, produce same dictionary size as lzip. + * Makefile.in: Added new targets 'install*-compress'. + 2014-08-27 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.6 released. @@ -146,7 +153,7 @@ * Version 0.1 released. -Copyright (C) 2009-2014 Antonio Diaz Diaz. +Copyright (C) 2009-2015 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and @@ -32,6 +32,10 @@ the main archive. 5. Type 'make install' to install the library and any data files and documentation. (You might have to run ldconfig also). + Or type 'make install-compress', which additionally compresses the + info manual and the man page after installation. (Installing + compressed docs may become the default in the future). + You can install only the library, the info manual or the man page by typing 'make install-bin', 'make install-info' or 'make install-man' respectively. @@ -58,7 +62,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2009-2014 Antonio Diaz Diaz. +Copyright (C) 2009-2015 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/Makefile.in b/Makefile.in index cabff9a..81e029b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -11,7 +11,9 @@ SHELL = /bin/sh objs = carg_parser.o main.o -.PHONY : all install install-bin install-info install-man install-strip \ +.PHONY : all install install-bin install-info install-man \ + install-strip install-compress install-strip-compress \ + install-bin-strip install-info-compress install-man-compress \ install-as-lzip uninstall uninstall-bin uninstall-info uninstall-man \ doc info man check dist clean distclean @@ -29,9 +31,6 @@ $(progname) : $(objs) lib$(libname).a $(progname)_shared : $(objs) lib$(libname).so.$(pkgversion) $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs) lib$(libname).so.$(pkgversion) -$(progname)_profiled : $(objs) lib$(libname).a - $(CC) $(CFLAGS) $(LDFLAGS) -pg -o $@ $(objs) lib$(libname).a - bbexample : bbexample.o lib$(libname).a $(CC) $(CFLAGS) $(LDFLAGS) -o $@ bbexample.o lib$(libname).a @@ -47,7 +46,8 @@ lzlib_sh.o : lzlib.c %.o : %.c $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $< -lzdeps = lzlib.h lzip.h cbuffer.c decoder.h decoder.c encoder.h encoder.c +lzdeps = lzlib.h lzip.h cbuffer.c decoder.h decoder.c encoder_base.h \ + encoder_base.c encoder.h encoder.c fast_encoder.h fast_encoder.c $(objs) : Makefile carg_parser.o : carg_parser.h @@ -77,6 +77,9 @@ check : $(progname) bbexample lzcheck @$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion) install : install-bin install-info +install-strip : install-bin-strip install-info +install-compress : install-bin install-info-compress +install-strip-compress : install-bin-strip install-info-compress install-bin : all if [ ! -d "$(DESTDIR)$(includedir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(includedir)" ; fi @@ -99,17 +102,25 @@ install-bin : all [ -x "$(LDCONFIG)" ] ; then "$(LDCONFIG)" -n "$(DESTDIR)$(libdir)" || true ; fi ; \ fi +install-bin-strip : all + $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install-bin + install-info : if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi + -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info" -install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" +install-info-compress : install-info + lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info" + install-man : if [ ! -d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi + -rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"* $(INSTALL_DATA) $(VPATH)/doc/$(progname).1 "$(DESTDIR)$(mandir)/man1/$(progname).1" -install-strip : all - $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install +install-man-compress : install-man + lzip -v -9 "$(DESTDIR)$(mandir)/man1/$(progname).1" install-as-lzip : install install-man if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi @@ -117,7 +128,7 @@ install-as-lzip : install install-man -rm -f "$(DESTDIR)$(bindir)/lzip" cd "$(DESTDIR)$(bindir)" && ln -s $(progname) lzip -uninstall : uninstall-bin uninstall-info uninstall-man +uninstall : uninstall-man uninstall-info uninstall-bin uninstall-bin : -rm -f "$(DESTDIR)$(bindir)/$(progname)" @@ -129,10 +140,10 @@ uninstall-bin : uninstall-info : -install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" - -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info" + -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* uninstall-man : - -rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1" + -rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"* dist : doc ln -sf $(VPATH) $(DISTNAME) @@ -158,7 +169,7 @@ dist : doc lzip -v -9 $(DISTNAME).tar clean : - -rm -f $(progname) $(progname)_profiled $(objs) + -rm -f $(progname) $(objs) -rm -f $(progname)_shared lzlib_sh.o *.so.$(pkgversion) -rm -f bbexample bbexample.o lzcheck lzcheck.o lzlib.o *.a @@ -1,41 +1,16 @@ -Changes in version 1.6: +Changes in version 1.7: -Compression ratio of option -9 has been slightly increased. +The fast encoder, which produces a compression speed and ratio +comparable to those of gzip, has been ported from lzip. -The configure script now accepts the option "--disable-static". +The option "-0" has been ported from lzip to minilzip. -Improved portability to BSD systems: +If all the data to be compressed are written in advance, lzlib will +automatically adjust the header of the compressed data to use the +smallest possible dictionary size. This feature reduces the amount of +memory needed for decompression and allows minilzip to produce identical +compressed output as lzip. - The configure script now accepts the option "--disable-ldconfig". - - "make install" now ignores any errors from ldconfig. - -Minilzip now copies file dates, permissions, and ownership like "cp -p". -(If the user ID or the group ID can't be duplicated, the file permission -bits S_ISUID and S_ISGID are cleared). - -"lzlib.texinfo" has been renamed to "lzlib.texi". - -The license has been changed to "GPL version 2 or later with link -exception". - - ----- - -Rationale for the license change. - -I have recently been shocked to know that it is apparently not legal to -use lzlib in GPLv2 programs. Linking those programs with lzlib violates -the GPLv2. But linking proprietary programs with lzlib is fine and legal! - -This problem is not caused by any fault in lzlib's link exception; the -LGPLv3 has the same problem. See for example -http://nmav.gnutls.org/2013/03/the-perils-of-lgplv3.html - -Of course it was never my intention to prevent GPLv2 programs from using -lzlib. I'm sorry for any inconveniences my oversight may have caused to -distributions, GPLv2 software projects, and users in general. - -I am also ashamed of having participated in this schism of the GPL -community. The mere proliferation of different free software licenses is -a burden in and of itself. From now on I'll license all my projects and -contributions in a way compatible with the GPLv2. +The targets "install-compress", "install-strip-compress", +"install-info-compress" and "install-man-compress" have been added to +the Makefile. @@ -5,8 +5,9 @@ and decompression functions, including integrity checking of the decompressed data. The compressed data format used by the library is the lzip format. Lzlib is written in C. -The lzip file format is designed for long-term data archiving, taking -into account both data integrity and decoder availability: +The lzip file format is designed for data sharing and long-term +archiving, taking into account both data integrity and decoder +availability: * The lzip format provides very safe integrity checking and some data recovery means. The lziprecover program can repair bit-flip errors @@ -21,8 +22,8 @@ into account both data integrity and decoder availability: extract the data from a lzip file long after quantum computers eventually render LZMA obsolete. - * Additionally lzip is copylefted, which guarantees that it will - remain free forever. + * Additionally the lzip reference implementation is copylefted, which + guarantees that it will remain free forever. A nice feature of the lzip format is that a corrupt byte is easier to repair the nearer it is from the beginning of the file. Therefore, with @@ -45,6 +46,12 @@ until some data is read, even if you write a lot of data. If you want the data to be compressed in advance, just call the read function with a size equal to 0. +If all the data to be compressed are written in advance, lzlib will +automatically adjust the header of the compressed data to use the +smallest possible dictionary size. This feature reduces the amount of +memory needed for decompression and allows minilzip to produce identical +compressed output as lzip. + Lzlib will correctly decompress a data stream which is the concatenation of two or more compressed data streams. The result is the concatenation of the corresponding decompressed data streams. Integrity testing of @@ -62,9 +69,9 @@ elaborated way of finding coding sequences of minimum price than the one currently used by lzip could be developed, and the resulting sequence could also be coded using the LZMA coding scheme. -Lzip currently implements two variants of the LZMA algorithm; fast (used -by option -0) and normal (used by all other compression levels). Lzlib -just implements the "normal" variant. +Lzlib currently implements two variants of the LZMA algorithm; fast +(used by option -0 of minilzip) and normal (used by all other +compression levels). The high compression of LZMA comes from combining two basic, well-proven compression ideas: sliding dictionaries (LZ77/78) and markov models (the @@ -79,7 +86,7 @@ range encoding), Igor Pavlov (for putting all the above together in LZMA), and Julian Seward (for bzip2's CLI). -Copyright (C) 2009-2014 Antonio Diaz Diaz. +Copyright (C) 2009-2015 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/bbexample.c b/bbexample.c index 883e636..56c655b 100644 --- a/bbexample.c +++ b/bbexample.c @@ -1,5 +1,5 @@ /* Buff to buff example - Test program for the lzlib library - Copyright (C) 2010-2014 Antonio Diaz Diaz. + Copyright (C) 2010-2015 Antonio Diaz Diaz. This program is free software: you have unlimited permission to copy, distribute and modify it. @@ -164,7 +164,7 @@ int main( const int argc, const char * const argv[] ) file = fopen( argv[1], "rb" ); if( !file ) { - fprintf( stderr, "bbexample: Can't open file '%s' for reading\n", argv[1] ); + fprintf( stderr, "bbexample: Can't open file '%s' for reading.\n", argv[1] ); return 1; } diff --git a/carg_parser.c b/carg_parser.c index f9c0f08..a453e36 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2014 Antonio Diaz Diaz. + Copyright (C) 2006-2015 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/carg_parser.h b/carg_parser.h index b6ba67d..34b1263 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2014 Antonio Diaz Diaz. + Copyright (C) 2006-2015 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -77,7 +77,7 @@ static int Cb_read_data( struct Circular_buffer * const cb, uint8_t * const out_buffer, const int out_size ) { int size = 0; - if( out_size < 0 ) return 0; + if( out_size <= 0 ) return 0; if( cb->get > cb->put ) { size = min( cb->buffer_size - cb->get, out_size ); @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Lzlib - Compression library for the lzip format -# Copyright (C) 2009-2014 Antonio Diaz Diaz. +# Copyright (C) 2009-2015 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. pkgname=lzlib -pkgversion=1.6 +pkgversion=1.7-pre1 soversion=1 progname=minilzip progname_static=${progname} @@ -191,7 +191,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lzlib - Compression library for the lzip format -# Copyright (C) 2009-2014 Antonio Diaz Diaz. +# Copyright (C) 2009-2015 Antonio Diaz Diaz. # This file was generated automatically by configure. Do not edit. # # This Makefile is free software: you have unlimited permission @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -129,7 +129,7 @@ static int LZd_decode_member( struct LZ_decoder * const d ) { d->rep0 += Rd_decode( rdec, direct_bits - dis_align_bits ) << dis_align_bits; d->rep0 += Rd_decode_tree_reversed4( rdec, d->bm_align ); - if( d->rep0 == 0xFFFFFFFFU ) /* Marker found */ + if( d->rep0 == 0xFFFFFFFFU ) /* marker found */ { d->rep0 = rep0_saved; Rd_normalize( rdec ); @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -407,7 +407,6 @@ static inline bool LZd_init( struct LZ_decoder * const d, Bm_array_init( d->bm_dis_slot[0], len_states * (1 << dis_slot_bits) ); Bm_array_init( d->bm_dis, modeled_distances - end_dis_model ); Bm_array_init( d->bm_align, dis_align_size ); - Lm_init( &d->match_len_model ); Lm_init( &d->rep_len_model ); d->cb.buffer[d->cb.buffer_size-1] = 0; /* prev_byte of first byte */ diff --git a/doc/lzlib.info b/doc/lzlib.info index 6814d19..5a32927 100644 --- a/doc/lzlib.info +++ b/doc/lzlib.info @@ -11,7 +11,7 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir) Lzlib Manual ************ -This manual is for Lzlib (version 1.6, 27 August 2014). +This manual is for Lzlib (version 1.7-pre1, 24 February 2015). * Menu: @@ -29,7 +29,7 @@ This manual is for Lzlib (version 1.6, 27 August 2014). * Concept index:: Index of concepts - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -45,8 +45,9 @@ and decompression functions, including integrity checking of the decompressed data. The compressed data format used by the library is the lzip format. Lzlib is written in C. - The lzip file format is designed for long-term data archiving, taking -into account both data integrity and decoder availability: + The lzip file format is designed for data sharing and long-term +archiving, taking into account both data integrity and decoder +availability: * The lzip format provides very safe integrity checking and some data recovery means. The lziprecover program can repair bit-flip errors @@ -61,8 +62,8 @@ into account both data integrity and decoder availability: archaeologist to extract the data from a lzip file long after quantum computers eventually render LZMA obsolete. - * Additionally lzip is copylefted, which guarantees that it will - remain free forever. + * Additionally the lzip reference implementation is copylefted, which + guarantees that it will remain free forever. A nice feature of the lzip format is that a corrupt byte is easier to repair the nearer it is from the beginning of the file. Therefore, with @@ -75,15 +76,21 @@ library are given in the files 'main.c' and 'bbexample.c' from the source distribution. Compression/decompression is done by repeatedly calling a couple of -read/write functions until all the data has been processed by the +read/write functions until all the data have been processed by the library. This interface is safer and less error prone than the traditional zlib interface. Compression/decompression is done when the read function is called. This means the value returned by the position functions will not be -updated until some data is read, even if you write a lot of data. If -you want the data to be compressed in advance, just call the read -function with a SIZE equal to 0. +updated until a read call, even if a lot of data is written. If you +want the data to be compressed in advance, just call the read function +with a SIZE equal to 0. + + If all the data to be compressed are written in advance, lzlib will +automatically adjust the header of the compressed data to use the +smallest possible dictionary size. This feature reduces the amount of +memory needed for decompression and allows minilzip to produce identical +compressed output as lzip. Lzlib will correctly decompress a data stream which is the concatenation of two or more compressed data streams. The result is the @@ -103,9 +110,9 @@ elaborated way of finding coding sequences of minimum price than the one currently used by lzip could be developed, and the resulting sequence could also be coded using the LZMA coding scheme. - Lzip currently implements two variants of the LZMA algorithm; fast -(used by option -0) and normal (used by all other compression levels). -Lzlib just implements the "normal" variant. + Lzlib currently implements two variants of the LZMA algorithm; fast +(used by option -0 of minilzip) and normal (used by all other +compression levels). The high compression of LZMA comes from combining two basic, well-proven compression ideas: sliding dictionaries (LZ77/78) and @@ -147,15 +154,18 @@ File: lzlib.info, Node: Buffering, Next: Parameter limits, Prev: Library vers Lzlib internal functions need access to a memory chunk at least as large as the dictionary size (sliding window). For efficiency reasons, the -input buffer for compression is twice as large as the dictionary size. -Finally, for safety reasons, lzlib uses two more internal buffers. +input buffer for compression is twice or sixteen times as large as the +dictionary size. + + Finally, for safety reasons, lzlib uses two more internal buffers. These are the four buffers used by lzlib, and their guaranteed minimum sizes: * Input compression buffer. Written to by the 'LZ_compress_write' - function. Its size is two times the dictionary size set with the - 'LZ_compress_open' function or 64 KiB, whichever is larger. + function. For the normal variant of LZMA, its size is two times + the dictionary size set with the 'LZ_compress_open' function or 64 + KiB, whichever is larger. For the fast variant, its size is 1 MiB. * Output compression buffer. Read from by the 'LZ_compress_read' function. Its size is 64 KiB. @@ -230,6 +240,11 @@ calling 'LZ_compress_errno' before using it. range from 5 to 273. Larger values usually give better compression ratios but longer compression times. + If DICTIONARY_SIZE is 65535 and MATCH_LEN_LIMIT is 16, the fast + variant of LZMA is chosen, which produces identical compressed + output as 'lzip -0'. (The DICTIONARY_SIZE used will be rounded + upwards to 64 KiB). + MEMBER_SIZE sets the member size limit in bytes. Minimum member size limit is 100 kB. Small member size may degrade compression ratio, so use it only when needed. To produce a single-member data @@ -246,8 +261,8 @@ calling 'LZ_compress_errno' before using it. -- Function: int LZ_compress_finish ( struct LZ_Encoder * const ENCODER ) Use this function to tell 'lzlib' that all the data for this member - has already been written (with the 'LZ_compress_write' function). - After all the produced compressed data has been read with + have already been written (with the 'LZ_compress_write' function). + After all the produced compressed data have been read with 'LZ_compress_read' and 'LZ_compress_member_finished' returns 1, a new member can be started with 'LZ_compress_restart_member'. @@ -262,9 +277,8 @@ calling 'LZ_compress_errno' before using it. ENCODER ) Use this function to make available to 'LZ_compress_read' all the data already written with the 'LZ_compress_write' function. First - call 'LZ_compress_read' until it returns 0. Then call - 'LZ_compress_sync_flush'. Finally, call 'LZ_compress_read' again - to read the remaining data. + call 'LZ_compress_sync_flush'. Then call 'LZ_compress_read' until + it returns 0. Repeated use of 'LZ_compress_sync_flush' may degrade compression ratio, so use it only when needed. @@ -304,8 +318,8 @@ calling 'LZ_compress_errno' before using it. -- Function: int LZ_compress_finished ( struct LZ_Encoder * const ENCODER ) - Returns 1 if all the data has been read and 'LZ_compress_close' can - be safely called. Otherwise it returns 0. + Returns 1 if all the data have been read and 'LZ_compress_close' + can be safely called. Otherwise it returns 0. -- Function: int LZ_compress_member_finished ( struct LZ_Encoder * const ENCODER ) @@ -364,7 +378,8 @@ verified by calling 'LZ_decompress_errno' before using it. -- Function: int LZ_decompress_finish ( struct LZ_Decoder * const DECODER ) Use this function to tell 'lzlib' that all the data for this stream - has already been written (with the 'LZ_decompress_write' function). + have already been written (with the 'LZ_decompress_write' + function). -- Function: int LZ_decompress_reset ( struct LZ_Decoder * const DECODER ) @@ -420,7 +435,7 @@ verified by calling 'LZ_decompress_errno' before using it. -- Function: int LZ_decompress_finished ( struct LZ_Decoder * const DECODER ) - Returns 1 if all the data has been read and 'LZ_decompress_close' + Returns 1 if all the data have been read and 'LZ_decompress_close' can be safely called. Otherwise it returns 0. -- Function: int LZ_decompress_member_finished ( struct LZ_Decoder * @@ -589,8 +604,8 @@ with no additional information before, between, or after them. 'Lzma stream' The lzma stream, finished by an end of stream marker. Uses default - values for encoder properties. See the lzip manual for a full - description. + values for encoder properties. *Note Stream format: (lzip)Stream + format, for a complete description. Lzip only uses the LZMA marker '2' ("End Of Stream" marker). Lzlib also uses the LZMA marker '3' ("Sync Flush" marker). @@ -630,7 +645,7 @@ Example 1: Normal compression (MEMBER_SIZE > total output). 1) LZ_compress_open 2) LZ_compress_write 3) LZ_compress_read - 4) go back to step 2 until all input data has been written + 4) go back to step 2 until all input data have been written 5) LZ_compress_finish 6) LZ_compress_read 7) go back to step 6 until LZ_compress_finished returns 1 @@ -653,7 +668,7 @@ Example 3: Decompression. 1) LZ_decompress_open 2) LZ_decompress_write 3) LZ_decompress_read - 4) go back to step 2 until all input data has been written + 4) go back to step 2 until all input data have been written 5) LZ_decompress_finish 6) LZ_decompress_read 7) go back to step 6 until LZ_decompress_finished returns 1 @@ -697,7 +712,7 @@ Example 6: Multi-member compression (user-restarted members). 6) LZ_compress_read 7) go back to step 6 until LZ_compress_member_finished returns 1 8) verify that LZ_compress_finished returns 1 - 9) go to step 12 if all input data has been written + 9) go to step 12 if all input data have been written 10) LZ_compress_restart_member 11) go back to step 2 12) LZ_compress_close @@ -770,18 +785,18 @@ Concept index Tag Table: Node: Top220 -Node: Introduction1304 -Node: Library version5487 -Node: Buffering6132 -Node: Parameter limits7253 -Node: Compression functions8212 -Node: Decompression functions14594 -Node: Error codes20755 -Node: Error messages22694 -Node: Data format23273 -Node: Examples25922 -Node: Problems30005 -Node: Concept index30577 +Node: Introduction1311 +Node: Library version5808 +Node: Buffering6453 +Node: Parameter limits7673 +Node: Compression functions8632 +Node: Decompression functions15176 +Node: Error codes21344 +Node: Error messages23283 +Node: Data format23862 +Node: Examples26538 +Node: Problems30624 +Node: Concept index31196 End Tag Table diff --git a/doc/lzlib.texi b/doc/lzlib.texi index 1a9d9b6..417cc7b 100644 --- a/doc/lzlib.texi +++ b/doc/lzlib.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 27 August 2014 -@set VERSION 1.6 +@set UPDATED 24 February 2015 +@set VERSION 1.7-pre1 @dircategory Data Compression @direntry @@ -50,7 +50,7 @@ This manual is for Lzlib (version @value{VERSION}, @value{UPDATED}). @end menu @sp 1 -Copyright @copyright{} 2009-2014 Antonio Diaz Diaz. +Copyright @copyright{} 2009-2015 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -65,8 +65,9 @@ and decompression functions, including integrity checking of the decompressed data. The compressed data format used by the library is the lzip format. Lzlib is written in C. -The lzip file format is designed for long-term data archiving, taking -into account both data integrity and decoder availability: +The lzip file format is designed for data sharing and long-term +archiving, taking into account both data integrity and decoder +availability: @itemize @bullet @item @@ -85,8 +86,8 @@ data from a lzip file long after quantum computers eventually render LZMA obsolete. @item -Additionally lzip is copylefted, which guarantees that it will remain -free forever. +Additionally the lzip reference implementation is copylefted, which +guarantees that it will remain free forever. @end itemize A nice feature of the lzip format is that a corrupt byte is easier to @@ -100,16 +101,22 @@ library are given in the files @samp{main.c} and @samp{bbexample.c} from the source distribution. Compression/decompression is done by repeatedly calling a couple of -read/write functions until all the data has been processed by the +read/write functions until all the data have been processed by the library. This interface is safer and less error prone than the traditional zlib interface. Compression/decompression is done when the read function is called. This means the value returned by the position functions will not be updated -until some data is read, even if you write a lot of data. If you want -the data to be compressed in advance, just call the read function with a +until a read call, even if a lot of data is written. If you want the +data to be compressed in advance, just call the read function with a @var{size} equal to 0. +If all the data to be compressed are written in advance, lzlib will +automatically adjust the header of the compressed data to use the +smallest possible dictionary size. This feature reduces the amount of +memory needed for decompression and allows minilzip to produce identical +compressed output as lzip. + Lzlib will correctly decompress a data stream which is the concatenation of two or more compressed data streams. The result is the concatenation of the corresponding decompressed data streams. Integrity testing of @@ -127,9 +134,9 @@ elaborated way of finding coding sequences of minimum price than the one currently used by lzip could be developed, and the resulting sequence could also be coded using the LZMA coding scheme. -Lzip currently implements two variants of the LZMA algorithm; fast (used -by option -0) and normal (used by all other compression levels). Lzlib -just implements the "normal" variant. +Lzlib currently implements two variants of the LZMA algorithm; fast +(used by option -0 of minilzip) and normal (used by all other +compression levels). The high compression of LZMA comes from combining two basic, well-proven compression ideas: sliding dictionaries (LZ77/78) and markov models (the @@ -173,7 +180,9 @@ if( LZ_version()[0] != LZ_version_string[0] ) Lzlib internal functions need access to a memory chunk at least as large as the dictionary size (sliding window). For efficiency reasons, the -input buffer for compression is twice as large as the dictionary size. +input buffer for compression is twice or sixteen times as large as the +dictionary size. + Finally, for safety reasons, lzlib uses two more internal buffers. These are the four buffers used by lzlib, and their guaranteed minimum @@ -181,9 +190,10 @@ sizes: @itemize @bullet @item Input compression buffer. Written to by the -@samp{LZ_compress_write} function. Its size is two times the dictionary -size set with the @samp{LZ_compress_open} function or 64 KiB, whichever -is larger. +@samp{LZ_compress_write} function. For the normal variant of LZMA, its +size is two times the dictionary size set with the +@samp{LZ_compress_open} function or 64 KiB, whichever is larger. For the +fast variant, its size is 1 MiB. @item Output compression buffer. Read from by the @samp{LZ_compress_read} function. Its size is 64 KiB. @@ -261,6 +271,11 @@ to it. range from 5 to 273. Larger values usually give better compression ratios but longer compression times. +If @var{dictionary_size} is 65535 and @var{match_len_limit} is 16, the +fast variant of LZMA is chosen, which produces identical compressed +output as @code{lzip -0}. (The @var{dictionary_size} used will be +rounded upwards to 64 KiB). + @var{member_size} sets the member size limit in bytes. Minimum member size limit is 100 kB. Small member size may degrade compression ratio, so use it only when needed. To produce a single-member data stream, give @@ -279,8 +294,8 @@ more be used as an argument to any LZ_compress function. @deftypefun int LZ_compress_finish ( struct LZ_Encoder * const @var{encoder} ) Use this function to tell @samp{lzlib} that all the data for this member -has already been written (with the @samp{LZ_compress_write} function). -After all the produced compressed data has been read with +have already been written (with the @samp{LZ_compress_write} function). +After all the produced compressed data have been read with @samp{LZ_compress_read} and @samp{LZ_compress_member_finished} returns 1, a new member can be started with @samp{LZ_compress_restart_member}. @end deftypefun @@ -297,9 +312,8 @@ indicates that the current member has been fully read (with the @deftypefun int LZ_compress_sync_flush ( struct LZ_Encoder * const @var{encoder} ) Use this function to make available to @samp{LZ_compress_read} all the data already written with the @samp{LZ_compress_write} function. First -call @samp{LZ_compress_read} until it returns 0. Then call -@samp{LZ_compress_sync_flush}. Finally, call @samp{LZ_compress_read} -again to read the remaining data. +call @samp{LZ_compress_sync_flush}. Then call @samp{LZ_compress_read} +until it returns 0. Repeated use of @samp{LZ_compress_sync_flush} may degrade compression ratio, so use it only when needed. @@ -345,8 +359,8 @@ Returns the current error code for @var{encoder} (@pxref{Error codes}). @deftypefun int LZ_compress_finished ( struct LZ_Encoder * const @var{encoder} ) -Returns 1 if all the data has been read and @samp{LZ_compress_close} can -be safely called. Otherwise it returns 0. +Returns 1 if all the data have been read and @samp{LZ_compress_close} +can be safely called. Otherwise it returns 0. @end deftypefun @@ -414,7 +428,7 @@ more be used as an argument to any LZ_decompress function. @deftypefun int LZ_decompress_finish ( struct LZ_Decoder * const @var{decoder} ) Use this function to tell @samp{lzlib} that all the data for this stream -has already been written (with the @samp{LZ_decompress_write} function). +have already been written (with the @samp{LZ_decompress_write} function). @end deftypefun @@ -478,7 +492,7 @@ Returns the current error code for @var{decoder} (@pxref{Error codes}). @deftypefun int LZ_decompress_finished ( struct LZ_Decoder * const @var{decoder} ) -Returns 1 if all the data has been read and @samp{LZ_decompress_close} +Returns 1 if all the data have been read and @samp{LZ_decompress_close} can be safely called. Otherwise it returns 0. @end deftypefun @@ -665,8 +679,15 @@ Valid values for dictionary size range from 4 KiB to 512 MiB. @item Lzma stream The lzma stream, finished by an end of stream marker. Uses default -values for encoder properties. See the lzip manual for a full -description.@* +values for encoder properties. +@ifnothtml +@xref{Stream format,,,lzip}, +@end ifnothtml +@ifhtml +See +@uref{http://www.nongnu.org/lzip/manual/lzip_manual.html#Stream-format,,Stream format} +@end ifhtml +for a complete description.@* Lzip only uses the LZMA marker @samp{2} ("End Of Stream" marker). Lzlib also uses the LZMA marker @samp{3} ("Sync Flush" marker). @@ -706,7 +727,7 @@ Example 1: Normal compression (@var{member_size} > total output). 1) LZ_compress_open 2) LZ_compress_write 3) LZ_compress_read -4) go back to step 2 until all input data has been written +4) go back to step 2 until all input data have been written 5) LZ_compress_finish 6) LZ_compress_read 7) go back to step 6 until LZ_compress_finished returns 1 @@ -735,7 +756,7 @@ Example 3: Decompression. 1) LZ_decompress_open 2) LZ_decompress_write 3) LZ_decompress_read -4) go back to step 2 until all input data has been written +4) go back to step 2 until all input data have been written 5) LZ_decompress_finish 6) LZ_decompress_read 7) go back to step 6 until LZ_decompress_finished returns 1 @@ -788,7 +809,7 @@ Example 6: Multi-member compression (user-restarted members). 6) LZ_compress_read 7) go back to step 6 until LZ_compress_member_finished returns 1 8) verify that LZ_compress_finished returns 1 - 9) go to step 12 if all input data has been written + 9) go to step 12 if all input data have been written 10) LZ_compress_restart_member 11) go back to step 2 12) LZ_compress_close @@ -838,7 +859,7 @@ for all eternity, if not longer. If you find a bug in Lzlib, please send electronic mail to @email{lzip-bug@@nongnu.org}. Include the version number, which you can -find by running @w{@samp{minilzip --version}} or in +find by running @w{@code{minilzip --version}} or in @samp{LZ_version_string} from @samp{lzlib.h}. diff --git a/doc/minilzip.1 b/doc/minilzip.1 index 93ee47f..ba63b8c 100644 --- a/doc/minilzip.1 +++ b/doc/minilzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH MINILZIP "1" "August 2014" "minilzip 1.6" "User Commands" +.TH MINILZIP "1" "February 2015" "minilzip 1.7-pre1" "User Commands" .SH NAME minilzip \- reduces the size of files .SH SYNOPSIS @@ -54,11 +54,11 @@ test compressed file integrity \fB\-v\fR, \fB\-\-verbose\fR be verbose (a 2nd \fB\-v\fR gives more) .TP -\fB\-1\fR .. \fB\-9\fR +\fB\-0\fR .. \fB\-9\fR set compression level [default 6] .TP \fB\-\-fast\fR -alias for \fB\-1\fR +alias for \fB\-0\fR .TP \fB\-\-best\fR alias for \fB\-9\fR @@ -70,8 +70,7 @@ Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... The bidimensional parameter space of LZMA can't be mapped to a linear scale optimal for all files. If your files are large, very repetitive, etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR -options directly to achieve optimal performance. For example, \fB\-9m64\fR -usually compresses executables more (and faster) than \fB\-9\fR. +options directly to achieve optimal performance. .PP Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or @@ -82,8 +81,8 @@ Report bugs to lzip\-bug@nongnu.org .br Lzlib home page: http://www.nongnu.org/lzip/lzlib.html .SH COPYRIGHT -Copyright \(co 2014 Antonio Diaz Diaz. -Using lzlib 1.6 +Copyright \(co 2015 Antonio Diaz Diaz. +Using lzlib 1.7\-pre1 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,125 +25,26 @@ Public License. */ -static bool Mf_normalize_pos( struct Matchfinder * const mf ) +static int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs ) { - if( mf->pos > mf->stream_pos ) - { mf->pos = mf->stream_pos; return false; } - if( !mf->at_stream_end ) - { - int i; - const int offset = mf->pos - mf->dictionary_size - before_size; - const int size = mf->stream_pos - offset; - memmove( mf->buffer, mf->buffer + offset, size ); - mf->partial_data_pos += offset; - mf->pos -= offset; - mf->stream_pos -= offset; - for( i = 0; i < mf->num_prev_positions; ++i ) - mf->prev_positions[i] -= min( mf->prev_positions[i], offset ); - for( i = 0; i < 2 * ( mf->dictionary_size + 1 ); ++i ) - mf->prev_pos_tree[i] -= min( mf->prev_pos_tree[i], offset ); - } - return true; - } - - -static bool Mf_init( struct Matchfinder * const mf, const int dict_size, - const int match_len_limit ) - { - const int buffer_size_limit = ( 2 * dict_size ) + before_size + after_size; - unsigned size; - int i; - - mf->partial_data_pos = 0; - mf->match_len_limit = match_len_limit; - mf->pos = 0; - mf->cyclic_pos = 0; - mf->stream_pos = 0; - mf->cycles = ( match_len_limit < max_match_len ) ? - 16 + ( match_len_limit / 2 ) : 256; - mf->at_stream_end = false; - mf->been_flushed = false; - mf->flushing = false; - - mf->buffer_size = max( 65536, buffer_size_limit ); - mf->buffer = (uint8_t *)malloc( mf->buffer_size ); - if( !mf->buffer ) return false; - mf->dictionary_size = dict_size; - mf->pos_limit = mf->buffer_size - after_size; - size = 1 << max( 16, real_bits( mf->dictionary_size - 1 ) - 2 ); - if( mf->dictionary_size > 1 << 26 ) /* 64 MiB */ - size >>= 1; - mf->key4_mask = size - 1; - size += num_prev_positions2; - size += num_prev_positions3; - - mf->num_prev_positions = size; - size += ( 2 * ( mf->dictionary_size + 1 ) ); - if( size * sizeof (int32_t) <= size ) mf->prev_positions = 0; - else mf->prev_positions = (int32_t *)malloc( size * sizeof (int32_t) ); - if( !mf->prev_positions ) { free( mf->buffer ); return false; } - mf->prev_pos_tree = mf->prev_positions + mf->num_prev_positions; - for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = 0; - return true; - } - - -static void Mf_adjust_distionary_size( struct Matchfinder * const mf ) - { - if( mf->stream_pos < mf->dictionary_size ) - { - int size; - mf->buffer_size = - mf->dictionary_size = - mf->pos_limit = max( min_dictionary_size, mf->stream_pos ); - size = 1 << max( 16, real_bits( mf->dictionary_size - 1 ) - 2 ); - if( mf->dictionary_size > 1 << 26 ) - size >>= 1; - mf->key4_mask = size - 1; - size += num_prev_positions2; - size += num_prev_positions3; - mf->num_prev_positions = size; - mf->prev_pos_tree = mf->prev_positions + mf->num_prev_positions; - } - } - - -static void Mf_reset( struct Matchfinder * const mf ) - { - int i; - if( mf->stream_pos > mf->pos ) - memmove( mf->buffer, mf->buffer + mf->pos, mf->stream_pos - mf->pos ); - mf->partial_data_pos = 0; - mf->stream_pos -= mf->pos; - mf->pos = 0; - mf->cyclic_pos = 0; - mf->at_stream_end = false; - mf->been_flushed = false; - mf->flushing = false; - for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = 0; - } - - -static int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs ) - { - int32_t * ptr0 = mf->prev_pos_tree + ( mf->cyclic_pos << 1 ); + int32_t * ptr0 = e->eb.mb.pos_array + ( e->eb.mb.cyclic_pos << 1 ); int32_t * ptr1 = ptr0 + 1; int32_t * newptr; int len = 0, len0 = 0, len1 = 0; int maxlen = 0; int num_pairs = 0; - const int pos1 = mf->pos + 1; - const int min_pos = - ( mf->pos > mf->dictionary_size ) ? mf->pos - mf->dictionary_size : 0; - const uint8_t * const data = mf->buffer + mf->pos; + const int pos1 = e->eb.mb.pos + 1; + const int min_pos = ( e->eb.mb.pos > e->eb.mb.dictionary_size ) ? + e->eb.mb.pos - e->eb.mb.dictionary_size : 0; + const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb ); int count, delta, key2, key3, key4, newpos; unsigned tmp; - int len_limit = mf->match_len_limit; + int len_limit = e->match_len_limit; - if( len_limit > Mf_available_bytes( mf ) ) + if( len_limit > Mb_available_bytes( &e->eb.mb ) ) { - mf->been_flushed = true; - len_limit = Mf_available_bytes( mf ); + e->been_flushed = true; + len_limit = Mb_available_bytes( &e->eb.mb ); if( len_limit < 4 ) { *ptr0 = *ptr1 = 0; return 0; } } @@ -152,23 +53,23 @@ static int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pair tmp ^= (unsigned)data[2] << 8; key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) ); key4 = num_prev_positions2 + num_prev_positions3 + - ( ( tmp ^ ( crc32[data[3]] << 5 ) ) & mf->key4_mask ); + ( ( tmp ^ ( crc32[data[3]] << 5 ) ) & e->eb.mb.key4_mask ); if( pairs ) { - int np2 = mf->prev_positions[key2]; - int np3 = mf->prev_positions[key3]; - if( np2 > min_pos && mf->buffer[np2-1] == data[0] ) + int np2 = e->eb.mb.prev_positions[key2]; + int np3 = e->eb.mb.prev_positions[key3]; + if( np2 > min_pos && e->eb.mb.buffer[np2-1] == data[0] ) { - pairs[0].dis = mf->pos - np2; + pairs[0].dis = e->eb.mb.pos - np2; pairs[0].len = maxlen = 2; num_pairs = 1; } - if( np2 != np3 && np3 > min_pos && mf->buffer[np3-1] == data[0] ) + if( np2 != np3 && np3 > min_pos && e->eb.mb.buffer[np3-1] == data[0] ) { maxlen = 3; np2 = np3; - pairs[num_pairs].dis = mf->pos - np2; + pairs[num_pairs].dis = e->eb.mb.pos - np2; ++num_pairs; } if( num_pairs > 0 ) @@ -182,20 +83,20 @@ static int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pair if( maxlen < 3 ) maxlen = 3; } - mf->prev_positions[key2] = pos1; - mf->prev_positions[key3] = pos1; - newpos = mf->prev_positions[key4]; - mf->prev_positions[key4] = pos1; + e->eb.mb.prev_positions[key2] = pos1; + e->eb.mb.prev_positions[key3] = pos1; + newpos = e->eb.mb.prev_positions[key4]; + e->eb.mb.prev_positions[key4] = pos1; - for( count = mf->cycles; ; ) + for( count = e->cycles; ; ) { if( newpos <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; } - if( mf->been_flushed ) len = 0; + if( e->been_flushed ) len = 0; delta = pos1 - newpos; - newptr = mf->prev_pos_tree + - ( ( mf->cyclic_pos - delta + - ( (mf->cyclic_pos >= delta) ? 0 : mf->dictionary_size + 1 ) ) << 1 ); + newptr = e->eb.mb.pos_array + + ( ( e->eb.mb.cyclic_pos - delta + + ( (e->eb.mb.cyclic_pos >= delta) ? 0 : e->eb.mb.dictionary_size + 1 ) ) << 1 ); if( data[len-delta] == data[len] ) { while( ++len < len_limit && data[len-delta] == data[len] ) {} @@ -231,43 +132,6 @@ static int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pair } - /* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */ -static bool LZe_full_flush( struct LZ_encoder * const e, const State state ) - { - int i; - const int pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask; - File_trailer trailer; - if( e->member_finished || - Cb_free_bytes( &e->renc.cb ) < max_marker_size + Ft_size ) - return false; - Re_encode_bit( &e->renc, &e->bm_match[state][pos_state], 1 ); - Re_encode_bit( &e->renc, &e->bm_rep[state], 0 ); - LZe_encode_pair( e, 0xFFFFFFFFU, min_match_len, pos_state ); - Re_flush( &e->renc ); - Ft_set_data_crc( trailer, LZe_crc( e ) ); - Ft_set_data_size( trailer, Mf_data_position( e->matchfinder ) ); - Ft_set_member_size( trailer, Re_member_position( &e->renc ) + Ft_size ); - for( i = 0; i < Ft_size; ++i ) - Cb_put_byte( &e->renc.cb, trailer[i] ); - return true; - } - - - /* Sync Flush mark => (dis == 0xFFFFFFFFU, len == min_match_len + 1) */ -static bool LZe_sync_flush( struct LZ_encoder * const e ) - { - const int pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask; - const State state = e->state; - if( e->member_finished || Cb_free_bytes( &e->renc.cb ) < max_marker_size ) - return false; - Re_encode_bit( &e->renc, &e->bm_match[state][pos_state], 1 ); - Re_encode_bit( &e->renc, &e->bm_rep[state], 0 ); - LZe_encode_pair( e, 0xFFFFFFFFU, min_match_len + 1, pos_state ); - Re_flush( &e->renc ); - return true; - } - - static void LZe_update_distance_prices( struct LZ_encoder * const e ) { int dis, len_state; @@ -276,7 +140,7 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e ) const int dis_slot = dis_slots[dis]; const int direct_bits = ( dis_slot >> 1 ) - 1; const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits; - const int price = price_symbol_reversed( e->bm_dis + base - dis_slot - 1, + const int price = price_symbol_reversed( e->eb.bm_dis + base - dis_slot - 1, dis - base, direct_bits ); for( len_state = 0; len_state < len_states; ++len_state ) e->dis_prices[len_state][dis] = price; @@ -286,7 +150,7 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e ) { int * const dsp = e->dis_slot_prices[len_state]; int * const dp = e->dis_prices[len_state]; - const Bit_model * const bmds = e->bm_dis_slot[len_state]; + const Bit_model * const bmds = e->eb.bm_dis_slot[len_state]; int slot = 0; for( ; slot < end_dis_model; ++slot ) dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ); @@ -302,48 +166,7 @@ static void LZe_update_distance_prices( struct LZ_encoder * const e ) } -static bool LZe_init( struct LZ_encoder * const e, - struct Matchfinder * const mf, - const File_header header, - const unsigned long long member_size ) - { - int i; - e->member_size_limit = member_size - Ft_size - max_marker_size; - e->pending_num_pairs = 0; - e->crc = 0xFFFFFFFFU; - - Bm_array_init( e->bm_literal[0], (1 << literal_context_bits) * 0x300 ); - Bm_array_init( e->bm_match[0], states * pos_states ); - Bm_array_init( e->bm_rep, states ); - Bm_array_init( e->bm_rep0, states ); - Bm_array_init( e->bm_rep1, states ); - Bm_array_init( e->bm_rep2, states ); - Bm_array_init( e->bm_len[0], states * pos_states ); - Bm_array_init( e->bm_dis_slot[0], len_states * (1 << dis_slot_bits) ); - Bm_array_init( e->bm_dis, modeled_distances - end_dis_model ); - Bm_array_init( e->bm_align, dis_align_size ); - - e->matchfinder = mf; - if( !Re_init( &e->renc ) ) return false; - Lm_init( &e->match_len_model ); - Lm_init( &e->rep_len_model ); - Lp_init( &e->match_len_prices, &e->match_len_model, mf->match_len_limit ); - Lp_init( &e->rep_len_prices, &e->rep_len_model, mf->match_len_limit ); - for( i = 0; i < num_rep_distances; ++i ) e->reps[i] = 0; - e->num_dis_slots = 2 * real_bits( mf->dictionary_size - 1 ); - e->price_counter = 0; - e->dis_price_counter = 0; - e->align_price_counter = 0; - e->state = 0; - e->member_finished = false; - - for( i = 0; i < Fh_size; ++i ) - Cb_put_byte( &e->renc.cb, header[i] ); - return true; - } - - -/* Return value == number of bytes advanced (ahead). +/* Returns the number of bytes advanced (ahead). trials[0]..trials[ahead-1] contain the steps to encode. ( trials[0].dis == -1 ) means literal. A match/rep longer or equal than match_len_limit finishes the sequence. @@ -367,45 +190,44 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, for( i = 0; i < num_rep_distances; ++i ) { - replens[i] = - Mf_true_match_len( e->matchfinder, 0, reps[i] + 1, max_match_len ); + replens[i] = Mb_true_match_len( &e->eb.mb, 0, reps[i] + 1, max_match_len ); if( replens[i] > replens[rep_index] ) rep_index = i; } - if( replens[rep_index] >= e->matchfinder->match_len_limit ) + if( replens[rep_index] >= e->match_len_limit ) { e->trials[0].dis = rep_index; e->trials[0].price = replens[rep_index]; - if( !LZe_move_pos( e, replens[rep_index] ) ) return 0; + if( !LZe_move_and_update( e, replens[rep_index] ) ) return 0; return replens[rep_index]; } - if( main_len >= e->matchfinder->match_len_limit ) + if( main_len >= e->match_len_limit ) { e->trials[0].dis = e->pairs[num_pairs-1].dis + num_rep_distances; e->trials[0].price = main_len; - if( !LZe_move_pos( e, main_len ) ) return 0; + if( !LZe_move_and_update( e, main_len ) ) return 0; return main_len; } { - const int pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask; - const int match_price = price1( e->bm_match[state][pos_state] ); - const int rep_match_price = match_price + price1( e->bm_rep[state] ); - const uint8_t prev_byte = Mf_peek( e->matchfinder, 1 ); - const uint8_t cur_byte = Mf_peek( e->matchfinder, 0 ); - const uint8_t match_byte = Mf_peek( e->matchfinder, reps[0] + 1 ); + const int pos_state = Mb_data_position( &e->eb.mb ) & pos_state_mask; + const int match_price = price1( e->eb.bm_match[state][pos_state] ); + const int rep_match_price = match_price + price1( e->eb.bm_rep[state] ); + const uint8_t prev_byte = Mb_peek( &e->eb.mb, 1 ); + const uint8_t cur_byte = Mb_peek( &e->eb.mb, 0 ); + const uint8_t match_byte = Mb_peek( &e->eb.mb, reps[0] + 1 ); e->trials[0].state = state; e->trials[1].dis = -1; /* literal */ - e->trials[1].price = price0( e->bm_match[state][pos_state] ); + e->trials[1].price = price0( e->eb.bm_match[state][pos_state] ); if( St_is_char( state ) ) - e->trials[1].price += LZe_price_literal( e, prev_byte, cur_byte ); + e->trials[1].price += LZeb_price_literal( &e->eb, prev_byte, cur_byte ); else - e->trials[1].price += LZe_price_matched( e, prev_byte, cur_byte, match_byte ); + e->trials[1].price += LZeb_price_matched( &e->eb, prev_byte, cur_byte, match_byte ); if( match_byte == cur_byte ) Tr_update( &e->trials[1], rep_match_price + - LZe_price_shortrep( e, state, pos_state ), 0, 0 ); + LZeb_price_shortrep( &e->eb, state, pos_state ), 0, 0 ); num_trials = max( main_len, replens[rep_index] ); @@ -413,7 +235,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, { e->trials[0].dis = e->trials[1].dis; e->trials[0].price = 1; - if( !Mf_move_pos( e->matchfinder ) ) return 0; + if( !Mb_move_pos( &e->eb.mb ) ) return 0; return 1; } @@ -429,8 +251,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, { int price; if( replens[rep] < min_match_len ) continue; - price = rep_match_price + LZe_price_rep( e, rep, state, pos_state ); - + price = rep_match_price + LZeb_price_rep( &e->eb, rep, state, pos_state ); for( len = min_match_len; len <= replens[rep]; ++len ) Tr_update( &e->trials[len], price + Lp_price( &e->rep_len_prices, len, pos_state ), rep, 0 ); @@ -438,7 +259,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, if( main_len > replens[0] ) { - const int normal_match_price = match_price + price0( e->bm_rep[state] ); + const int normal_match_price = match_price + price0( e->eb.bm_rep[state] ); i = 0, len = max( replens[0] + 1, min_match_len ); while( len > e->pairs[i].len ) ++i; while( true ) @@ -455,13 +276,13 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, while( true ) /* price optimization loop */ { struct Trial *cur_trial, *next_trial; - int newlen, pos_state, available_bytes, len_limit; + int newlen, pos_state, triable_bytes, len_limit; int start_len = min_match_len; int next_price, match_price, rep_match_price; State cur_state; uint8_t prev_byte, cur_byte, match_byte; - if( !Mf_move_pos( e->matchfinder ) ) return 0; + if( !Mb_move_pos( &e->eb.mb ) ) return 0; if( ++cur >= num_trials ) /* no more initialized trials */ { LZe_backward( e, cur ); @@ -470,7 +291,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, num_pairs = LZe_read_match_distances( e ); newlen = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0; - if( newlen >= e->matchfinder->match_len_limit ) + if( newlen >= e->match_len_limit ) { e->pending_num_pairs = num_pairs; LZe_backward( e, cur ); @@ -517,31 +338,31 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, mtf_reps( dis, cur_trial->reps ); } - pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask; - prev_byte = Mf_peek( e->matchfinder, 1 ); - cur_byte = Mf_peek( e->matchfinder, 0 ); - match_byte = Mf_peek( e->matchfinder, cur_trial->reps[0] + 1 ); + pos_state = Mb_data_position( &e->eb.mb ) & pos_state_mask; + prev_byte = Mb_peek( &e->eb.mb, 1 ); + cur_byte = Mb_peek( &e->eb.mb, 0 ); + match_byte = Mb_peek( &e->eb.mb, cur_trial->reps[0] + 1 ); next_price = cur_trial->price + - price0( e->bm_match[cur_state][pos_state] ); + price0( e->eb.bm_match[cur_state][pos_state] ); if( St_is_char( cur_state ) ) - next_price += LZe_price_literal( e, prev_byte, cur_byte ); + next_price += LZeb_price_literal( &e->eb, prev_byte, cur_byte ); else - next_price += LZe_price_matched( e, prev_byte, cur_byte, match_byte ); + next_price += LZeb_price_matched( &e->eb, prev_byte, cur_byte, match_byte ); /* try last updates to next trial */ next_trial = &e->trials[cur+1]; Tr_update( next_trial, next_price, -1, cur ); /* literal */ - match_price = cur_trial->price + price1( e->bm_match[cur_state][pos_state] ); - rep_match_price = match_price + price1( e->bm_rep[cur_state] ); + match_price = cur_trial->price + price1( e->eb.bm_match[cur_state][pos_state] ); + rep_match_price = match_price + price1( e->eb.bm_rep[cur_state] ); if( match_byte == cur_byte && next_trial->dis != 0 && next_trial->prev_index2 == single_step_trial ) { const int price = rep_match_price + - LZe_price_shortrep( e, cur_state, pos_state ); + LZeb_price_shortrep( &e->eb, cur_state, pos_state ); if( price <= next_trial->price ) { next_trial->price = price; @@ -550,19 +371,18 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, } } - available_bytes = min( Mf_available_bytes( e->matchfinder ), - max_num_trials - 1 - cur ); - if( available_bytes < min_match_len ) continue; + triable_bytes = + min( Mb_available_bytes( &e->eb.mb ), max_num_trials - 1 - cur ); + if( triable_bytes < min_match_len ) continue; - len_limit = min( e->matchfinder->match_len_limit, available_bytes ); + len_limit = min( e->match_len_limit, triable_bytes ); /* try literal + rep0 */ if( match_byte != cur_byte && next_trial->prev_index != cur ) { - const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ); + const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb ); const int dis = cur_trial->reps[0] + 1; - const int limit = min( e->matchfinder->match_len_limit + 1, - available_bytes ); + const int limit = min( e->match_len_limit + 1, triable_bytes ); len = 1; while( len < limit && data[len-dis] == data[len] ) ++len; if( --len >= min_match_len ) @@ -570,8 +390,8 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, const int pos_state2 = ( pos_state + 1 ) & pos_state_mask; const State state2 = St_set_char( cur_state ); const int price = next_price + - price1( e->bm_match[state2][pos_state2] ) + - price1( e->bm_rep[state2] ) + + price1( e->eb.bm_match[state2][pos_state2] ) + + price1( e->eb.bm_rep[state2] ) + LZe_price_rep0_len( e, len, state2, pos_state2 ); while( num_trials < cur + 1 + len ) e->trials[++num_trials].price = infinite_price; @@ -582,7 +402,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, /* try rep distances */ for( rep = 0; rep < num_rep_distances; ++rep ) { - const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ); + const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb ); int price; const int dis = cur_trial->reps[rep] + 1; @@ -591,7 +411,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, if( data[len-dis] != data[len] ) break; while( num_trials < cur + len ) e->trials[++num_trials].price = infinite_price; - price = rep_match_price + LZe_price_rep( e, rep, cur_state, pos_state ); + price = rep_match_price + LZeb_price_rep( &e->eb, rep, cur_state, pos_state ); for( i = min_match_len; i <= len; ++i ) Tr_update( &e->trials[cur+i], price + Lp_price( &e->rep_len_prices, i, pos_state ), rep, cur ); @@ -600,9 +420,9 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, /* try rep + literal + rep0 */ { - int len2 = len + 1, pos_state2; - const int limit = min( e->matchfinder->match_len_limit + len2, - available_bytes ); + int len2 = len + 1; + const int limit = min( e->match_len_limit + len2, triable_bytes ); + int pos_state2; State state2; while( len2 < limit && data[len2-dis] == data[len2] ) ++len2; len2 -= len + 1; @@ -611,12 +431,12 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, pos_state2 = ( pos_state + len ) & pos_state_mask; state2 = St_set_rep( cur_state ); price += Lp_price( &e->rep_len_prices, len, pos_state ) + - price0( e->bm_match[state2][pos_state2] ) + - LZe_price_matched( e, data[len-1], data[len], data[len-dis] ); + price0( e->eb.bm_match[state2][pos_state2] ) + + LZeb_price_matched( &e->eb, data[len-1], data[len], data[len-dis] ); pos_state2 = ( pos_state2 + 1 ) & pos_state_mask; state2 = St_set_char( state2 ); - price += price1( e->bm_match[state2][pos_state2] ) + - price1( e->bm_rep[state2] ) + + price += price1( e->eb.bm_match[state2][pos_state2] ) + + price1( e->eb.bm_rep[state2] ) + LZe_price_rep0_len( e, len2, state2, pos_state2 ); while( num_trials < cur + len + 1 + len2 ) e->trials[++num_trials].price = infinite_price; @@ -629,7 +449,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, { int dis; const int normal_match_price = match_price + - price0( e->bm_rep[cur_state] ); + price0( e->eb.bm_rep[cur_state] ); while( num_trials < cur + newlen ) e->trials[++num_trials].price = infinite_price; @@ -646,23 +466,22 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, /* try match + literal + rep0 */ if( len == e->pairs[i].len ) { - const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ); + const uint8_t * const data = Mb_ptr_to_current_pos( &e->eb.mb ); const int dis2 = dis + 1; int len2 = len + 1; - const int limit = min( e->matchfinder->match_len_limit + len2, - available_bytes ); + const int limit = min( e->match_len_limit + len2, triable_bytes ); while( len2 < limit && data[len2-dis2] == data[len2] ) ++len2; len2 -= len + 1; if( len2 >= min_match_len ) { int pos_state2 = ( pos_state + len ) & pos_state_mask; State state2 = St_set_match( cur_state ); - price += price0( e->bm_match[state2][pos_state2] ) + - LZe_price_matched( e, data[len-1], data[len], data[len-dis2] ); + price += price0( e->eb.bm_match[state2][pos_state2] ) + + LZeb_price_matched( &e->eb, data[len-1], data[len], data[len-dis2] ); pos_state2 = ( pos_state2 + 1 ) & pos_state_mask; state2 = St_set_char( state2 ); - price += price1( e->bm_match[state2][pos_state2] ) + - price1( e->bm_rep[state2] ) + + price += price1( e->eb.bm_match[state2][pos_state2] ) + + price1( e->eb.bm_rep[state2] ) + LZe_price_rep0_len( e, len2, state2, pos_state2 ); while( num_trials < cur + len + 1 + len2 ) @@ -681,39 +500,39 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, static bool LZe_encode_member( struct LZ_encoder * const e ) { - const bool best = ( e->matchfinder->match_len_limit > 12 ); + const bool best = ( e->match_len_limit > 12 ); const int dis_price_count = best ? 1 : 512; const int align_price_count = best ? 1 : dis_align_size; - const int price_count = ( e->matchfinder->match_len_limit > 36 ) ? 1013 : 4093; + const int price_count = ( e->match_len_limit > 36 ) ? 1013 : 4093; int ahead, i; - State * const state = &e->state; + State * const state = &e->eb.state; - if( e->member_finished ) return true; - if( Re_member_position( &e->renc ) >= e->member_size_limit ) + if( e->eb.member_finished ) return true; + if( Re_member_position( &e->eb.renc ) >= e->eb.member_size_limit ) { - if( LZe_full_flush( e, *state ) ) e->member_finished = true; + if( LZeb_full_flush( &e->eb ) ) e->eb.member_finished = true; return true; } - if( Mf_data_position( e->matchfinder ) == 0 && - !Mf_finished( e->matchfinder ) ) /* encode first byte */ + if( Mb_data_position( &e->eb.mb ) == 0 && + !Mb_data_finished( &e->eb.mb ) ) /* encode first byte */ { const uint8_t prev_byte = 0; uint8_t cur_byte; - if( !Mf_enough_available_bytes( e->matchfinder ) || - !Re_enough_free_bytes( &e->renc ) ) return true; - cur_byte = Mf_peek( e->matchfinder, 0 ); - Re_encode_bit( &e->renc, &e->bm_match[*state][0], 0 ); - LZe_encode_literal( e, prev_byte, cur_byte ); - CRC32_update_byte( &e->crc, cur_byte ); - Mf_get_match_pairs( e->matchfinder, 0 ); - if( !Mf_move_pos( e->matchfinder ) ) return false; + if( !Mb_enough_available_bytes( &e->eb.mb ) || + !Re_enough_free_bytes( &e->eb.renc ) ) return true; + cur_byte = Mb_peek( &e->eb.mb, 0 ); + Re_encode_bit( &e->eb.renc, &e->eb.bm_match[*state][0], 0 ); + LZeb_encode_literal( &e->eb, prev_byte, cur_byte ); + CRC32_update_byte( &e->eb.crc, cur_byte ); + LZe_get_match_pairs( e, 0 ); + if( !Mb_move_pos( &e->eb.mb ) ) return false; } - while( !Mf_finished( e->matchfinder ) ) + while( !Mb_data_finished( &e->eb.mb ) ) { - if( !Mf_enough_available_bytes( e->matchfinder ) || - !Re_enough_free_bytes( &e->renc ) ) return true; + if( !Mb_enough_available_bytes( &e->eb.mb ) || + !Re_enough_free_bytes( &e->eb.renc ) ) return true; if( e->price_counter <= 0 && e->pending_num_pairs == 0 ) { e->price_counter = price_count; /* recalculate prices every these bytes */ @@ -723,69 +542,68 @@ static bool LZe_encode_member( struct LZ_encoder * const e ) { e->align_price_counter = align_price_count; for( i = 0; i < dis_align_size; ++i ) - e->align_prices[i] = price_symbol_reversed( e->bm_align, i, dis_align_bits ); + e->align_prices[i] = price_symbol_reversed( e->eb.bm_align, i, dis_align_bits ); } Lp_update_prices( &e->match_len_prices ); Lp_update_prices( &e->rep_len_prices ); } - ahead = LZe_sequence_optimizer( e, e->reps, *state ); + ahead = LZe_sequence_optimizer( e, e->eb.reps, *state ); if( ahead <= 0 ) return false; /* can't happen */ e->price_counter -= ahead; for( i = 0; ahead > 0; ) { const int pos_state = - ( Mf_data_position( e->matchfinder ) - ahead ) & pos_state_mask; + ( Mb_data_position( &e->eb.mb ) - ahead ) & pos_state_mask; const int dis = e->trials[i].dis; const int len = e->trials[i].price; bool bit = ( dis < 0 ); - Re_encode_bit( &e->renc, &e->bm_match[*state][pos_state], !bit ); + Re_encode_bit( &e->eb.renc, &e->eb.bm_match[*state][pos_state], !bit ); if( bit ) /* literal byte */ { - const uint8_t prev_byte = Mf_peek( e->matchfinder, ahead + 1 ); - const uint8_t cur_byte = Mf_peek( e->matchfinder, ahead ); - CRC32_update_byte( &e->crc, cur_byte ); + const uint8_t prev_byte = Mb_peek( &e->eb.mb, ahead + 1 ); + const uint8_t cur_byte = Mb_peek( &e->eb.mb, ahead ); + CRC32_update_byte( &e->eb.crc, cur_byte ); if( St_is_char( *state ) ) - LZe_encode_literal( e, prev_byte, cur_byte ); + LZeb_encode_literal( &e->eb, prev_byte, cur_byte ); else { - const uint8_t match_byte = - Mf_peek( e->matchfinder, ahead + e->reps[0] + 1 ); - LZe_encode_matched( e, prev_byte, cur_byte, match_byte ); + const uint8_t match_byte = Mb_peek( &e->eb.mb, ahead + e->eb.reps[0] + 1 ); + LZeb_encode_matched( &e->eb, prev_byte, cur_byte, match_byte ); } *state = St_set_char( *state ); } else /* match or repeated match */ { - CRC32_update_buf( &e->crc, Mf_ptr_to_current_pos( e->matchfinder ) - ahead, len ); - mtf_reps( dis, e->reps ); + CRC32_update_buf( &e->eb.crc, Mb_ptr_to_current_pos( &e->eb.mb ) - ahead, len ); + mtf_reps( dis, e->eb.reps ); bit = ( dis < num_rep_distances ); - Re_encode_bit( &e->renc, &e->bm_rep[*state], bit ); + Re_encode_bit( &e->eb.renc, &e->eb.bm_rep[*state], bit ); if( bit ) /* repeated match */ { bit = ( dis == 0 ); - Re_encode_bit( &e->renc, &e->bm_rep0[*state], !bit ); + Re_encode_bit( &e->eb.renc, &e->eb.bm_rep0[*state], !bit ); if( bit ) - Re_encode_bit( &e->renc, &e->bm_len[*state][pos_state], len > 1 ); + Re_encode_bit( &e->eb.renc, &e->eb.bm_len[*state][pos_state], len > 1 ); else { - Re_encode_bit( &e->renc, &e->bm_rep1[*state], dis > 1 ); + Re_encode_bit( &e->eb.renc, &e->eb.bm_rep1[*state], dis > 1 ); if( dis > 1 ) - Re_encode_bit( &e->renc, &e->bm_rep2[*state], dis > 2 ); + Re_encode_bit( &e->eb.renc, &e->eb.bm_rep2[*state], dis > 2 ); } if( len == 1 ) *state = St_set_short_rep( *state ); else { - Re_encode_len( &e->renc, &e->rep_len_model, len, pos_state ); + Re_encode_len( &e->eb.renc, &e->eb.rep_len_model, len, pos_state ); Lp_decrement_counter( &e->rep_len_prices, pos_state ); *state = St_set_rep( *state ); } } else /* match */ { - LZe_encode_pair( e, dis - num_rep_distances, len, pos_state ); + LZeb_encode_pair( &e->eb, dis - num_rep_distances, len, pos_state ); if( get_slot( dis - num_rep_distances ) >= end_dis_model ) --e->align_price_counter; --e->dis_price_counter; @@ -794,14 +612,14 @@ static bool LZe_encode_member( struct LZ_encoder * const e ) } } ahead -= len; i += len; - if( Re_member_position( &e->renc ) >= e->member_size_limit ) + if( Re_member_position( &e->eb.renc ) >= e->eb.member_size_limit ) { - if( !Mf_dec_pos( e->matchfinder, ahead ) ) return false; - if( LZe_full_flush( e, *state ) ) e->member_finished = true; + if( !Mb_dec_pos( &e->eb.mb, ahead ) ) return false; + if( LZeb_full_flush( &e->eb ) ) e->eb.member_finished = true; return true; } } } - if( LZe_full_flush( e, *state ) ) e->member_finished = true; + if( LZeb_full_flush( &e->eb ) ) e->eb.member_finished = true; return true; } @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,475 +25,6 @@ Public License. */ -enum { max_num_trials = 1 << 13, - price_shift_bits = 6, - price_step_bits = 2 }; - -static const uint8_t dis_slots[1<<10] = - { - 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19 }; - -static inline uint8_t get_slot( const unsigned dis ) - { - if( dis < (1 << 10) ) return dis_slots[dis]; - if( dis < (1 << 19) ) return dis_slots[dis>> 9] + 18; - if( dis < (1 << 28) ) return dis_slots[dis>>18] + 36; - return dis_slots[dis>>27] + 54; - } - - -static const short prob_prices[bit_model_total >> price_step_bits] = -{ -640, 539, 492, 461, 438, 419, 404, 390, 379, 369, 359, 351, 343, 336, 330, 323, -318, 312, 307, 302, 298, 293, 289, 285, 281, 277, 274, 270, 267, 264, 261, 258, -255, 252, 250, 247, 244, 242, 239, 237, 235, 232, 230, 228, 226, 224, 222, 220, -218, 216, 214, 213, 211, 209, 207, 206, 204, 202, 201, 199, 198, 196, 195, 193, -192, 190, 189, 188, 186, 185, 184, 182, 181, 180, 178, 177, 176, 175, 174, 172, -171, 170, 169, 168, 167, 166, 165, 164, 163, 162, 161, 159, 158, 157, 157, 156, -155, 154, 153, 152, 151, 150, 149, 148, 147, 146, 145, 145, 144, 143, 142, 141, -140, 140, 139, 138, 137, 136, 136, 135, 134, 133, 133, 132, 131, 130, 130, 129, -128, 127, 127, 126, 125, 125, 124, 123, 123, 122, 121, 121, 120, 119, 119, 118, -117, 117, 116, 115, 115, 114, 114, 113, 112, 112, 111, 111, 110, 109, 109, 108, -108, 107, 106, 106, 105, 105, 104, 104, 103, 103, 102, 101, 101, 100, 100, 99, - 99, 98, 98, 97, 97, 96, 96, 95, 95, 94, 94, 93, 93, 92, 92, 91, - 91, 90, 90, 89, 89, 88, 88, 88, 87, 87, 86, 86, 85, 85, 84, 84, - 83, 83, 83, 82, 82, 81, 81, 80, 80, 80, 79, 79, 78, 78, 77, 77, - 77, 76, 76, 75, 75, 75, 74, 74, 73, 73, 73, 72, 72, 71, 71, 71, - 70, 70, 70, 69, 69, 68, 68, 68, 67, 67, 67, 66, 66, 65, 65, 65, - 64, 64, 64, 63, 63, 63, 62, 62, 61, 61, 61, 60, 60, 60, 59, 59, - 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 55, 55, 55, 54, 54, 54, - 53, 53, 53, 53, 52, 52, 52, 51, 51, 51, 50, 50, 50, 49, 49, 49, - 48, 48, 48, 48, 47, 47, 47, 46, 46, 46, 45, 45, 45, 45, 44, 44, - 44, 43, 43, 43, 43, 42, 42, 42, 41, 41, 41, 41, 40, 40, 40, 40, - 39, 39, 39, 38, 38, 38, 38, 37, 37, 37, 37, 36, 36, 36, 35, 35, - 35, 35, 34, 34, 34, 34, 33, 33, 33, 33, 32, 32, 32, 32, 31, 31, - 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 27, 27, - 27, 27, 26, 26, 26, 26, 26, 25, 25, 25, 25, 24, 24, 24, 24, 23, - 23, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, 20, - 20, 19, 19, 19, 19, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, 16, - 16, 16, 16, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 13, - 13, 13, 12, 12, 12, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10, - 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, - 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, - 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1 }; - -static inline int get_price( const int probability ) - { return prob_prices[probability >> price_step_bits]; } - - -static inline int price0( const Bit_model probability ) - { return get_price( probability ); } - -static inline int price1( const Bit_model probability ) - { return get_price( bit_model_total - probability ); } - -static inline int price_bit( const Bit_model bm, const int bit ) - { if( bit ) return price1( bm ); else return price0( bm ); } - - -static inline int price_symbol( const Bit_model bm[], int symbol, - const int num_bits ) - { - int price = 0; - symbol |= ( 1 << num_bits ); - while( symbol > 1 ) - { - const int bit = symbol & 1; - symbol >>= 1; - price += price_bit( bm[symbol], bit ); - } - return price; - } - - -static inline int price_symbol_reversed( const Bit_model bm[], int symbol, - const int num_bits ) - { - int price = 0; - int model = 1; - int i; - for( i = num_bits; i > 0; --i ) - { - const int bit = symbol & 1; - price += price_bit( bm[model], bit ); - model = ( model << 1 ) | bit; - symbol >>= 1; - } - return price; - } - - -static inline int price_matched( const Bit_model bm[], int symbol, - int match_byte ) - { - int price = 0; - int mask = 0x100; - symbol |= mask; - - do { - int match_bit, bit; - match_byte <<= 1; - match_bit = match_byte & mask; - symbol <<= 1; - bit = symbol & 0x100; - price += price_bit( bm[match_bit+(symbol>>9)+mask], bit ); - mask &= ~(match_byte ^ symbol); /* if( match_bit != bit ) mask = 0; */ - } - while( symbol < 0x10000 ); - return price; - } - - -struct Pair /* distance-length pair */ - { - int dis; - int len; - }; - - -enum { /* bytes to keep in buffer before dictionary */ - before_size = max_num_trials + 1, - /* bytes to keep in buffer after pos */ - after_size = max_num_trials + ( 2 * max_match_len ) + 1, - num_prev_positions3 = 1 << 16, - num_prev_positions2 = 1 << 10 }; - -struct Matchfinder - { - unsigned long long partial_data_pos; - uint8_t * buffer; /* input buffer */ - int32_t * prev_positions; /* 1 + last seen position of key. else 0 */ - int32_t * prev_pos_tree; /* previous positions of key */ - int match_len_limit; - int buffer_size; - int dictionary_size; /* bytes to keep in buffer before pos */ - int pos; /* current pos in buffer */ - int cyclic_pos; /* cycles through [0, dictionary_size] */ - int stream_pos; /* first byte not yet read from file */ - int pos_limit; /* when reached, a new block must be read */ - int cycles; - int key4_mask; - int num_prev_positions; /* size of prev_positions */ - bool at_stream_end; /* stream_pos shows real end of file */ - bool been_flushed; - bool flushing; - }; - -static int Mf_write_data( struct Matchfinder * const mf, - const uint8_t * const inbuf, const int size ) - { - const int sz = min( mf->buffer_size - mf->stream_pos, size ); - if( !mf->at_stream_end && sz > 0 ) - { - memcpy( mf->buffer + mf->stream_pos, inbuf, sz ); - mf->stream_pos += sz; - } - return sz; - } - -static bool Mf_normalize_pos( struct Matchfinder * const mf ); - -static inline void Mf_free( struct Matchfinder * const mf ) - { - free( mf->prev_positions ); - free( mf->buffer ); - } - -static inline uint8_t Mf_peek( const struct Matchfinder * const mf, - const int distance ) - { return mf->buffer[mf->pos-distance]; } - -static inline int Mf_available_bytes( const struct Matchfinder * const mf ) - { return mf->stream_pos - mf->pos; } - -static inline unsigned long long -Mf_data_position( const struct Matchfinder * const mf ) - { return mf->partial_data_pos + mf->pos; } - -static inline void Mf_finish( struct Matchfinder * const mf ) - { mf->at_stream_end = true; mf->flushing = false; } - -static inline bool Mf_finished( const struct Matchfinder * const mf ) - { return mf->at_stream_end && mf->pos >= mf->stream_pos; } - -static inline bool Mf_flushing_or_end( const struct Matchfinder * const mf ) - { return mf->at_stream_end || mf->flushing; } - -static inline int Mf_free_bytes( const struct Matchfinder * const mf ) - { if( mf->at_stream_end ) return 0; return mf->buffer_size - mf->stream_pos; } - -static inline bool Mf_enough_available_bytes( const struct Matchfinder * const mf ) - { - return ( mf->pos + after_size <= mf->stream_pos || - ( Mf_flushing_or_end( mf ) && mf->pos < mf->stream_pos ) ); - } - -static inline const uint8_t * -Mf_ptr_to_current_pos( const struct Matchfinder * const mf ) - { return mf->buffer + mf->pos; } - -static inline bool Mf_dec_pos( struct Matchfinder * const mf, - const int ahead ) - { - if( ahead < 0 || mf->pos < ahead ) return false; - mf->pos -= ahead; - mf->cyclic_pos -= ahead; - if( mf->cyclic_pos < 0 ) mf->cyclic_pos += mf->dictionary_size + 1; - return true; - } - -static inline int Mf_true_match_len( const struct Matchfinder * const mf, - const int index, const int distance, - int len_limit ) - { - const uint8_t * const data = mf->buffer + mf->pos + index; - int i = 0; - - if( index + len_limit > Mf_available_bytes( mf ) ) - len_limit = Mf_available_bytes( mf ) - index; - while( i < len_limit && data[i-distance] == data[i] ) ++i; - return i; - } - -static inline bool Mf_move_pos( struct Matchfinder * const mf ) - { - if( ++mf->cyclic_pos > mf->dictionary_size ) mf->cyclic_pos = 0; - if( ++mf->pos >= mf->pos_limit ) return Mf_normalize_pos( mf ); - return true; - } - -static int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs ); - - -enum { re_min_free_bytes = 2 * max_num_trials }; - -struct Range_encoder - { - struct Circular_buffer cb; - uint64_t low; - unsigned long long partial_member_pos; - uint32_t range; - unsigned ff_count; - uint8_t cache; - }; - -static inline void Re_shift_low( struct Range_encoder * const renc ) - { - const bool carry = ( renc->low > 0xFFFFFFFFU ); - if( carry || renc->low < 0xFF000000U ) - { - Cb_put_byte( &renc->cb, renc->cache + carry ); - for( ; renc->ff_count > 0; --renc->ff_count ) - Cb_put_byte( &renc->cb, 0xFF + carry ); - renc->cache = renc->low >> 24; - } - else ++renc->ff_count; - renc->low = ( renc->low & 0x00FFFFFFU ) << 8; - } - -static inline bool Re_init( struct Range_encoder * const renc ) - { - if( !Cb_init( &renc->cb, 65536 + re_min_free_bytes ) ) return false; - renc->low = 0; - renc->partial_member_pos = 0; - renc->range = 0xFFFFFFFFU; - renc->ff_count = 0; - renc->cache = 0; - return true; - } - -static inline void Re_free( struct Range_encoder * const renc ) - { Cb_free( &renc->cb ); } - -static inline unsigned long long -Re_member_position( const struct Range_encoder * const renc ) - { return renc->partial_member_pos + Cb_used_bytes( &renc->cb ) + renc->ff_count; } - -static inline bool Re_enough_free_bytes( const struct Range_encoder * const renc ) - { return Cb_free_bytes( &renc->cb ) >= re_min_free_bytes; } - -static inline int Re_read_data( struct Range_encoder * const renc, - uint8_t * const out_buffer, const int out_size ) - { - const int size = Cb_read_data( &renc->cb, out_buffer, out_size ); - if( size > 0 ) renc->partial_member_pos += size; - return size; - } - -static inline void Re_flush( struct Range_encoder * const renc ) - { - int i; for( i = 0; i < 5; ++i ) Re_shift_low( renc ); - renc->low = 0; - renc->range = 0xFFFFFFFFU; - renc->ff_count = 0; - renc->cache = 0; - } - -static inline void Re_encode( struct Range_encoder * const renc, - const int symbol, const int num_bits ) - { - int i; - for( i = num_bits - 1; i >= 0; --i ) - { - renc->range >>= 1; - if( (symbol >> i) & 1 ) renc->low += renc->range; - if( renc->range <= 0x00FFFFFFU ) - { renc->range <<= 8; Re_shift_low( renc ); } - } - } - -static inline void Re_encode_bit( struct Range_encoder * const renc, - Bit_model * const probability, const int bit ) - { - const uint32_t bound = ( renc->range >> bit_model_total_bits ) * *probability; - if( !bit ) - { - renc->range = bound; - *probability += (bit_model_total - *probability) >> bit_model_move_bits; - } - else - { - renc->low += bound; - renc->range -= bound; - *probability -= *probability >> bit_model_move_bits; - } - if( renc->range <= 0x00FFFFFFU ) - { renc->range <<= 8; Re_shift_low( renc ); } - } - -static inline void Re_encode_tree( struct Range_encoder * const renc, - Bit_model bm[], const int symbol, const int num_bits ) - { - int mask = ( 1 << ( num_bits - 1 ) ); - int model = 1; - int i; - for( i = num_bits; i > 0; --i, mask >>= 1 ) - { - const int bit = ( symbol & mask ); - Re_encode_bit( renc, &bm[model], bit ); - model <<= 1; - if( bit ) model |= 1; - } - } - -static inline void Re_encode_tree_reversed( struct Range_encoder * const renc, - Bit_model bm[], int symbol, const int num_bits ) - { - int model = 1; - int i; - for( i = num_bits; i > 0; --i ) - { - const int bit = symbol & 1; - Re_encode_bit( renc, &bm[model], bit ); - model = ( model << 1 ) | bit; - symbol >>= 1; - } - } - -static inline void Re_encode_matched( struct Range_encoder * const renc, - Bit_model bm[], int symbol, - int match_byte ) - { - int mask = 0x100; - symbol |= mask; - - do { - int match_bit, bit; - match_byte <<= 1; - match_bit = match_byte & mask; - symbol <<= 1; - bit = symbol & 0x100; - Re_encode_bit( renc, &bm[match_bit+(symbol>>9)+mask], bit ); - mask &= ~(match_byte ^ symbol); /* if( match_bit != bit ) mask = 0; */ - } - while( symbol < 0x10000 ); - } - -static inline void Re_encode_len( struct Range_encoder * const renc, - struct Len_model * const lm, - int symbol, const int pos_state ) - { - bool bit = ( ( symbol -= min_match_len ) >= len_low_symbols ); - Re_encode_bit( renc, &lm->choice1, bit ); - if( !bit ) - Re_encode_tree( renc, lm->bm_low[pos_state], symbol, len_low_bits ); - else - { - bit = ( symbol >= len_low_symbols + len_mid_symbols ); - Re_encode_bit( renc, &lm->choice2, bit ); - if( !bit ) - Re_encode_tree( renc, lm->bm_mid[pos_state], - symbol - len_low_symbols, len_mid_bits ); - else - Re_encode_tree( renc, lm->bm_high, - symbol - len_low_symbols - len_mid_symbols, len_high_bits ); - } - } - - struct Len_prices { const struct Len_model * lm; @@ -530,15 +61,17 @@ static inline void Lp_update_high_prices( struct Len_prices * const lp ) price_symbol( lp->lm->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); } +static inline void Lp_reset( struct Len_prices * const lp ) + { int i; for( i = 0; i < pos_states; ++i ) lp->counters[i] = 0; } + static inline void Lp_init( struct Len_prices * const lp, const struct Len_model * const lm, const int match_len_limit ) { - int i; lp->lm = lm; lp->len_symbols = match_len_limit + 1 - min_match_len; lp->count = ( match_len_limit > 12 ) ? 1 : lp->len_symbols; - for( i = 0; i < pos_states; ++i ) lp->counters[i] = 0; + Lp_reset( lp ); } static inline void Lp_decrement_counter( struct Len_prices * const lp, @@ -561,9 +94,14 @@ static inline int Lp_price( const struct Len_prices * const lp, { return lp->prices[pos_state][symbol - min_match_len]; } +struct Pair /* distance-length pair */ + { + int dis; + int len; + }; + enum { infinite_price = 0x0FFFFFFF, - max_marker_size = 16, - num_rep_distances = 4, /* must be 4 */ + max_num_trials = 1 << 13, single_step_trial = -2, dual_step_trial = -1 }; @@ -613,31 +151,14 @@ static inline void Tr_update3( struct Trial * const trial, const int pr, struct LZ_encoder { - unsigned long long member_size_limit; - int pending_num_pairs; - uint32_t crc; - - Bit_model bm_literal[1<<literal_context_bits][0x300]; - Bit_model bm_match[states][pos_states]; - Bit_model bm_rep[states]; - Bit_model bm_rep0[states]; - Bit_model bm_rep1[states]; - Bit_model bm_rep2[states]; - Bit_model bm_len[states][pos_states]; - Bit_model bm_dis_slot[len_states][1<<dis_slot_bits]; - Bit_model bm_dis[modeled_distances-end_dis_model]; - Bit_model bm_align[dis_align_size]; - - struct Matchfinder * matchfinder; - struct Range_encoder renc; - struct Len_model match_len_model; - struct Len_model rep_len_model; + struct LZ_encoder_base eb; + int cycles; + int match_len_limit; struct Len_prices match_len_prices; struct Len_prices rep_len_prices; - + int pending_num_pairs; struct Pair pairs[max_match_len+1]; struct Trial trials[max_num_trials]; - int reps[num_rep_distances]; int dis_slot_prices[len_states][2*max_dictionary_bits]; int dis_prices[len_states][modeled_distances]; @@ -646,18 +167,20 @@ struct LZ_encoder int price_counter; int dis_price_counter; int align_price_counter; - State state; - bool member_finished; + bool been_flushed; }; -static inline bool LZe_member_finished( const struct LZ_encoder * const e ) - { return ( e->member_finished && !Cb_used_bytes( &e->renc.cb ) ); } - -static inline void LZe_free( struct LZ_encoder * const e ) - { Re_free( &e->renc ); } +static inline bool Mb_dec_pos( struct Matchfinder_base * const mb, + const int ahead ) + { + if( ahead < 0 || mb->pos < ahead ) return false; + mb->pos -= ahead; + mb->cyclic_pos -= ahead; + if( mb->cyclic_pos < 0 ) mb->cyclic_pos += mb->dictionary_size + 1; + return true; + } -static inline unsigned LZe_crc( const struct LZ_encoder * const e ) - { return e->crc ^ 0xFFFFFFFFU; } +static int LZe_get_match_pairs( struct LZ_encoder * const e, struct Pair * pairs ); /* move-to-front dis in/into reps if( dis > 0 ) */ static inline void mtf_reps( const int dis, int reps[num_rep_distances] ) @@ -676,26 +199,26 @@ static inline void mtf_reps( const int dis, int reps[num_rep_distances] ) } } -static inline int LZe_price_shortrep( const struct LZ_encoder * const e, - const State state, const int pos_state ) +static inline int LZeb_price_shortrep( const struct LZ_encoder_base * const eb, + const State state, const int pos_state ) { - return price0( e->bm_rep0[state] ) + price0( e->bm_len[state][pos_state] ); + return price0( eb->bm_rep0[state] ) + price0( eb->bm_len[state][pos_state] ); } -static inline int LZe_price_rep( const struct LZ_encoder * const e, - const int rep, - const State state, const int pos_state ) +static inline int LZeb_price_rep( const struct LZ_encoder_base * const eb, + const int rep, + const State state, const int pos_state ) { int price; - if( rep == 0 ) return price0( e->bm_rep0[state] ) + - price1( e->bm_len[state][pos_state] ); - price = price1( e->bm_rep0[state] ); + if( rep == 0 ) return price0( eb->bm_rep0[state] ) + + price1( eb->bm_len[state][pos_state] ); + price = price1( eb->bm_rep0[state] ); if( rep == 1 ) - price += price0( e->bm_rep1[state] ); + price += price0( eb->bm_rep1[state] ); else { - price += price1( e->bm_rep1[state] ); - price += price_bit( e->bm_rep2[state], rep - 2 ); + price += price1( eb->bm_rep1[state] ); + price += price_bit( eb->bm_rep2[state], rep - 2 ); } return price; } @@ -704,7 +227,7 @@ static inline int LZe_price_rep0_len( const struct LZ_encoder * const e, const int len, const State state, const int pos_state ) { - return LZe_price_rep( e, 0, state, pos_state ) + + return LZeb_price_rep( &e->eb, 0, state, pos_state ) + Lp_price( &e->rep_len_prices, len, pos_state ); } @@ -721,64 +244,15 @@ static inline int LZe_price_pair( const struct LZ_encoder * const e, e->align_prices[dis & (dis_align_size - 1)]; } -static inline int LZe_price_literal( const struct LZ_encoder * const e, - uint8_t prev_byte, uint8_t symbol ) - { return price_symbol( e->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); } - -static inline int LZe_price_matched( const struct LZ_encoder * const e, - uint8_t prev_byte, uint8_t symbol, - uint8_t match_byte ) - { return price_matched( e->bm_literal[get_lit_state(prev_byte)], symbol, - match_byte ); } - -static inline void LZe_encode_literal( struct LZ_encoder * const e, - uint8_t prev_byte, uint8_t symbol ) - { Re_encode_tree( &e->renc, - e->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); } - -static inline void LZe_encode_matched( struct LZ_encoder * const e, - uint8_t prev_byte, uint8_t symbol, - uint8_t match_byte ) - { Re_encode_matched( &e->renc, e->bm_literal[get_lit_state(prev_byte)], - symbol, match_byte ); } - -static inline void LZe_encode_pair( struct LZ_encoder * const e, - const unsigned dis, const int len, - const int pos_state ) - { - const int dis_slot = get_slot( dis ); - Re_encode_len( &e->renc, &e->match_len_model, len, pos_state ); - Re_encode_tree( &e->renc, e->bm_dis_slot[get_len_state(len)], dis_slot, - dis_slot_bits ); - - if( dis_slot >= start_dis_model ) - { - const int direct_bits = ( dis_slot >> 1 ) - 1; - const unsigned base = ( 2 | ( dis_slot & 1 ) ) << direct_bits; - const unsigned direct_dis = dis - base; - - if( dis_slot < end_dis_model ) - Re_encode_tree_reversed( &e->renc, e->bm_dis + base - dis_slot - 1, - direct_dis, direct_bits ); - else - { - Re_encode( &e->renc, direct_dis >> dis_align_bits, - direct_bits - dis_align_bits ); - Re_encode_tree_reversed( &e->renc, e->bm_align, direct_dis, dis_align_bits ); - } - } - } - static inline int LZe_read_match_distances( struct LZ_encoder * const e ) { - const int num_pairs = Mf_get_match_pairs( e->matchfinder, e->pairs ); + const int num_pairs = LZe_get_match_pairs( e, e->pairs ); if( num_pairs > 0 ) { int len = e->pairs[num_pairs-1].len; - if( len == e->matchfinder->match_len_limit && len < max_match_len ) + if( len == e->match_len_limit && len < max_match_len ) { - len += Mf_true_match_len( e->matchfinder, len, - e->pairs[num_pairs-1].dis + 1, + len += Mb_true_match_len( &e->eb.mb, len, e->pairs[num_pairs-1].dis + 1, max_match_len - len ); e->pairs[num_pairs-1].len = len; } @@ -786,13 +260,13 @@ static inline int LZe_read_match_distances( struct LZ_encoder * const e ) return num_pairs; } -static inline bool LZe_move_pos( struct LZ_encoder * const e, int n ) +static inline bool LZe_move_and_update( struct LZ_encoder * const e, int n ) { while( true ) { - if( !Mf_move_pos( e->matchfinder ) ) return false; + if( !Mb_move_pos( &e->eb.mb ) ) return false; if( --n <= 0 ) break; - Mf_get_match_pairs( e->matchfinder, 0 ); + LZe_get_match_pairs( e, 0 ); } return true; } @@ -823,3 +297,47 @@ static inline void LZe_backward( struct LZ_encoder * const e, int cur ) cur = prev_index; } } + +enum { num_prev_positions3 = 1 << 16, + num_prev_positions2 = 1 << 10 }; + +static inline bool LZe_init( struct LZ_encoder * const e, + const int dict_size, const int len_limit, + const unsigned long long member_size ) + { + enum { before = max_num_trials + 1, + /* bytes to keep in buffer after pos */ + after_size = max_num_trials + ( 2 * max_match_len ) + 1, + dict_factor = 2, + num_prev_positions23 = num_prev_positions2 + num_prev_positions3, + pos_array_factor = 2, + min_free_bytes = 2 * max_num_trials }; + + if( !LZeb_init( &e->eb, before, dict_size, after_size, dict_factor, + num_prev_positions23, pos_array_factor, min_free_bytes, + member_size ) ) return false; + e->cycles = ( len_limit < max_match_len ) ? 16 + ( len_limit / 2 ) : 256; + e->match_len_limit = len_limit; + Lp_init( &e->match_len_prices, &e->eb.match_len_model, e->match_len_limit ); + Lp_init( &e->rep_len_prices, &e->eb.rep_len_model, e->match_len_limit ); + e->pending_num_pairs = 0; + e->num_dis_slots = 2 * real_bits( e->eb.mb.dictionary_size - 1 ); + e->price_counter = 0; + e->dis_price_counter = 0; + e->align_price_counter = 0; + e->been_flushed = false; + return true; + } + +static inline void LZe_reset( struct LZ_encoder * const e, + const unsigned long long member_size ) + { + LZeb_reset( &e->eb, member_size ); + Lp_reset( &e->match_len_prices ); + Lp_reset( &e->rep_len_prices ); + e->pending_num_pairs = 0; + e->price_counter = 0; + e->dis_price_counter = 0; + e->align_price_counter = 0; + e->been_flushed = false; + } diff --git a/encoder_base.c b/encoder_base.c new file mode 100644 index 0000000..d666c6e --- /dev/null +++ b/encoder_base.c @@ -0,0 +1,192 @@ +/* Lzlib - Compression library for the lzip format + Copyright (C) 2009-2015 Antonio Diaz Diaz. + + This library is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this library. If not, see <http://www.gnu.org/licenses/>. + + As a special exception, you may use this file as part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this + file, or you compile this file and link it with other files to + produce an executable, this file does not by itself cause the + resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. +*/ + +static bool Mb_normalize_pos( struct Matchfinder_base * const mb ) + { + if( mb->pos > mb->stream_pos ) + { mb->pos = mb->stream_pos; return false; } + if( !mb->at_stream_end ) + { + int i; + const int offset = mb->pos - mb->dictionary_size - mb->before_size; + const int size = mb->stream_pos - offset; + memmove( mb->buffer, mb->buffer + offset, size ); + mb->partial_data_pos += offset; + mb->pos -= offset; + mb->stream_pos -= offset; + for( i = 0; i < mb->num_prev_positions; ++i ) + mb->prev_positions[i] -= min( mb->prev_positions[i], offset ); + for( i = 0; i < mb->pos_array_size; ++i ) + mb->pos_array[i] -= min( mb->pos_array[i], offset ); + } + return true; + } + + +static bool Mb_init( struct Matchfinder_base * const mb, + const int before, const int dict_size, + const int after_size, const int dict_factor, + const int num_prev_positions23, + const int pos_array_factor ) + { + const int buffer_size_limit = ( dict_factor * dict_size ) + before + after_size; + unsigned size; + int i; + + mb->partial_data_pos = 0; + mb->before_size = before; + mb->after_size = after_size; + mb->pos = 0; + mb->cyclic_pos = 0; + mb->stream_pos = 0; + mb->at_stream_end = false; + mb->flushing = false; + + mb->buffer_size = max( 65536, buffer_size_limit ); + mb->buffer = (uint8_t *)malloc( mb->buffer_size ); + if( !mb->buffer ) return false; + mb->dictionary_size = dict_size; + mb->pos_limit = mb->buffer_size - after_size; + size = 1 << max( 16, real_bits( mb->dictionary_size - 1 ) - 2 ); + if( mb->dictionary_size > 1 << 26 ) /* 64 MiB */ + size >>= 1; + mb->key4_mask = size - 1; + mb->num_prev_positions23 = num_prev_positions23; + size += num_prev_positions23; + + mb->num_prev_positions = size; + mb->pos_array_size = pos_array_factor * ( mb->dictionary_size + 1 ); + size += mb->pos_array_size; + if( size * sizeof (int32_t) <= size ) mb->prev_positions = 0; + else mb->prev_positions = (int32_t *)malloc( size * sizeof (int32_t) ); + if( !mb->prev_positions ) { free( mb->buffer ); return false; } + mb->pos_array = mb->prev_positions + mb->num_prev_positions; + for( i = 0; i < mb->num_prev_positions; ++i ) mb->prev_positions[i] = 0; + return true; + } + + +static void Mb_adjust_distionary_size( struct Matchfinder_base * const mb ) + { + if( mb->stream_pos < mb->dictionary_size ) + { + int size; + mb->buffer_size = + mb->dictionary_size = + mb->pos_limit = max( min_dictionary_size, mb->stream_pos ); + size = 1 << max( 16, real_bits( mb->dictionary_size - 1 ) - 2 ); + if( mb->dictionary_size > 1 << 26 ) + size >>= 1; + mb->key4_mask = size - 1; + size += mb->num_prev_positions23; + mb->num_prev_positions = size; + mb->pos_array = mb->prev_positions + mb->num_prev_positions; + } + } + + +static void Mb_reset( struct Matchfinder_base * const mb ) + { + int i; + if( mb->stream_pos > mb->pos ) + memmove( mb->buffer, mb->buffer + mb->pos, mb->stream_pos - mb->pos ); + mb->partial_data_pos = 0; + mb->stream_pos -= mb->pos; + mb->pos = 0; + mb->cyclic_pos = 0; + mb->at_stream_end = false; + mb->flushing = false; + for( i = 0; i < mb->num_prev_positions; ++i ) mb->prev_positions[i] = 0; + } + + + /* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */ +static bool LZeb_full_flush( struct LZ_encoder_base * const eb ) + { + int i; + const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask; + const State state = eb->state; + File_trailer trailer; + if( eb->member_finished || + Cb_free_bytes( &eb->renc.cb ) < max_marker_size + Ft_size ) + return false; + Re_encode_bit( &eb->renc, &eb->bm_match[state][pos_state], 1 ); + Re_encode_bit( &eb->renc, &eb->bm_rep[state], 0 ); + LZeb_encode_pair( eb, 0xFFFFFFFFU, min_match_len, pos_state ); + Re_flush( &eb->renc ); + Ft_set_data_crc( trailer, LZeb_crc( eb ) ); + Ft_set_data_size( trailer, Mb_data_position( &eb->mb ) ); + Ft_set_member_size( trailer, Re_member_position( &eb->renc ) + Ft_size ); + for( i = 0; i < Ft_size; ++i ) + Cb_put_byte( &eb->renc.cb, trailer[i] ); + return true; + } + + + /* Sync Flush mark => (dis == 0xFFFFFFFFU, len == min_match_len + 1) */ +static bool LZeb_sync_flush( struct LZ_encoder_base * const eb ) + { + int i; + const int pos_state = Mb_data_position( &eb->mb ) & pos_state_mask; + const State state = eb->state; + if( eb->member_finished || Cb_free_bytes( &eb->renc.cb ) < 2 * max_marker_size ) + return false; + for( i = 0; i < 2; ++i ) /* 2 consecutive markers guarantee decoding */ + { + Re_encode_bit( &eb->renc, &eb->bm_match[state][pos_state], 1 ); + Re_encode_bit( &eb->renc, &eb->bm_rep[state], 0 ); + LZeb_encode_pair( eb, 0xFFFFFFFFU, min_match_len + 1, pos_state ); + Re_flush( &eb->renc ); + } + return true; + } + + +static void LZeb_reset( struct LZ_encoder_base * const eb, + const unsigned long long member_size ) + { + int i; + Mb_reset( &eb->mb ); + eb->member_size_limit = member_size - Ft_size - max_marker_size; + eb->crc = 0xFFFFFFFFU; + Bm_array_init( eb->bm_literal[0], (1 << literal_context_bits) * 0x300 ); + Bm_array_init( eb->bm_match[0], states * pos_states ); + Bm_array_init( eb->bm_rep, states ); + Bm_array_init( eb->bm_rep0, states ); + Bm_array_init( eb->bm_rep1, states ); + Bm_array_init( eb->bm_rep2, states ); + Bm_array_init( eb->bm_len[0], states * pos_states ); + Bm_array_init( eb->bm_dis_slot[0], len_states * (1 << dis_slot_bits) ); + Bm_array_init( eb->bm_dis, modeled_distances - end_dis_model ); + Bm_array_init( eb->bm_align, dis_align_size ); + Lm_init( &eb->match_len_model ); + Lm_init( &eb->rep_len_model ); + Re_reset( &eb->renc ); + for( i = 0; i < num_rep_distances; ++i ) eb->reps[i] = 0; + eb->state = 0; + eb->member_finished = false; + } diff --git a/encoder_base.h b/encoder_base.h new file mode 100644 index 0000000..35ad605 --- /dev/null +++ b/encoder_base.h @@ -0,0 +1,587 @@ +/* Lzlib - Compression library for the lzip format + Copyright (C) 2009-2015 Antonio Diaz Diaz. + + This library is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this library. If not, see <http://www.gnu.org/licenses/>. + + As a special exception, you may use this file as part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this + file, or you compile this file and link it with other files to + produce an executable, this file does not by itself cause the + resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. +*/ + +enum { price_shift_bits = 6, + price_step_bits = 2 }; + +static const uint8_t dis_slots[1<<10] = + { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19 }; + +static inline uint8_t get_slot( const unsigned dis ) + { + if( dis < (1 << 10) ) return dis_slots[dis]; + if( dis < (1 << 19) ) return dis_slots[dis>> 9] + 18; + if( dis < (1 << 28) ) return dis_slots[dis>>18] + 36; + return dis_slots[dis>>27] + 54; + } + + +static const short prob_prices[bit_model_total >> price_step_bits] = +{ +640, 539, 492, 461, 438, 419, 404, 390, 379, 369, 359, 351, 343, 336, 330, 323, +318, 312, 307, 302, 298, 293, 289, 285, 281, 277, 274, 270, 267, 264, 261, 258, +255, 252, 250, 247, 244, 242, 239, 237, 235, 232, 230, 228, 226, 224, 222, 220, +218, 216, 214, 213, 211, 209, 207, 206, 204, 202, 201, 199, 198, 196, 195, 193, +192, 190, 189, 188, 186, 185, 184, 182, 181, 180, 178, 177, 176, 175, 174, 172, +171, 170, 169, 168, 167, 166, 165, 164, 163, 162, 161, 159, 158, 157, 157, 156, +155, 154, 153, 152, 151, 150, 149, 148, 147, 146, 145, 145, 144, 143, 142, 141, +140, 140, 139, 138, 137, 136, 136, 135, 134, 133, 133, 132, 131, 130, 130, 129, +128, 127, 127, 126, 125, 125, 124, 123, 123, 122, 121, 121, 120, 119, 119, 118, +117, 117, 116, 115, 115, 114, 114, 113, 112, 112, 111, 111, 110, 109, 109, 108, +108, 107, 106, 106, 105, 105, 104, 104, 103, 103, 102, 101, 101, 100, 100, 99, + 99, 98, 98, 97, 97, 96, 96, 95, 95, 94, 94, 93, 93, 92, 92, 91, + 91, 90, 90, 89, 89, 88, 88, 88, 87, 87, 86, 86, 85, 85, 84, 84, + 83, 83, 83, 82, 82, 81, 81, 80, 80, 80, 79, 79, 78, 78, 77, 77, + 77, 76, 76, 75, 75, 75, 74, 74, 73, 73, 73, 72, 72, 71, 71, 71, + 70, 70, 70, 69, 69, 68, 68, 68, 67, 67, 67, 66, 66, 65, 65, 65, + 64, 64, 64, 63, 63, 63, 62, 62, 61, 61, 61, 60, 60, 60, 59, 59, + 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 55, 55, 55, 54, 54, 54, + 53, 53, 53, 53, 52, 52, 52, 51, 51, 51, 50, 50, 50, 49, 49, 49, + 48, 48, 48, 48, 47, 47, 47, 46, 46, 46, 45, 45, 45, 45, 44, 44, + 44, 43, 43, 43, 43, 42, 42, 42, 41, 41, 41, 41, 40, 40, 40, 40, + 39, 39, 39, 38, 38, 38, 38, 37, 37, 37, 37, 36, 36, 36, 35, 35, + 35, 35, 34, 34, 34, 34, 33, 33, 33, 33, 32, 32, 32, 32, 31, 31, + 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 27, 27, + 27, 27, 26, 26, 26, 26, 26, 25, 25, 25, 25, 24, 24, 24, 24, 23, + 23, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, 20, + 20, 19, 19, 19, 19, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, 16, + 16, 16, 16, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 13, + 13, 13, 12, 12, 12, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10, + 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, + 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1 }; + +static inline int get_price( const int probability ) + { return prob_prices[probability >> price_step_bits]; } + + +static inline int price0( const Bit_model probability ) + { return get_price( probability ); } + +static inline int price1( const Bit_model probability ) + { return get_price( bit_model_total - probability ); } + +static inline int price_bit( const Bit_model bm, const int bit ) + { if( bit ) return price1( bm ); else return price0( bm ); } + + +static inline int price_symbol( const Bit_model bm[], int symbol, + const int num_bits ) + { + int price = 0; + symbol |= ( 1 << num_bits ); + while( symbol > 1 ) + { + const int bit = symbol & 1; + symbol >>= 1; + price += price_bit( bm[symbol], bit ); + } + return price; + } + + +static inline int price_symbol_reversed( const Bit_model bm[], int symbol, + const int num_bits ) + { + int price = 0; + int model = 1; + int i; + for( i = num_bits; i > 0; --i ) + { + const int bit = symbol & 1; + price += price_bit( bm[model], bit ); + model = ( model << 1 ) | bit; + symbol >>= 1; + } + return price; + } + + +static inline int price_matched( const Bit_model bm[], int symbol, + int match_byte ) + { + int price = 0; + int mask = 0x100; + symbol |= mask; + + do { + int match_bit, bit; + match_byte <<= 1; + match_bit = match_byte & mask; + symbol <<= 1; + bit = symbol & 0x100; + price += price_bit( bm[match_bit+(symbol>>9)+mask], bit ); + mask &= ~(match_byte ^ symbol); /* if( match_bit != bit ) mask = 0; */ + } + while( symbol < 0x10000 ); + return price; + } + + +struct Matchfinder_base + { + unsigned long long partial_data_pos; + uint8_t * buffer; /* input buffer */ + int32_t * prev_positions; /* 1 + last seen position of key. else 0 */ + int32_t * pos_array; /* may be tree or chain */ + int before_size; /* bytes to keep in buffer before dictionary */ + int after_size; /* bytes to keep in buffer after pos */ + int buffer_size; + int dictionary_size; /* bytes to keep in buffer before pos */ + int pos; /* current pos in buffer */ + int cyclic_pos; /* cycles through [0, dictionary_size] */ + int stream_pos; /* first byte not yet read from file */ + int pos_limit; /* when reached, a new block must be read */ + int key4_mask; + int num_prev_positions23; + int num_prev_positions; /* size of prev_positions */ + int pos_array_size; + bool at_stream_end; /* stream_pos shows real end of file */ + bool flushing; + }; + +static bool Mb_normalize_pos( struct Matchfinder_base * const mb ); + +static bool Mb_init( struct Matchfinder_base * const mb, + const int before, const int dict_size, + const int after_size, const int dict_factor, + const int num_prev_positions23, + const int pos_array_factor ); + +static inline void Mb_free( struct Matchfinder_base * const mb ) + { free( mb->prev_positions ); free( mb->buffer ); } + +static inline uint8_t Mb_peek( const struct Matchfinder_base * const mb, + const int distance ) + { return mb->buffer[mb->pos-distance]; } + +static inline int Mb_available_bytes( const struct Matchfinder_base * const mb ) + { return mb->stream_pos - mb->pos; } + +static inline unsigned long long +Mb_data_position( const struct Matchfinder_base * const mb ) + { return mb->partial_data_pos + mb->pos; } + +static inline void Mb_finish( struct Matchfinder_base * const mb ) + { mb->at_stream_end = true; mb->flushing = false; } + +static inline bool Mb_data_finished( const struct Matchfinder_base * const mb ) + { return mb->at_stream_end && !mb->flushing && mb->pos >= mb->stream_pos; } + +static inline bool Mb_flushing_or_end( const struct Matchfinder_base * const mb ) + { return mb->at_stream_end || mb->flushing; } + +static inline int Mb_free_bytes( const struct Matchfinder_base * const mb ) + { if( Mb_flushing_or_end( mb ) ) return 0; + return mb->buffer_size - mb->stream_pos; } + +static inline bool Mb_enough_available_bytes( const struct Matchfinder_base * const mb ) + { + return ( mb->pos + mb->after_size <= mb->stream_pos || + ( Mb_flushing_or_end( mb ) && mb->pos < mb->stream_pos ) ); + } + +static inline const uint8_t * +Mb_ptr_to_current_pos( const struct Matchfinder_base * const mb ) + { return mb->buffer + mb->pos; } + +static int Mb_write_data( struct Matchfinder_base * const mb, + const uint8_t * const inbuf, const int size ) + { + const int sz = min( mb->buffer_size - mb->stream_pos, size ); + if( Mb_flushing_or_end( mb ) || sz <= 0 ) return 0; + memcpy( mb->buffer + mb->stream_pos, inbuf, sz ); + mb->stream_pos += sz; + return sz; + } + +static inline int Mb_true_match_len( const struct Matchfinder_base * const mb, + const int index, const int distance, + int len_limit ) + { + const uint8_t * const data = mb->buffer + mb->pos + index; + int i = 0; + if( index + len_limit > Mb_available_bytes( mb ) ) + len_limit = Mb_available_bytes( mb ) - index; + while( i < len_limit && data[i-distance] == data[i] ) ++i; + return i; + } + +static inline bool Mb_move_pos( struct Matchfinder_base * const mb ) + { + if( ++mb->cyclic_pos > mb->dictionary_size ) mb->cyclic_pos = 0; + if( ++mb->pos >= mb->pos_limit ) return Mb_normalize_pos( mb ); + return true; + } + + +struct Range_encoder + { + struct Circular_buffer cb; + int min_free_bytes; + uint64_t low; + unsigned long long partial_member_pos; + uint32_t range; + unsigned ff_count; + uint8_t cache; + File_header header; + }; + +static inline void Re_shift_low( struct Range_encoder * const renc ) + { + const bool carry = ( renc->low > 0xFFFFFFFFU ); + if( carry || renc->low < 0xFF000000U ) + { + Cb_put_byte( &renc->cb, renc->cache + carry ); + for( ; renc->ff_count > 0; --renc->ff_count ) + Cb_put_byte( &renc->cb, 0xFF + carry ); + renc->cache = renc->low >> 24; + } + else ++renc->ff_count; + renc->low = ( renc->low & 0x00FFFFFFU ) << 8; + } + +static inline void Re_reset( struct Range_encoder * const renc ) + { + int i; + Cb_reset( &renc->cb ); + renc->low = 0; + renc->partial_member_pos = 0; + renc->range = 0xFFFFFFFFU; + renc->ff_count = 0; + renc->cache = 0; + for( i = 0; i < Fh_size; ++i ) + Cb_put_byte( &renc->cb, renc->header[i] ); + } + +static inline bool Re_init( struct Range_encoder * const renc, + const unsigned dictionary_size, + const int min_free_bytes ) + { + if( !Cb_init( &renc->cb, 65536 + min_free_bytes ) ) return false; + renc->min_free_bytes = min_free_bytes; + Fh_set_magic( renc->header ); + Fh_set_dictionary_size( renc->header, dictionary_size ); + Re_reset( renc ); + return true; + } + +static inline void Re_free( struct Range_encoder * const renc ) + { Cb_free( &renc->cb ); } + +static inline unsigned long long +Re_member_position( const struct Range_encoder * const renc ) + { return renc->partial_member_pos + Cb_used_bytes( &renc->cb ) + renc->ff_count; } + +static inline bool Re_enough_free_bytes( const struct Range_encoder * const renc ) + { return Cb_free_bytes( &renc->cb ) >= renc->min_free_bytes; } + +static inline int Re_read_data( struct Range_encoder * const renc, + uint8_t * const out_buffer, const int out_size ) + { + const int size = Cb_read_data( &renc->cb, out_buffer, out_size ); + if( size > 0 ) renc->partial_member_pos += size; + return size; + } + +static inline void Re_flush( struct Range_encoder * const renc ) + { + int i; for( i = 0; i < 5; ++i ) Re_shift_low( renc ); + renc->low = 0; + renc->range = 0xFFFFFFFFU; + renc->ff_count = 0; + renc->cache = 0; + } + +static inline void Re_encode( struct Range_encoder * const renc, + const int symbol, const int num_bits ) + { + int i; + for( i = num_bits - 1; i >= 0; --i ) + { + renc->range >>= 1; + if( (symbol >> i) & 1 ) renc->low += renc->range; + if( renc->range <= 0x00FFFFFFU ) + { renc->range <<= 8; Re_shift_low( renc ); } + } + } + +static inline void Re_encode_bit( struct Range_encoder * const renc, + Bit_model * const probability, const int bit ) + { + const uint32_t bound = ( renc->range >> bit_model_total_bits ) * *probability; + if( !bit ) + { + renc->range = bound; + *probability += (bit_model_total - *probability) >> bit_model_move_bits; + } + else + { + renc->low += bound; + renc->range -= bound; + *probability -= *probability >> bit_model_move_bits; + } + if( renc->range <= 0x00FFFFFFU ) + { renc->range <<= 8; Re_shift_low( renc ); } + } + +static inline void Re_encode_tree( struct Range_encoder * const renc, + Bit_model bm[], const int symbol, const int num_bits ) + { + int mask = ( 1 << ( num_bits - 1 ) ); + int model = 1; + int i; + for( i = num_bits; i > 0; --i, mask >>= 1 ) + { + const int bit = ( symbol & mask ); + Re_encode_bit( renc, &bm[model], bit ); + model <<= 1; + if( bit ) model |= 1; + } + } + +static inline void Re_encode_tree_reversed( struct Range_encoder * const renc, + Bit_model bm[], int symbol, const int num_bits ) + { + int model = 1; + int i; + for( i = num_bits; i > 0; --i ) + { + const int bit = symbol & 1; + Re_encode_bit( renc, &bm[model], bit ); + model = ( model << 1 ) | bit; + symbol >>= 1; + } + } + +static inline void Re_encode_matched( struct Range_encoder * const renc, + Bit_model bm[], int symbol, + int match_byte ) + { + int mask = 0x100; + symbol |= mask; + + do { + int match_bit, bit; + match_byte <<= 1; + match_bit = match_byte & mask; + symbol <<= 1; + bit = symbol & 0x100; + Re_encode_bit( renc, &bm[match_bit+(symbol>>9)+mask], bit ); + mask &= ~(match_byte ^ symbol); /* if( match_bit != bit ) mask = 0; */ + } + while( symbol < 0x10000 ); + } + +static inline void Re_encode_len( struct Range_encoder * const renc, + struct Len_model * const lm, + int symbol, const int pos_state ) + { + bool bit = ( ( symbol -= min_match_len ) >= len_low_symbols ); + Re_encode_bit( renc, &lm->choice1, bit ); + if( !bit ) + Re_encode_tree( renc, lm->bm_low[pos_state], symbol, len_low_bits ); + else + { + bit = ( symbol >= len_low_symbols + len_mid_symbols ); + Re_encode_bit( renc, &lm->choice2, bit ); + if( !bit ) + Re_encode_tree( renc, lm->bm_mid[pos_state], + symbol - len_low_symbols, len_mid_bits ); + else + Re_encode_tree( renc, lm->bm_high, + symbol - len_low_symbols - len_mid_symbols, len_high_bits ); + } + } + + +enum { max_marker_size = 16, + num_rep_distances = 4 }; /* must be 4 */ + +struct LZ_encoder_base + { + struct Matchfinder_base mb; + unsigned long long member_size_limit; + uint32_t crc; + + Bit_model bm_literal[1<<literal_context_bits][0x300]; + Bit_model bm_match[states][pos_states]; + Bit_model bm_rep[states]; + Bit_model bm_rep0[states]; + Bit_model bm_rep1[states]; + Bit_model bm_rep2[states]; + Bit_model bm_len[states][pos_states]; + Bit_model bm_dis_slot[len_states][1<<dis_slot_bits]; + Bit_model bm_dis[modeled_distances-end_dis_model]; + Bit_model bm_align[dis_align_size]; + struct Len_model match_len_model; + struct Len_model rep_len_model; + struct Range_encoder renc; + int reps[num_rep_distances]; + State state; + bool member_finished; + }; + +static void LZeb_reset( struct LZ_encoder_base * const eb, + const unsigned long long member_size ); + +static inline bool LZeb_init( struct LZ_encoder_base * const eb, + const int before, const int dict_size, + const int after_size, const int dict_factor, + const int num_prev_positions23, + const int pos_array_factor, + const int min_free_bytes, + const unsigned long long member_size ) + { + if( !Mb_init( &eb->mb, before, dict_size, after_size, dict_factor, + num_prev_positions23, pos_array_factor ) ) return false; + if( !Re_init( &eb->renc, eb->mb.dictionary_size, min_free_bytes ) ) + return false; + LZeb_reset( eb, member_size ); + return true; + } + +static inline bool LZeb_member_finished( const struct LZ_encoder_base * const eb ) + { return ( eb->member_finished && !Cb_used_bytes( &eb->renc.cb ) ); } + +static inline void LZeb_free( struct LZ_encoder_base * const eb ) + { Re_free( &eb->renc ); Mb_free( &eb->mb ); } + +static inline unsigned LZeb_crc( const struct LZ_encoder_base * const eb ) + { return eb->crc ^ 0xFFFFFFFFU; } + +static inline int LZeb_price_literal( const struct LZ_encoder_base * const eb, + uint8_t prev_byte, uint8_t symbol ) + { return price_symbol( eb->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); } + +static inline int LZeb_price_matched( const struct LZ_encoder_base * const eb, + uint8_t prev_byte, uint8_t symbol, + uint8_t match_byte ) + { return price_matched( eb->bm_literal[get_lit_state(prev_byte)], symbol, + match_byte ); } + +static inline void LZeb_encode_literal( struct LZ_encoder_base * const eb, + uint8_t prev_byte, uint8_t symbol ) + { Re_encode_tree( &eb->renc, + eb->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); } + +static inline void LZeb_encode_matched( struct LZ_encoder_base * const eb, + uint8_t prev_byte, uint8_t symbol, + uint8_t match_byte ) + { Re_encode_matched( &eb->renc, eb->bm_literal[get_lit_state(prev_byte)], + symbol, match_byte ); } + +static inline void LZeb_encode_pair( struct LZ_encoder_base * const eb, + const unsigned dis, const int len, + const int pos_state ) + { + const int dis_slot = get_slot( dis ); + Re_encode_len( &eb->renc, &eb->match_len_model, len, pos_state ); + Re_encode_tree( &eb->renc, eb->bm_dis_slot[get_len_state(len)], dis_slot, + dis_slot_bits ); + + if( dis_slot >= start_dis_model ) + { + const int direct_bits = ( dis_slot >> 1 ) - 1; + const unsigned base = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + const unsigned direct_dis = dis - base; + + if( dis_slot < end_dis_model ) + Re_encode_tree_reversed( &eb->renc, eb->bm_dis + base - dis_slot - 1, + direct_dis, direct_bits ); + else + { + Re_encode( &eb->renc, direct_dis >> dis_align_bits, + direct_bits - dis_align_bits ); + Re_encode_tree_reversed( &eb->renc, eb->bm_align, direct_dis, dis_align_bits ); + } + } + } diff --git a/fast_encoder.c b/fast_encoder.c new file mode 100644 index 0000000..6172b1a --- /dev/null +++ b/fast_encoder.c @@ -0,0 +1,203 @@ +/* Lzlib - Compression library for the lzip format + Copyright (C) 2009-2015 Antonio Diaz Diaz. + + This library is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this library. If not, see <http://www.gnu.org/licenses/>. + + As a special exception, you may use this file as part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this + file, or you compile this file and link it with other files to + produce an executable, this file does not by itself cause the + resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. +*/ + +int FLZe_longest_match_len( struct FLZ_encoder * const fe, int * const distance ) + { + enum { len_limit = 16 }; + const uint8_t * const data = Mb_ptr_to_current_pos( &fe->eb.mb ); + int32_t * ptr0 = fe->eb.mb.pos_array + fe->eb.mb.cyclic_pos; + int32_t * newptr; + const int pos1 = fe->eb.mb.pos + 1; + int maxlen = 0; + int count, delta, newpos; + if( len_limit > Mb_available_bytes( &fe->eb.mb ) ) { *ptr0 = 0; return 0; } + + fe->key4 = ( ( fe->key4 << 4 ) ^ data[3] ) & fe->eb.mb.key4_mask; + newpos = fe->eb.mb.prev_positions[fe->key4]; + fe->eb.mb.prev_positions[fe->key4] = pos1; + + + for( count = 4; ; ) + { + if( --count < 0 || newpos <= 0 ) { *ptr0 = 0; break; } + delta = pos1 - newpos; + if( delta > fe->eb.mb.dictionary_size ) { *ptr0 = 0; break; } + newptr = fe->eb.mb.pos_array + + ( fe->eb.mb.cyclic_pos - delta + + ( ( fe->eb.mb.cyclic_pos >= delta ) ? 0 : fe->eb.mb.dictionary_size + 1 ) ); + + if( data[maxlen-delta] == data[maxlen] ) + { + int len = 0; + while( len < len_limit && data[len-delta] == data[len] ) ++len; + if( maxlen < len ) { maxlen = len; *distance = delta - 1; } + } + + if( maxlen < len_limit ) + { + *ptr0 = newpos; + ptr0 = newptr; + newpos = *ptr0; + } + else + { + *ptr0 = *newptr; + maxlen += Mb_true_match_len( &fe->eb.mb, maxlen, *distance + 1, + max_match_len - maxlen ); + break; + } + } + return maxlen; + } + + +bool FLZe_encode_member( struct FLZ_encoder * const fe ) + { + int rep = 0, i; + State * const state = &fe->eb.state; + + if( fe->eb.member_finished ) return true; + if( Re_member_position( &fe->eb.renc ) >= fe->eb.member_size_limit ) + { + if( LZeb_full_flush( &fe->eb ) ) fe->eb.member_finished = true; + return true; + } + + if( Mb_data_position( &fe->eb.mb ) == 0 && + !Mb_data_finished( &fe->eb.mb ) ) /* encode first byte */ + { + const uint8_t prev_byte = 0; + uint8_t cur_byte; + if( !Mb_enough_available_bytes( &fe->eb.mb ) || + !Re_enough_free_bytes( &fe->eb.renc ) ) return true; + cur_byte = Mb_peek( &fe->eb.mb, 0 ); + Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[*state][0], 0 ); + LZeb_encode_literal( &fe->eb, prev_byte, cur_byte ); + CRC32_update_byte( &fe->eb.crc, cur_byte ); + FLZe_reset_key4( fe ); + if( !FLZe_update_and_move( fe, 1 ) ) return false; + } + + while( !Mb_data_finished( &fe->eb.mb ) && + Re_member_position( &fe->eb.renc ) < fe->eb.member_size_limit ) + { + int match_distance; + int main_len, pos_state, len; + if( !Mb_enough_available_bytes( &fe->eb.mb ) || + !Re_enough_free_bytes( &fe->eb.renc ) ) return true; + main_len = FLZe_longest_match_len( fe, &match_distance ); + pos_state = Mb_data_position( &fe->eb.mb ) & pos_state_mask; + len = 0; + + for( i = 0; i < num_rep_distances; ++i ) + { + const int tlen = Mb_true_match_len( &fe->eb.mb, 0, + fe->eb.reps[i] + 1, max_match_len ); + if( tlen > len ) { len = tlen; rep = i; } + } + if( len > min_match_len && len + 3 > main_len ) + { + CRC32_update_buf( &fe->eb.crc, Mb_ptr_to_current_pos( &fe->eb.mb ), len ); + Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[*state][pos_state], 1 ); + Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep[*state], 1 ); + Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep0[*state], rep != 0 ); + if( rep == 0 ) + Re_encode_bit( &fe->eb.renc, &fe->eb.bm_len[*state][pos_state], 1 ); + else + { + int distance; + Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep1[*state], rep > 1 ); + if( rep > 1 ) + Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep2[*state], rep > 2 ); + distance = fe->eb.reps[rep]; + for( i = rep; i > 0; --i ) fe->eb.reps[i] = fe->eb.reps[i-1]; + fe->eb.reps[0] = distance; + } + *state = St_set_rep( *state ); + Re_encode_len( &fe->eb.renc, &fe->eb.rep_len_model, len, pos_state ); + if( !Mb_move_pos( &fe->eb.mb ) ) return false; + if( !FLZe_update_and_move( fe, len - 1 ) ) return false; + continue; + } + + if( main_len > min_match_len ) + { + CRC32_update_buf( &fe->eb.crc, Mb_ptr_to_current_pos( &fe->eb.mb ), main_len ); + Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[*state][pos_state], 1 ); + Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep[*state], 0 ); + *state = St_set_match( *state ); + for( i = num_rep_distances - 1; i > 0; --i ) fe->eb.reps[i] = fe->eb.reps[i-1]; + fe->eb.reps[0] = match_distance; + LZeb_encode_pair( &fe->eb, match_distance, main_len, pos_state ); + if( !Mb_move_pos( &fe->eb.mb ) ) return false; + if( !FLZe_update_and_move( fe, main_len - 1 ) ) return false; + continue; + } + + { + const uint8_t prev_byte = Mb_peek( &fe->eb.mb, 1 ); + const uint8_t cur_byte = Mb_peek( &fe->eb.mb, 0 ); + const uint8_t match_byte = Mb_peek( &fe->eb.mb, fe->eb.reps[0] + 1 ); + if( !Mb_move_pos( &fe->eb.mb ) ) return false; + CRC32_update_byte( &fe->eb.crc, cur_byte ); + + if( match_byte == cur_byte ) + { + int price = price0( fe->eb.bm_match[*state][pos_state] ); + int short_rep_price; + if( St_is_char( *state ) ) + price += LZeb_price_literal( &fe->eb, prev_byte, cur_byte ); + else + price += LZeb_price_matched( &fe->eb, prev_byte, cur_byte, match_byte ); + short_rep_price = price1( fe->eb.bm_match[*state][pos_state] ) + + price1( fe->eb.bm_rep[*state] ) + + price0( fe->eb.bm_rep0[*state] ) + + price0( fe->eb.bm_len[*state][pos_state] ); + if( short_rep_price < price ) + { + Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[*state][pos_state], 1 ); + Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep[*state], 1 ); + Re_encode_bit( &fe->eb.renc, &fe->eb.bm_rep0[*state], 0 ); + Re_encode_bit( &fe->eb.renc, &fe->eb.bm_len[*state][pos_state], 0 ); + *state = St_set_short_rep( *state ); + continue; + } + } + + /* literal byte */ + Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[*state][pos_state], 0 ); + if( St_is_char( *state ) ) + LZeb_encode_literal( &fe->eb, prev_byte, cur_byte ); + else + LZeb_encode_matched( &fe->eb, prev_byte, cur_byte, match_byte ); + *state = St_set_char( *state ); + } + } + + if( LZeb_full_flush( &fe->eb ) ) fe->eb.member_finished = true; + return true; + } diff --git a/fast_encoder.h b/fast_encoder.h new file mode 100644 index 0000000..7265739 --- /dev/null +++ b/fast_encoder.h @@ -0,0 +1,82 @@ +/* Lzlib - Compression library for the lzip format + Copyright (C) 2009-2015 Antonio Diaz Diaz. + + This library is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this library. If not, see <http://www.gnu.org/licenses/>. + + As a special exception, you may use this file as part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this + file, or you compile this file and link it with other files to + produce an executable, this file does not by itself cause the + resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. +*/ + +struct FLZ_encoder + { + struct LZ_encoder_base eb; + int key4; /* key made from latest 4 bytes */ + }; + +static inline void FLZe_reset_key4( struct FLZ_encoder * const fe ) + { + int i; + fe->key4 = 0; + for( i = 0; i < 3 && i < Mb_available_bytes( &fe->eb.mb ); ++i ) + fe->key4 = ( fe->key4 << 4 ) ^ fe->eb.mb.buffer[i]; + } + +int FLZe_longest_match_len( struct FLZ_encoder * const fe, int * const distance ); + +static inline bool FLZe_update_and_move( struct FLZ_encoder * const fe, int n ) + { + while( --n >= 0 ) + { + if( Mb_available_bytes( &fe->eb.mb ) >= 4 ) + { + int newpos; + fe->key4 = ( ( fe->key4 << 4 ) ^ fe->eb.mb.buffer[fe->eb.mb.pos+3] ) & + fe->eb.mb.key4_mask; + newpos = fe->eb.mb.prev_positions[fe->key4]; + fe->eb.mb.prev_positions[fe->key4] = fe->eb.mb.pos + 1; + fe->eb.mb.pos_array[fe->eb.mb.cyclic_pos] = newpos; + } + else fe->eb.mb.pos_array[fe->eb.mb.cyclic_pos] = 0; + if( !Mb_move_pos( &fe->eb.mb ) ) return false; + } + return true; + } + +static inline bool FLZe_init( struct FLZ_encoder * const fe, + const unsigned long long member_size ) + { + enum { before = 0, + dict_size = 65536, + /* bytes to keep in buffer after pos */ + after_size = max_match_len, + dict_factor = 16, + num_prev_positions23 = 0, + pos_array_factor = 1, + min_free_bytes = max_marker_size }; + + return LZeb_init( &fe->eb, before, dict_size, after_size, dict_factor, + num_prev_positions23, pos_array_factor, min_free_bytes, + member_size ); + } + +static inline void FLZe_reset( struct FLZ_encoder * const fe, + const unsigned long long member_size ) + { LZeb_reset( &fe->eb, member_size ); } @@ -1,5 +1,5 @@ /* Lzcheck - Test program for the lzlib library - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you have unlimited permission to copy, distribute and modify it. @@ -34,30 +34,14 @@ uint8_t mid_buffer[buffer_size]; uint8_t out_buffer[buffer_size]; -int main( const int argc, const char * const argv[] ) +int lzcheck( FILE * const file, const int dictionary_size ) { - const int dictionary_size = 1 << 20; const int match_len_limit = 16; const unsigned long long member_size = 0x7FFFFFFFFFFFFFFFULL; /* INT64_MAX */ struct LZ_Encoder * encoder; struct LZ_Decoder * decoder; - FILE * file; int retval = 0; - if( argc < 2 ) - { - fprintf( stderr, "Usage: lzcheck filename.txt\n" ); - return 1; - } - - file = fopen( argv[1], "rb" ); - if( !file ) - { - fprintf( stderr, "lzcheck: Can't open file '%s' for reading\n", argv[1] ); - return 1; - } -/* fprintf( stderr, "lzcheck: Testing file '%s'\n", argv[1] ); */ - encoder = LZ_compress_open( dictionary_size, match_len_limit, member_size ); if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) { @@ -224,6 +208,32 @@ int main( const int argc, const char * const argv[] ) LZ_decompress_close( decoder ); LZ_compress_close( encoder ); + return retval; + } + + +int main( const int argc, const char * const argv[] ) + { + FILE * file; + int retval; + + if( argc < 2 ) + { + fprintf( stderr, "Usage: lzcheck filename.txt\n" ); + return 1; + } + + file = fopen( argv[1], "rb" ); + if( !file ) + { + fprintf( stderr, "lzcheck: Can't open file '%s' for reading.\n", argv[1] ); + return 1; + } +/* fprintf( stderr, "lzcheck: Testing file '%s'\n", argv[1] ); */ + + retval = lzcheck( file, 65535 ); + if( retval == 0 ) + { rewind( file ); retval = lzcheck( file, 1 << 20 ); } fclose( file ); return retval; } @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,33 +35,33 @@ #include "cbuffer.c" #include "decoder.h" #include "decoder.c" +#include "encoder_base.h" +#include "encoder_base.c" #include "encoder.h" #include "encoder.c" +#include "fast_encoder.h" +#include "fast_encoder.c" struct LZ_Encoder { unsigned long long partial_in_size; unsigned long long partial_out_size; - struct Matchfinder * matchfinder; - struct LZ_encoder * lz_encoder; + struct LZ_encoder_base * lz_encoder_base; /* these 3 pointers make a */ + struct LZ_encoder * lz_encoder; /* polymorphic encoder */ + struct FLZ_encoder * flz_encoder; enum LZ_Errno lz_errno; - int flush_pending; - File_header member_header; bool fatal; }; -static void LZ_Encoder_init( struct LZ_Encoder * const e, - const File_header header ) +static void LZ_Encoder_init( struct LZ_Encoder * const e ) { - int i; e->partial_in_size = 0; e->partial_out_size = 0; - e->matchfinder = 0; + e->lz_encoder_base = 0; e->lz_encoder = 0; + e->flz_encoder = 0; e->lz_errno = LZ_ok; - e->flush_pending = 0; - for( i = 0; i < Fh_size; ++i ) e->member_header[i] = header[i]; e->fatal = false; } @@ -95,7 +95,8 @@ static void LZ_Decoder_init( struct LZ_Decoder * const d ) static bool verify_encoder( struct LZ_Encoder * const e ) { if( !e ) return false; - if( !e->matchfinder || !e->lz_encoder ) + if( !e->lz_encoder_base || ( !e->lz_encoder && !e->flz_encoder ) || + ( e->lz_encoder && e->flz_encoder ) ) { e->lz_errno = LZ_bad_argument; return false; } return true; } @@ -147,35 +148,31 @@ struct LZ_Encoder * LZ_compress_open( const int dictionary_size, const unsigned long long member_size ) { File_header header; - const bool error = ( !Fh_set_dictionary_size( header, dictionary_size ) || - match_len_limit < min_match_len_limit || - match_len_limit > max_match_len || - member_size < min_dictionary_size ); - struct LZ_Encoder * const e = (struct LZ_Encoder *)malloc( sizeof (struct LZ_Encoder) ); if( !e ) return 0; - Fh_set_magic( header ); - LZ_Encoder_init( e, header ); - if( error ) e->lz_errno = LZ_bad_argument; + LZ_Encoder_init( e ); + if( !Fh_set_dictionary_size( header, dictionary_size ) || + match_len_limit < min_match_len_limit || + match_len_limit > max_match_len || + member_size < min_dictionary_size ) + e->lz_errno = LZ_bad_argument; else { - e->matchfinder = (struct Matchfinder *)malloc( sizeof (struct Matchfinder) ); - if( e->matchfinder ) + if( dictionary_size == 65535 && match_len_limit == 16 ) + { + e->flz_encoder = (struct FLZ_encoder *)malloc( sizeof (struct FLZ_encoder) ); + if( e->flz_encoder && FLZe_init( e->flz_encoder, member_size ) ) + { e->lz_encoder_base = &e->flz_encoder->eb; return e; } + free( e->flz_encoder ); e->flz_encoder = 0; + } + else { e->lz_encoder = (struct LZ_encoder *)malloc( sizeof (struct LZ_encoder) ); - if( e->lz_encoder ) - { - if( Mf_init( e->matchfinder, - Fh_get_dictionary_size( header ), match_len_limit ) ) - { - if( LZe_init( e->lz_encoder, e->matchfinder, header, member_size ) ) - return e; - Mf_free( e->matchfinder ); - } - free( e->lz_encoder ); e->lz_encoder = 0; - } - free( e->matchfinder ); e->matchfinder = 0; + if( e->lz_encoder && LZe_init( e->lz_encoder, Fh_get_dictionary_size( header ), + match_len_limit, member_size ) ) + { e->lz_encoder_base = &e->lz_encoder->eb; return e; } + free( e->lz_encoder ); e->lz_encoder = 0; } e->lz_errno = LZ_mem_error; } @@ -187,10 +184,9 @@ struct LZ_Encoder * LZ_compress_open( const int dictionary_size, int LZ_compress_close( struct LZ_Encoder * const e ) { if( !e ) return -1; - if( e->lz_encoder ) - { LZe_free( e->lz_encoder ); free( e->lz_encoder ); } - if( e->matchfinder ) - { Mf_free( e->matchfinder ); free( e->matchfinder ); } + if( e->lz_encoder_base ) + { LZeb_free( e->lz_encoder_base ); + free( e->lz_encoder ); free( e->flz_encoder ); } free( e ); return 0; } @@ -199,13 +195,17 @@ int LZ_compress_close( struct LZ_Encoder * const e ) int LZ_compress_finish( struct LZ_Encoder * const e ) { if( !verify_encoder( e ) || e->fatal ) return -1; - Mf_finish( e->matchfinder ); - e->flush_pending = 0; + Mb_finish( &e->lz_encoder_base->mb ); /* if (open --> write --> finish) use same dictionary size as lzip. */ /* this does not save any memory. */ - if( Mf_data_position( e->matchfinder ) == 0 && + if( Mb_data_position( &e->lz_encoder_base->mb ) == 0 && LZ_compress_total_out_size( e ) == Fh_size ) - Mf_adjust_distionary_size( e->matchfinder ); + { + Mb_adjust_distionary_size( &e->lz_encoder_base->mb ); + Fh_set_dictionary_size( e->lz_encoder_base->renc.header, + e->lz_encoder_base->mb.dictionary_size ); + e->lz_encoder_base->renc.cb.buffer[5] = e->lz_encoder_base->renc.header[5]; + } return 0; } @@ -214,22 +214,16 @@ int LZ_compress_restart_member( struct LZ_Encoder * const e, const unsigned long long member_size ) { if( !verify_encoder( e ) || e->fatal ) return -1; - if( !LZe_member_finished( e->lz_encoder ) ) + if( !LZeb_member_finished( e->lz_encoder_base ) ) { e->lz_errno = LZ_sequence_error; return -1; } if( member_size < min_dictionary_size ) { e->lz_errno = LZ_bad_argument; return -1; } - e->partial_in_size += Mf_data_position( e->matchfinder ); - e->partial_out_size += Re_member_position( &e->lz_encoder->renc ); - Mf_reset( e->matchfinder ); + e->partial_in_size += Mb_data_position( &e->lz_encoder_base->mb ); + e->partial_out_size += Re_member_position( &e->lz_encoder_base->renc ); - LZe_free( e->lz_encoder ); - if( !LZe_init( e->lz_encoder, e->matchfinder, e->member_header, member_size ) ) - { - LZe_free( e->lz_encoder ); free( e->lz_encoder ); e->lz_encoder = 0; - e->lz_errno = LZ_mem_error; e->fatal = true; - return -1; - } + if( e->lz_encoder ) LZe_reset( e->lz_encoder, member_size ); + else FLZe_reset( e->flz_encoder, member_size ); e->lz_errno = LZ_ok; return 0; } @@ -238,15 +232,8 @@ int LZ_compress_restart_member( struct LZ_Encoder * const e, int LZ_compress_sync_flush( struct LZ_Encoder * const e ) { if( !verify_encoder( e ) || e->fatal ) return -1; - if( e->flush_pending <= 0 && !Mf_flushing_or_end( e->matchfinder ) ) - { - e->flush_pending = 2; /* 2 consecutive markers guarantee decoding */ - e->matchfinder->flushing = true; - if( !LZe_encode_member( e->lz_encoder ) ) - { e->lz_errno = LZ_library_error; e->fatal = true; return -1; } - while( e->flush_pending > 0 && LZe_sync_flush( e->lz_encoder ) ) - { if( --e->flush_pending <= 0 ) e->matchfinder->flushing = false; } - } + if( !Mb_flushing_or_end( &e->lz_encoder_base->mb ) ) + e->lz_encoder_base->mb.flushing = true; return 0; } @@ -254,12 +241,23 @@ int LZ_compress_sync_flush( struct LZ_Encoder * const e ) int LZ_compress_read( struct LZ_Encoder * const e, uint8_t * const buffer, const int size ) { + int out_size = 0; if( !verify_encoder( e ) || e->fatal ) return -1; - if( !LZe_encode_member( e->lz_encoder ) ) - { e->lz_errno = LZ_library_error; e->fatal = true; return -1; } - while( e->flush_pending > 0 && LZe_sync_flush( e->lz_encoder ) ) - { if( --e->flush_pending <= 0 ) e->matchfinder->flushing = false; } - return Re_read_data( &e->lz_encoder->renc, buffer, size ); + do { + if( ( e->flz_encoder && !FLZe_encode_member( e->flz_encoder ) ) || + ( e->lz_encoder && !LZe_encode_member( e->lz_encoder ) ) ) + { e->lz_errno = LZ_library_error; e->fatal = true; return -1; } + if( e->lz_encoder_base->mb.flushing && + Mb_available_bytes( &e->lz_encoder_base->mb ) <= 0 && + LZeb_sync_flush( e->lz_encoder_base ) ) + e->lz_encoder_base->mb.flushing = false; + out_size += Re_read_data( &e->lz_encoder_base->renc, + buffer + out_size, size - out_size ); + } + while( e->lz_encoder_base->mb.flushing && out_size < size && + Mb_enough_available_bytes( &e->lz_encoder_base->mb ) && + Re_enough_free_bytes( &e->lz_encoder_base->renc ) ); + return out_size; } @@ -267,16 +265,14 @@ int LZ_compress_write( struct LZ_Encoder * const e, const uint8_t * const buffer, const int size ) { if( !verify_encoder( e ) || e->fatal ) return -1; - if( e->flush_pending > 0 || size < 0 ) return 0; - return Mf_write_data( e->matchfinder, buffer, size ); + return Mb_write_data( &e->lz_encoder_base->mb, buffer, size ); } int LZ_compress_write_size( struct LZ_Encoder * const e ) { if( !verify_encoder( e ) || e->fatal ) return -1; - if( e->flush_pending > 0 ) return 0; - return Mf_free_bytes( e->matchfinder ); + return Mb_free_bytes( &e->lz_encoder_base->mb ); } @@ -290,43 +286,43 @@ enum LZ_Errno LZ_compress_errno( struct LZ_Encoder * const e ) int LZ_compress_finished( struct LZ_Encoder * const e ) { if( !verify_encoder( e ) ) return -1; - return ( e->flush_pending <= 0 && Mf_finished( e->matchfinder ) && - LZe_member_finished( e->lz_encoder ) ); + return ( Mb_data_finished( &e->lz_encoder_base->mb ) && + LZeb_member_finished( e->lz_encoder_base ) ); } int LZ_compress_member_finished( struct LZ_Encoder * const e ) { if( !verify_encoder( e ) ) return -1; - return LZe_member_finished( e->lz_encoder ); + return LZeb_member_finished( e->lz_encoder_base ); } unsigned long long LZ_compress_data_position( struct LZ_Encoder * const e ) { if( !verify_encoder( e ) ) return 0; - return Mf_data_position( e->matchfinder ); + return Mb_data_position( &e->lz_encoder_base->mb ); } unsigned long long LZ_compress_member_position( struct LZ_Encoder * const e ) { if( !verify_encoder( e ) ) return 0; - return Re_member_position( &e->lz_encoder->renc ); + return Re_member_position( &e->lz_encoder_base->renc ); } unsigned long long LZ_compress_total_in_size( struct LZ_Encoder * const e ) { if( !verify_encoder( e ) ) return 0; - return e->partial_in_size + Mf_data_position( e->matchfinder ); + return e->partial_in_size + Mb_data_position( &e->lz_encoder_base->mb ); } unsigned long long LZ_compress_total_out_size( struct LZ_Encoder * const e ) { if( !verify_encoder( e ) ) return 0; - return e->partial_out_size + Re_member_position( &e->lz_encoder->renc ); + return e->partial_out_size + Re_member_position( &e->lz_encoder_base->renc ); } @@ -1,5 +1,5 @@ /* Lzlib - Compression library for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,7 +29,7 @@ extern "C" { #endif -static const char * const LZ_version_string = "1.6"; +static const char * const LZ_version_string = "1.7-pre1"; enum LZ_Errno { LZ_ok = 0, LZ_bad_argument, LZ_mem_error, LZ_sequence_error, LZ_header_error, LZ_unexpected_eof, @@ -1,5 +1,5 @@ /* Minilzip - Test program for the lzlib library - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -76,7 +76,7 @@ void internal_error( const char * const msg ); const char * const Program_name = "Minilzip"; const char * const program_name = "minilzip"; -const char * const program_year = "2014"; +const char * const program_year = "2015"; const char * invocation_name = 0; struct { const char * from; const char * to; } const known_extensions[] = { @@ -179,8 +179,8 @@ static void show_help( void ) " -S, --volume-size=<bytes> set volume size limit in bytes\n" " -t, --test test compressed file integrity\n" " -v, --verbose be verbose (a 2nd -v gives more)\n" - " -1 .. -9 set compression level [default 6]\n" - " --fast alias for -1\n" + " -0 .. -9 set compression level [default 6]\n" + " --fast alias for -0\n" " --best alias for -9\n" "If no file names are given, minilzip compresses or decompresses\n" "from standard input to standard output.\n" @@ -189,8 +189,7 @@ static void show_help( void ) "The bidimensional parameter space of LZMA can't be mapped to a linear\n" "scale optimal for all files. If your files are large, very repetitive,\n" "etc, you may need to use the --match-length and --dictionary-size\n" - "options directly to achieve optimal performance. For example, -9m64\n" - "usually compresses executables more (and faster) than -9.\n" + "options directly to achieve optimal performance.\n" "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" "invalid input file, 3 for an internal consistency error (eg, bug) which\n" @@ -213,18 +212,21 @@ static void show_version( void ) static void show_header( const unsigned dictionary_size ) { - const char * const prefix[8] = - { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; - enum { factor = 1024 }; - const char * p = ""; - const char * np = " "; - unsigned num = dictionary_size, i; - bool exact = ( num % factor == 0 ); - - for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) - { num /= factor; if( num % factor != 0 ) exact = false; - p = prefix[i]; np = ""; } - fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p ); + if( verbosity >= 3 ) + { + const char * const prefix[8] = + { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + enum { factor = 1024 }; + const char * p = ""; + const char * np = " "; + unsigned num = dictionary_size, i; + bool exact = ( num % factor == 0 ); + + for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; + p = prefix[i]; np = ""; } + fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p ); + } } @@ -301,8 +303,10 @@ static int extension_index( const char * const name ) for( i = 0; known_extensions[i].from; ++i ) { const char * const ext = known_extensions[i].from; - if( strlen( name ) > strlen( ext ) && - strncmp( name + strlen( name ) - strlen( ext ), ext, strlen( ext ) ) == 0 ) + const unsigned name_len = strlen( name ); + const unsigned ext_len = strlen( ext ); + if( name_len > ext_len && + strncmp( name + name_len - ext_len, ext, ext_len ) == 0 ) return i; } return -1; @@ -379,20 +383,21 @@ static void set_c_outname( const char * const name, const bool multifile ) static void set_d_outname( const char * const name, const int i ) { + const unsigned name_len = strlen( name ); if( i >= 0 ) { const char * const from = known_extensions[i].from; - if( strlen( name ) > strlen( from ) ) + const unsigned from_len = strlen( from ); + if( name_len > from_len ) { - output_filename = resize_buffer( output_filename, strlen( name ) + + output_filename = resize_buffer( output_filename, name_len + strlen( known_extensions[0].to ) + 1 ); strcpy( output_filename, name ); - strcpy( output_filename + strlen( name ) - strlen( from ), - known_extensions[i].to ); + strcpy( output_filename + name_len - from_len, known_extensions[i].to ); return; } } - output_filename = resize_buffer( output_filename, strlen( name ) + 4 + 1 ); + output_filename = resize_buffer( output_filename, name_len + 4 + 1 ); strcpy( output_filename, name ); strcat( output_filename, ".out" ); if( verbosity >= 1 ) @@ -422,7 +427,7 @@ static bool open_outstream( const bool force ) static bool check_tty( const int infd, const enum Mode program_mode ) { - if( program_mode == m_compress && outfd >= 0 && isatty( outfd ) ) + if( program_mode == m_compress && isatty( outfd ) ) { show_error( "I won't write compressed data to a terminal.", 0, true ); return false; @@ -522,11 +527,11 @@ static int writeblock( const int fd, const uint8_t * const buf, const int size ) static bool next_filename( void ) { - const unsigned len = strlen( known_extensions[0].from ); + const unsigned name_len = strlen( output_filename ); + const unsigned ext_len = strlen( known_extensions[0].from ); int i, j; - - if( strlen( output_filename ) >= len + 5 ) /* "*00001.lz" */ - for( i = strlen( output_filename ) - len - 1, j = 0; j < 5; --i, ++j ) + if( name_len >= ext_len + 5 ) /* "*00001.lz" */ + for( i = name_len - ext_len - 1, j = 0; j < 5; --i, ++j ) { if( output_filename[i] < '9' ) { ++output_filename[i]; return true; } else output_filename[i] = '0'; @@ -635,11 +640,11 @@ static int do_compress( struct LZ_Encoder * const encoder, } -int compress( const unsigned long long member_size, - const unsigned long long volume_size, - const struct Lzma_options * const encoder_options, - const int infd, struct Pretty_print * const pp, - const struct stat * const in_statsp ) +static int compress( const unsigned long long member_size, + const unsigned long long volume_size, const int infd, + const struct Lzma_options * const encoder_options, + struct Pretty_print * const pp, + const struct stat * const in_statsp ) { struct LZ_Encoder * const encoder = LZ_compress_open( encoder_options->dictionary_size, @@ -662,8 +667,8 @@ int compress( const unsigned long long member_size, } -int do_decompress( struct LZ_Decoder * const decoder, const int infd, - struct Pretty_print * const pp, const bool testing ) +static int do_decompress( struct LZ_Decoder * const decoder, const int infd, + struct Pretty_print * const pp, const bool testing ) { enum { buffer_size = 65536 }; uint8_t buffer[buffer_size]; @@ -710,8 +715,7 @@ int do_decompress( struct LZ_Decoder * const decoder, const int infd, const unsigned long long data_position = LZ_decompress_data_position( decoder ); const unsigned long long member_size = LZ_decompress_member_position( decoder ); Pp_show_msg( pp, 0 ); - if( verbosity >= 3 ) - show_header( LZ_decompress_dictionary_size( decoder ) ); + show_header( LZ_decompress_dictionary_size( decoder ) ); if( verbosity >= 2 && data_position > 0 && member_size > 0 ) fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", (double)data_position / member_size, @@ -759,8 +763,8 @@ int do_decompress( struct LZ_Decoder * const decoder, const int infd, } -int decompress( const int infd, struct Pretty_print * const pp, - const bool testing ) +static int decompress( const int infd, struct Pretty_print * const pp, + const bool testing ) { struct LZ_Decoder * const decoder = LZ_decompress_open(); int retval; @@ -821,7 +825,7 @@ int main( const int argc, const char * const argv[] ) to the corresponding LZMA compression modes. */ const struct Lzma_options option_mapping[] = { - { 1 << 20, 5 }, /* -0 */ + { 65535, 16 }, /* -0 */ { 1 << 20, 5 }, /* -1 */ { 3 << 19, 6 }, /* -2 */ { 1 << 21, 8 }, /* -3 */ @@ -1022,8 +1026,8 @@ int main( const int argc, const char * const argv[] ) in_statsp = input_filename[0] ? &in_stats : 0; Pp_set_name( &pp, input_filename ); if( program_mode == m_compress ) - tmp = compress( member_size, volume_size, &encoder_options, infd, - &pp, in_statsp ); + tmp = compress( member_size, volume_size, infd, &encoder_options, &pp, + in_statsp ); else tmp = decompress( infd, &pp, program_mode == m_test ); if( tmp > retval ) retval = tmp; diff --git a/testsuite/check.sh b/testsuite/check.sh index df9b340..621c535 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lzlib - A compression library for lzip files -# Copyright (C) 2009-2014 Antonio Diaz Diaz. +# Copyright (C) 2009-2015 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. |