From 63df7a48df604cb3c9fd274713f6d65824ecaacd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 7 Nov 2015 14:47:52 +0100 Subject: Adding upstream version 1.4~rc2. Signed-off-by: Daniel Baumann --- ChangeLog | 22 +- INSTALL | 11 +- Makefile.in | 38 +- NEWS | 26 +- README | 2 +- bbexample.c | 74 ++- carg_parser.c | 2 +- cbuffer.c | 134 ++++++ clzip.h | 148 ++++-- configure | 37 +- decoder.c | 623 ++---------------------- decoder.h | 451 ++++++++++++++++++ doc/lzlib.info | 104 ++-- doc/lzlib.texinfo | 64 +-- doc/minilzip.1 | 84 ++++ encoder.c | 1271 ++++++++++++++++--------------------------------- encoder.h | 808 +++++++++++++++++++++++++++++++ lzcheck.c | 55 +-- lzlib.c | 123 +++-- lzlib.h | 27 +- main.c | 413 ++++++++-------- tables.c | 395 --------------- testsuite/check.sh | 33 +- testsuite/test.txt.lz | Bin 0 -> 11518 bytes testsuite/test_v0.lz | Bin 11540 -> 0 bytes testsuite/test_v1.lz | Bin 11548 -> 0 bytes 26 files changed, 2571 insertions(+), 2374 deletions(-) create mode 100644 cbuffer.c create mode 100644 decoder.h create mode 100644 doc/minilzip.1 create mode 100644 encoder.h delete mode 100644 tables.c create mode 100644 testsuite/test.txt.lz delete mode 100644 testsuite/test_v0.lz delete mode 100644 testsuite/test_v1.lz diff --git a/ChangeLog b/ChangeLog index a60f1aa..a3d5a17 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +2013-02-07 Antonio Diaz Diaz + + * Version 1.4-rc2 released. + * lzlib.c (LZ_decompress_read): Tell LZ_header_error from + LZ_unexpected_eof the same way as lzip does. + +2013-01-18 Antonio Diaz Diaz + + * Version 1.4-rc1 released. + * Multi-step trials have been implemented. + * Compression ratio has been slightly increased. + * Compression time has been reduced by 8%. + * Decompression time has been reduced by 7%. + * lzlib.h: Changed 'long long' values to 'unsigned long long'. + * encoder.c (Mf_init): Reduce minimum buffer size to 64KiB. + * Makefile.in: Added new target 'install-as-lzip'. + * Makefile.in: Added new target 'install-bin'. + * main.c: Define 'strtoull' to 'strtoul' on Windows. + * main.c: Use 'setmode' instead of '_setmode' on Windows and OS/2. + 2012-02-29 Antonio Diaz Diaz * Version 1.3 released. @@ -111,7 +131,7 @@ * Version 0.1 released. -Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. +Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and diff --git a/INSTALL b/INSTALL index 00715be..7281bad 100644 --- a/INSTALL +++ b/INSTALL @@ -1,7 +1,7 @@ Requirements ------------ You will need a C compiler. -I use gcc 4.3.5 and 3.3.6, but the code should compile with any +I use gcc 4.7.2 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -32,6 +32,13 @@ the main archive. 5. Type 'make install' to install the library and any data files and documentation. (You might have to run ldconfig also). + You can install only the library, the info manual or the man page + typing 'make install-bin', 'make install-info' or 'make install-man' + respectively. + +5a. Type 'make install-as-lzip' to install the library and any data + files and documentation, and link minilzip to the name 'lzip'. + Another way ----------- @@ -50,7 +57,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. +Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/Makefile.in b/Makefile.in index 983f9a9..02e3a46 100644 --- a/Makefile.in +++ b/Makefile.in @@ -11,8 +11,8 @@ SHELL = /bin/sh objs = carg_parser.o main.o -.PHONY : all install install-info install-man install-strip \ - uninstall uninstall-info uninstall-man \ +.PHONY : all install install-bin install-info install-man install-strip \ + install-as-lzip uninstall uninstall-bin uninstall-info uninstall-man \ doc info man check dist clean distclean all : $(progname) $(progname_shared) @@ -47,10 +47,12 @@ main.o : main.c lzlib_sh.o : lzlib.c $(CC) -fpic -fPIC $(CPPFLAGS) $(CFLAGS) -c -o $@ $< +lzdeps = lzlib.h clzip.h cbuffer.c decoder.h decoder.c encoder.h encoder.c + $(objs) : Makefile carg_parser.o : carg_parser.h -lzlib.o : Makefile lzlib.h clzip.h tables.c decoder.c encoder.c -lzlib_sh.o : Makefile lzlib.h clzip.h tables.c decoder.c encoder.c +lzlib.o : Makefile $(lzdeps) +lzlib_sh.o : Makefile $(lzdeps) main.o : carg_parser.h lzlib.h bbexample.o : Makefile lzlib.h lzcheck.o : Makefile lzlib.h @@ -75,7 +77,9 @@ Makefile : $(VPATH)/configure $(VPATH)/Makefile.in check : all bbexample lzcheck @$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion) -install : all install-info +install : install-bin install-info + +install-bin : all if [ ! -d "$(DESTDIR)$(includedir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(includedir)" ; fi if [ ! -d "$(DESTDIR)$(libdir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(libdir)" ; fi $(INSTALL_DATA) $(VPATH)/$(libname)lib.h "$(DESTDIR)$(includedir)/$(libname)lib.h" @@ -83,9 +87,14 @@ install : all install-info if [ -n "$(progname_shared)" ] ; then \ $(INSTALL_PROGRAM) ./lib$(libname).so.$(pkgversion) "$(DESTDIR)$(libdir)/lib$(libname).so.$(pkgversion)" ; \ if [ -e "$(DESTDIR)$(libdir)/lib$(libname).so.$(soversion)" ] ; then \ - run_ldconfig=no ; rm -f "$(DESTDIR)$(libdir)/lib$(libname).so.$(soversion)" ; \ + rm -f "$(DESTDIR)$(libdir)/lib$(libname).so.$(soversion)" ; \ + run_ldconfig=no ; \ else run_ldconfig=yes ; \ fi ; \ + if [ -e "$(DESTDIR)$(libdir)/lib$(libname).so" ] ; then \ + rm -f "$(DESTDIR)$(libdir)/lib$(libname).so" ; \ + fi ; \ + cd "$(DESTDIR)$(libdir)" && ln -s lib$(libname).so.$(pkgversion) lib$(libname).so ; \ cd "$(DESTDIR)$(libdir)" && ln -s lib$(libname).so.$(pkgversion) lib$(libname).so.$(soversion) ; \ if [ $${run_ldconfig} = yes ] && [ -x "$(LDCONFIG)" ] ; then "$(LDCONFIG)" -n "$(DESTDIR)$(libdir)" ; fi ; \ fi @@ -93,7 +102,7 @@ install : all install-info install-info : if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info" - -install-info --info-dir="$(DESTDIR)$(infodir)" $(DESTDIR)$(infodir)/$(pkgname).info + -install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" install-man : if [ ! -d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi @@ -102,9 +111,19 @@ install-man : install-strip : all $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install -uninstall : uninstall-info uninstall-man +install-as-lzip : install install-man + if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi + $(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)" + -rm -f "$(DESTDIR)$(bindir)/lzip" + cd "$(DESTDIR)$(bindir)" && ln -s $(progname) lzip + +uninstall : uninstall-bin uninstall-info uninstall-man + +uninstall-bin : + -rm -f "$(DESTDIR)$(bindir)/$(progname)" -rm -f "$(DESTDIR)$(includedir)/$(libname)lib.h" -rm -f "$(DESTDIR)$(libdir)/lib$(libname).a" + -rm -f "$(DESTDIR)$(libdir)/lib$(libname).so" -rm -f "$(DESTDIR)$(libdir)/lib$(libname).so.$(soversion)" -rm -f "$(DESTDIR)$(libdir)/lib$(libname).so.$(pkgversion)" @@ -126,12 +145,13 @@ dist : doc $(DISTNAME)/NEWS \ $(DISTNAME)/README \ $(DISTNAME)/configure \ + $(DISTNAME)/doc/$(progname).1 \ $(DISTNAME)/doc/$(pkgname).info \ $(DISTNAME)/doc/$(pkgname).texinfo \ $(DISTNAME)/testsuite/check.sh \ $(DISTNAME)/testsuite/test.txt \ + $(DISTNAME)/testsuite/test.txt.lz \ $(DISTNAME)/testsuite/test_sync.lz \ - $(DISTNAME)/testsuite/test_v[01].lz \ $(DISTNAME)/*.h \ $(DISTNAME)/*.c rm -f $(DISTNAME) diff --git a/NEWS b/NEWS index 3f6de9d..0a90a66 100644 --- a/NEWS +++ b/NEWS @@ -1,11 +1,21 @@ -Changes in version 1.3: +Changes in version 1.4: -Lzlib has been translated to C from the C++ source of lzlib 1.2. This -has been done to avoid the dependency on libstdc++, making lzlib useful -in more environments. +Multi-step trials have been implemented. -Quote characters in messages have been changed as advised by GNU Coding -Standards. +Compression ratio has been slightly increased. -Configure option "--datadir" has been renamed to "--datarootdir" to -follow GNU Standards. +Compression time has been reduced by 8%. + +Decompression time has been reduced by 7%. + +Arguments and return values of functions in lzlib.h have been changed +from 'long long' to 'unsigned long long'. + +The minimum size of the input compression buffer has been reduced to 64KiB. + +"LZ_decompress_read" now tells "LZ_header_error" from "LZ_unexpected_eof" +the same way as lzip does when the EOF happens at the header. + +The target "install-as-lzip" has been added to the Makefile. + +The target "install-bin" has been added to the Makefile. diff --git a/README b/README index 65ff4c8..6771a79 100644 --- a/README +++ b/README @@ -36,7 +36,7 @@ Igor Pavlov. For a description of the LZMA algorithm, see the Lzip manual. -Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. +Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/bbexample.c b/bbexample.c index f174875..1b33978 100644 --- a/bbexample.c +++ b/bbexample.c @@ -1,5 +1,5 @@ /* Buff to buff example - A test program for the lzlib library - Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you have unlimited permission to copy, distribute and modify it. @@ -22,16 +22,6 @@ #include "lzlib.h" -#ifndef LLONG_MAX -#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL -#endif -#ifndef LLONG_MIN -#define LLONG_MIN (-LLONG_MAX - 1LL) -#endif -#ifndef ULLONG_MAX -#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL -#endif - /* Compresses 'size' bytes from 'data'. Returns the address of a malloc'd buffer containing the compressed data and its size in @@ -41,28 +31,32 @@ uint8_t * bbcompress( const uint8_t * const data, const int size, int * const out_sizep ) { + struct LZ_Encoder * encoder; + uint8_t * new_data; const int match_len_limit = 36; - const long long member_size = LLONG_MAX; + const unsigned long long member_size = INT64_MAX; + int delta_size, new_data_size; + int new_pos = 0; + int written = 0; + bool error = false; int dict_size = 8 << 20; /* 8 MiB */ + if( dict_size > size ) dict_size = size; /* saves memory */ if( dict_size < LZ_min_dictionary_size() ) dict_size = LZ_min_dictionary_size(); - struct LZ_Encoder * const encoder = - LZ_compress_open( dict_size, match_len_limit, member_size ); + encoder = LZ_compress_open( dict_size, match_len_limit, member_size ); if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) { LZ_compress_close( encoder ); return 0; } - const int delta_size = (size < 256) ? 64 : size / 4; /* size may be zero */ - int new_data_size = delta_size; /* initial size */ - uint8_t * new_data = (uint8_t *)malloc( new_data_size ); + delta_size = (size < 256) ? 64 : size / 4; /* size may be zero */ + new_data_size = delta_size; /* initial size */ + new_data = (uint8_t *)malloc( new_data_size ); if( !new_data ) { LZ_compress_close( encoder ); return 0; } - int new_pos = 0; - int written = 0; - bool error = false; while( true ) { + int rd; if( LZ_compress_write_size( encoder ) > 0 ) { if( written < size ) @@ -74,8 +68,8 @@ uint8_t * bbcompress( const uint8_t * const data, const int size, } if( written >= size ) LZ_compress_finish( encoder ); } - const int rd = LZ_compress_read( encoder, new_data + new_pos, - new_data_size - new_pos ); + rd = LZ_compress_read( encoder, new_data + new_pos, + new_data_size - new_pos ); if( rd < 0 ) { error = true; break; } new_pos += rd; if( LZ_compress_finished( encoder ) == 1 ) break; @@ -105,20 +99,22 @@ uint8_t * bbdecompress( const uint8_t * const data, const int size, int * const out_sizep ) { struct LZ_Decoder * const decoder = LZ_decompress_open(); + uint8_t * new_data; + const int delta_size = size; /* size must be > zero */ + int new_data_size = delta_size; /* initial size */ + int new_pos = 0; + int written = 0; + bool error = false; if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) { LZ_decompress_close( decoder ); return 0; } - const int delta_size = size; /* size must be > zero */ - int new_data_size = delta_size; /* initial size */ - uint8_t * new_data = (uint8_t *)malloc( new_data_size ); + new_data = (uint8_t *)malloc( new_data_size ); if( !new_data ) { LZ_decompress_close( decoder ); return 0; } - int new_pos = 0; - int written = 0; - bool error = false; while( true ) { + int rd; if( LZ_decompress_write_size( decoder ) > 0 ) { if( written < size ) @@ -130,8 +126,8 @@ uint8_t * bbdecompress( const uint8_t * const data, const int size, } if( written >= size ) LZ_decompress_finish( decoder ); } - const int rd = LZ_decompress_read( decoder, new_data + new_pos, - new_data_size - new_pos ); + rd = LZ_decompress_read( decoder, new_data + new_pos, + new_data_size - new_pos ); if( rd < 0 ) { error = true; break; } new_pos += rd; if( LZ_decompress_finished( decoder ) == 1 ) break; @@ -154,28 +150,32 @@ uint8_t * bbdecompress( const uint8_t * const data, const int size, int main( const int argc, const char * const argv[] ) { + FILE * file; + uint8_t * in_buffer, * mid_buffer, * out_buffer; + const int in_buffer_size = 1 << 20; + int in_size, mid_size = 0, out_size = 0; + if( argc < 2 ) { fprintf( stderr, "Usage: bbexample filename\n" ); return 1; } - FILE *file = fopen( argv[1], "rb" ); + file = fopen( argv[1], "rb" ); if( !file ) { fprintf( stderr, "bbexample: Can't open file '%s' for reading\n", argv[1] ); return 1; } - const int in_buffer_size = 1 << 20; - uint8_t * const in_buffer = (uint8_t *)malloc( in_buffer_size ); + in_buffer = (uint8_t *)malloc( in_buffer_size ); if( !in_buffer ) { fprintf( stderr, "bbexample: Not enough memory.\n" ); return 1; } - const int in_size = fread( in_buffer, 1, in_buffer_size, file ); + in_size = fread( in_buffer, 1, in_buffer_size, file ); if( in_size >= in_buffer_size ) { fprintf( stderr, "bbexample: Input file '%s' is too big.\n", argv[1] ); @@ -183,16 +183,14 @@ int main( const int argc, const char * const argv[] ) } fclose( file ); - int mid_size = 0; - uint8_t * const mid_buffer = bbcompress( in_buffer, in_size, &mid_size ); + mid_buffer = bbcompress( in_buffer, in_size, &mid_size ); if( !mid_buffer ) { fprintf( stderr, "bbexample: Not enough memory or compress error.\n" ); return 1; } - int out_size = 0; - uint8_t * const out_buffer = bbdecompress( mid_buffer, mid_size, &out_size ); + out_buffer = bbdecompress( mid_buffer, mid_size, &out_size ); if( !out_buffer ) { fprintf( stderr, "bbexample: Not enough memory or decompress error.\n" ); diff --git a/carg_parser.c b/carg_parser.c index 326bd41..973bb7e 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -88,7 +88,7 @@ static char parse_long_option( struct Arg_parser * const ap, const struct ap_Option options[], int * const argindp ) { - unsigned int len; + unsigned len; int index = -1; int i; char exact = 0, ambig = 0; diff --git a/cbuffer.c b/cbuffer.c new file mode 100644 index 0000000..d4dbeb9 --- /dev/null +++ b/cbuffer.c @@ -0,0 +1,134 @@ +/* Lzlib - A compression library for lzip files + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + + This library is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this library. If not, see . + + As a special exception, you may use this file as part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this + file, or you compile this file and link it with other files to + produce an executable, this file does not by itself cause the + resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. +*/ + +struct Circular_buffer + { + uint8_t * buffer; + int buffer_size; /* capacity == buffer_size - 1 */ + int get; /* buffer is empty when get == put */ + int put; + }; + +static inline void Cb_reset( struct Circular_buffer * const cb ) + { cb->get = 0; cb->put = 0; } + +static inline bool Cb_init( struct Circular_buffer * const cb, + const int buf_size ) + { + cb->buffer = (uint8_t *)malloc( buf_size + 1 ); + cb->buffer_size = buf_size + 1; + cb->get = 0; + cb->put = 0; + return ( cb->buffer != 0 ); + } + +static inline void Cb_free( struct Circular_buffer * const cb ) + { free( cb->buffer ); cb->buffer = 0; } + +static inline int Cb_used_bytes( const struct Circular_buffer * const cb ) + { return ( (cb->get <= cb->put) ? 0 : cb->buffer_size ) + cb->put - cb->get; } + +static inline int Cb_free_bytes( const struct Circular_buffer * const cb ) + { return ( (cb->get <= cb->put) ? cb->buffer_size : 0 ) - cb->put + cb->get - 1; } + +static inline uint8_t Cb_get_byte( struct Circular_buffer * const cb ) + { + const uint8_t b = cb->buffer[cb->get]; + if( ++cb->get >= cb->buffer_size ) cb->get = 0; + return b; + } + +static inline void Cb_put_byte( struct Circular_buffer * const cb, + const uint8_t b ) + { + cb->buffer[cb->put] = b; + if( ++cb->put >= cb->buffer_size ) cb->put = 0; + } + + +/* Copies up to 'out_size' bytes to 'out_buffer' and updates 'get'. + Returns the number of bytes copied. +*/ +static int Cb_read_data( struct Circular_buffer * const cb, + uint8_t * const out_buffer, const int out_size ) + { + int size = 0; + if( out_size < 0 ) return 0; + if( cb->get > cb->put ) + { + size = min( cb->buffer_size - cb->get, out_size ); + if( size > 0 ) + { + memcpy( out_buffer, cb->buffer + cb->get, size ); + cb->get += size; + if( cb->get >= cb->buffer_size ) cb->get = 0; + } + } + if( cb->get < cb->put ) + { + const int size2 = min( cb->put - cb->get, out_size - size ); + if( size2 > 0 ) + { + memcpy( out_buffer + size, cb->buffer + cb->get, size2 ); + cb->get += size2; + size += size2; + } + } + return size; + } + + +/* Copies up to 'in_size' bytes from 'in_buffer' and updates 'put'. + Returns the number of bytes copied. +*/ +static int Cb_write_data( struct Circular_buffer * const cb, + const uint8_t * const in_buffer, const int in_size ) + { + int size = 0; + if( in_size < 0 ) return 0; + if( cb->put >= cb->get ) + { + size = min( cb->buffer_size - cb->put - (cb->get == 0), in_size ); + if( size > 0 ) + { + memcpy( cb->buffer + cb->put, in_buffer, size ); + cb->put += size; + if( cb->put >= cb->buffer_size ) cb->put = 0; + } + } + if( cb->put < cb->get ) + { + const int size2 = min( cb->get - cb->put - 1, in_size - size ); + if( size2 > 0 ) + { + memcpy( cb->buffer + cb->put, in_buffer + size, size2 ); + cb->put += size2; + size += size2; + } + } + return size; + } diff --git a/clzip.h b/clzip.h index 7c918ba..f45d541 100644 --- a/clzip.h +++ b/clzip.h @@ -1,5 +1,5 @@ /* Lzlib - A compression library for lzip files - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,39 +32,26 @@ #define min(x,y) ((x) <= (y) ? (x) : (y)) #endif -typedef uint8_t State; +typedef int State; enum { states = 12 }; static inline bool St_is_char( const State st ) { return st < 7; } -static inline void St_set_char( State * const st ) +static inline State St_set_char( const State st ) { - static const uint8_t next[states] = - { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; - *st = next[*st]; + static const State next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; + return next[st]; } -static inline void St_set_match( State * const st ) - { - static const uint8_t next[states] = - { 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10 }; - *st = next[*st]; - } +static inline State St_set_match( const State st ) + { return ( ( st < 7 ) ? 7 : 10 ); } -static inline void St_set_rep( State * const st ) - { - static const uint8_t next[states] = - { 8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11 }; - *st = next[*st]; - } +static inline State St_set_rep( const State st ) + { return ( ( st < 7 ) ? 8 : 11 ); } -static inline void St_set_short_rep( State * const st ) - { - static const uint8_t next[states] = - { 9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11 }; - *st = next[*st]; - } +static inline State St_set_short_rep( const State st ) + { return ( ( st < 7 ) ? 9 : 11 ); } enum { @@ -80,7 +67,7 @@ enum { dis_slot_bits = 6, start_dis_model = 4, end_dis_model = 14, - modeled_distances = 1 << (end_dis_model / 2), + modeled_distances = 1 << (end_dis_model / 2), /* 128 */ dis_align_bits = 4, dis_align_size = 1 << dis_align_bits, @@ -98,34 +85,95 @@ enum { max_dis_states = 4 }; -static inline int get_dis_state( int len ) - { - len -= min_match_len; - if( len >= max_dis_states ) len = max_dis_states - 1; - return len; - } +static inline int get_dis_state( const int len ) + { return min( len - min_match_len, max_dis_states - 1 ); } + +static inline int get_lit_state( const uint8_t prev_byte ) + { return ( prev_byte >> ( 8 - literal_context_bits ) ); } enum { bit_model_move_bits = 5, bit_model_total_bits = 11, bit_model_total = 1 << bit_model_total_bits }; -typedef unsigned int Bit_model; +typedef int Bit_model; static inline void Bm_init( Bit_model * const probability ) { *probability = bit_model_total / 2; } +static inline void Bm_array_init( Bit_model * const p, const int size ) + { int i = 0; while( i < size ) p[i++] = bit_model_total / 2; } + + +/* Table of CRCs of all 8-bit messages. */ +static const uint32_t crc32[256] = + { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, + 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, + 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, + 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, + 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, + 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, + 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, + 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, + 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106, + 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, + 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, + 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, + 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, + 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, + 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, + 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, + 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, + 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, + 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, + 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E, + 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, + 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, + 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, + 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, + 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, + 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, + 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, + 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, + 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, + 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, + 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D }; + + +static inline void CRC32_update_byte( uint32_t * const crc, const uint8_t byte ) + { *crc = crc32[(*crc^byte)&0xFF] ^ ( *crc >> 8 ); } + +static inline void CRC32_update_buf( uint32_t * const crc, + const uint8_t * const buffer, const int size ) + { + int i; + for( i = 0; i < size; ++i ) + *crc = crc32[(*crc^buffer[i])&0xFF] ^ ( *crc >> 8 ); + } + -static inline int real_bits( const unsigned int value ) +static inline int real_bits( unsigned value ) { - int bits = 0, i = 1; - unsigned int mask = 1; - for( ; mask > 0; ++i, mask <<= 1 ) if( value & mask ) bits = i; + int bits = 0; + while( value > 0 ) { value >>= 1; ++bits; } return bits; } -static const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' }; +static const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; /* "LZIP" */ typedef uint8_t File_header[6]; /* 0-3 magic bytes */ /* 4 version */ @@ -144,11 +192,11 @@ static inline uint8_t Fh_version( const File_header data ) static inline bool Fh_verify_version( const File_header data ) { return ( data[4] <= 1 ); } -static inline int Fh_get_dictionary_size( const File_header data ) +static inline unsigned Fh_get_dictionary_size( const File_header data ) { - int sz = ( 1 << ( data[5] & 0x1F ) ); - if( sz > min_dictionary_size && sz <= max_dictionary_size ) - sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 0x07 ); + unsigned sz = ( 1 << ( data[5] & 0x1F ) ); + if( sz > min_dictionary_size ) + sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 ); return sz; } @@ -189,43 +237,43 @@ enum { Ft_size = 20 }; static inline int Ft_versioned_size( const int version ) { return ( ( version >= 1 ) ? 20 : 12 ); } -static inline uint32_t Ft_get_data_crc( const File_trailer data ) +static inline unsigned Ft_get_data_crc( const File_trailer data ) { - uint32_t tmp = 0; + unsigned tmp = 0; int i; for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } -static inline void Ft_set_data_crc( File_trailer data, uint32_t crc ) +static inline void Ft_set_data_crc( File_trailer data, unsigned crc ) { int i; for( i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } } -static inline long long Ft_get_data_size( const File_trailer data ) +static inline unsigned long long Ft_get_data_size( const File_trailer data ) { - long long tmp = 0; + unsigned long long tmp = 0; int i; for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } -static inline void Ft_set_data_size( File_trailer data, long long sz ) +static inline void Ft_set_data_size( File_trailer data, unsigned long long sz ) { int i; for( i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } } -static inline long long Ft_get_member_size( const File_trailer data ) +static inline unsigned long long Ft_get_member_size( const File_trailer data ) { - long long tmp = 0; + unsigned long long tmp = 0; int i; for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } -static inline void Ft_set_member_size( File_trailer data, long long sz ) +static inline void Ft_set_member_size( File_trailer data, unsigned long long sz ) { int i; for( i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } diff --git a/configure b/configure index 0d7af31..cfdce2a 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # configure script for Lzlib - A compression library for lzip files -# Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. +# Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -8,7 +8,7 @@ args= no_create= pkgname=lzlib -pkgversion=1.3 +pkgversion=1.4-rc2 soversion=1 progname=minilzip progname_shared= @@ -27,11 +27,19 @@ includedir='$(prefix)/include' infodir='$(datarootdir)/info' libdir='$(exec_prefix)/lib' mandir='$(datarootdir)/man' -CC= +CC=gcc CPPFLAGS= CFLAGS='-Wall -W -O2' LDFLAGS= +# checking whether we are using GNU C. +if [ ! -x /bin/gcc ] && + [ ! -x /usr/bin/gcc ] && + [ ! -x /usr/local/bin/gcc ] ; then + CC=cc + CFLAGS='-W -O2' +fi + # Loop over all args while [ -n "$1" ] ; do @@ -102,14 +110,14 @@ done srcdirtext= if [ -z "${srcdir}" ] ; then srcdirtext="or . or .." ; srcdir=. - if [ ! -r ${srcdir}/${srctrigger} ] ; then srcdir=.. ; fi - if [ ! -r ${srcdir}/${srctrigger} ] ; then + if [ ! -r "${srcdir}/${srctrigger}" ] ; then srcdir=.. ; fi + if [ ! -r "${srcdir}/${srctrigger}" ] ; then ## the sed command below emulates the dirname command srcdir=`echo $0 | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` fi fi -if [ ! -r ${srcdir}/${srctrigger} ] ; then +if [ ! -r "${srcdir}/${srctrigger}" ] ; then exec 1>&2 echo echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" @@ -118,18 +126,7 @@ if [ ! -r ${srcdir}/${srctrigger} ] ; then fi # Set srcdir to . if that's what it is. -if [ "`pwd`" = "`cd ${srcdir} ; pwd`" ] ; then srcdir=. ; fi - -# checking whether we are using GNU C. -if [ -z "${CC}" ] ; then # Let the user override the test. - if [ -x /bin/gcc ] || - [ -x /usr/bin/gcc ] || - [ -x /usr/local/bin/gcc ] ; then - CC="gcc" - else - CC="cc" - fi -fi +if [ "`pwd`" = "`cd "${srcdir}" ; pwd`" ] ; then srcdir=. ; fi echo if [ -z "${no_create}" ] ; then @@ -165,7 +162,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lzlib - A compression library for lzip files -# Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. +# Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. # This file was generated automatically by configure. Do not edit. # # This Makefile is free software: you have unlimited permission @@ -191,6 +188,6 @@ CPPFLAGS = ${CPPFLAGS} CFLAGS = ${CFLAGS} LDFLAGS = ${LDFLAGS} EOF -cat ${srcdir}/Makefile.in >> Makefile +cat "${srcdir}/Makefile.in" >> Makefile echo "OK. Now you can run make." diff --git a/decoder.c b/decoder.c index d3ea0d6..1f4cbf1 100644 --- a/decoder.c +++ b/decoder.c @@ -1,5 +1,5 @@ /* Lzlib - A compression library for lzip files - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,593 +25,23 @@ Public License. */ -struct Circular_buffer - { - uint8_t * buffer; - int buffer_size; /* capacity == buffer_size - 1 */ - int get; /* buffer is empty when get == put */ - int put; - }; - -static inline void Cb_reset( struct Circular_buffer * const cb ) - { cb->get = 0; cb->put = 0; } - -static inline bool Cb_init( struct Circular_buffer * const cb, - const int buf_size ) - { - cb->buffer = (uint8_t *)malloc( buf_size + 1 ); - cb->buffer_size = buf_size + 1; - cb->get = 0; - cb->put = 0; - return ( cb->buffer != 0 ); - } - -static inline void Cb_free( struct Circular_buffer * const cb ) - { free( cb->buffer ); cb->buffer = 0; } - -static inline int Cb_used_bytes( const struct Circular_buffer * const cb ) - { return ( (cb->get <= cb->put) ? 0 : cb->buffer_size ) + cb->put - cb->get; } - -static inline int Cb_free_bytes( const struct Circular_buffer * const cb ) - { return ( (cb->get <= cb->put) ? cb->buffer_size : 0 ) - cb->put + cb->get - 1; } - -static inline uint8_t Cb_get_byte( struct Circular_buffer * const cb ) - { - const uint8_t b = cb->buffer[cb->get]; - if( ++cb->get >= cb->buffer_size ) cb->get = 0; - return b; - } - -static inline void Cb_put_byte( struct Circular_buffer * const cb, - const uint8_t b ) - { - cb->buffer[cb->put] = b; - if( ++cb->put >= cb->buffer_size ) cb->put = 0; - } - - -/* Copies up to 'out_size' bytes to 'out_buffer' and updates 'get'. - Returns the number of bytes copied. -*/ -static int Cb_read_data( struct Circular_buffer * const cb, - uint8_t * const out_buffer, const int out_size ) - { - if( out_size < 0 ) return 0; - int size = 0; - if( cb->get > cb->put ) - { - size = min( cb->buffer_size - cb->get, out_size ); - if( size > 0 ) - { - memcpy( out_buffer, cb->buffer + cb->get, size ); - cb->get += size; - if( cb->get >= cb->buffer_size ) cb->get = 0; - } - } - if( cb->get < cb->put ) - { - const int size2 = min( cb->put - cb->get, out_size - size ); - if( size2 > 0 ) - { - memcpy( out_buffer + size, cb->buffer + cb->get, size2 ); - cb->get += size2; - size += size2; - } - } - return size; - } - - -/* Copies up to 'in_size' bytes from 'in_buffer' and updates 'put'. - Returns the number of bytes copied. -*/ -static int Cb_write_data( struct Circular_buffer * const cb, - const uint8_t * const in_buffer, const int in_size ) - { - if( in_size < 0 ) return 0; - int size = 0; - if( cb->put >= cb->get ) - { - size = min( cb->buffer_size - cb->put - (cb->get == 0), in_size ); - if( size > 0 ) - { - memcpy( cb->buffer + cb->put, in_buffer, size ); - cb->put += size; - if( cb->put >= cb->buffer_size ) cb->put = 0; - } - } - if( cb->put < cb->get ) - { - const int size2 = min( cb->get - cb->put - 1, in_size - size ); - if( size2 > 0 ) - { - memcpy( cb->buffer + cb->put, in_buffer + size, size2 ); - cb->put += size2; - size += size2; - } - } - return size; - } - - -enum { rd_min_available_bytes = 8 }; - -struct Range_decoder - { - struct Circular_buffer cb; - long long member_position; - uint32_t code; - uint32_t range; - bool reload_pending; - bool at_stream_end; - }; - -static inline bool Rd_init( struct Range_decoder * const rdec ) - { - if( !Cb_init( &rdec->cb, 65536 + rd_min_available_bytes ) ) return false; - rdec->member_position = 0; - rdec->code = 0; - rdec->range = 0xFFFFFFFFU; - rdec->reload_pending = false; - rdec->at_stream_end = false; - return true; - } - -static inline void Rd_free( struct Range_decoder * const rdec ) - { Cb_free( &rdec->cb ); } - -static inline int Rd_available_bytes( const struct Range_decoder * const rdec ) - { return Cb_used_bytes( &rdec->cb ); } - -static inline void Rd_finish( struct Range_decoder * const rdec ) - { rdec->at_stream_end = true; } - -static inline bool Rd_finished( const struct Range_decoder * const rdec ) - { return rdec->at_stream_end && !Cb_used_bytes( &rdec->cb ); } - -static inline int Rd_free_bytes( const struct Range_decoder * const rdec ) - { if( rdec->at_stream_end ) return 0; return Cb_free_bytes( &rdec->cb ); } - -static inline void Rd_purge( struct Range_decoder * const rdec ) - { rdec->at_stream_end = true; Cb_reset( &rdec->cb ); } - -static inline void Rd_reset( struct Range_decoder * const rdec ) - { rdec->at_stream_end = false; Cb_reset( &rdec->cb ); } - - -/* Seeks a member header and updates 'get'. - Returns true if it finds a valid header. -*/ -static bool Rd_find_header( struct Range_decoder * const rdec ) - { - while( rdec->cb.get != rdec->cb.put ) - { - if( rdec->cb.buffer[rdec->cb.get] == magic_string[0] ) - { - int g = rdec->cb.get; - int i; - File_header header; - for( i = 0; i < Fh_size; ++i ) - { - if( g == rdec->cb.put ) return false; /* not enough data */ - header[i] = rdec->cb.buffer[g]; - if( ++g >= rdec->cb.buffer_size ) g = 0; - } - if( Fh_verify( header ) ) return true; - } - if( ++rdec->cb.get >= rdec->cb.buffer_size ) rdec->cb.get = 0; - } - return false; - } - - -/* Returns true, fills 'header', and updates 'get' if 'get' points to a - valid header. - Else returns false and leaves 'get' unmodified. -*/ -static bool Rd_read_header( struct Range_decoder * const rdec, - File_header header ) - { - int g = rdec->cb.get; - int i; - for( i = 0; i < Fh_size; ++i ) - { - if( g == rdec->cb.put ) return false; /* not enough data */ - header[i] = rdec->cb.buffer[g]; - if( ++g >= rdec->cb.buffer_size ) g = 0; - } - if( Fh_verify( header ) ) - { - rdec->cb.get = g; - rdec->member_position = Fh_size; - rdec->reload_pending = true; - return true; - } - return false; - } - - -static inline int Rd_write_data( struct Range_decoder * const rdec, - const uint8_t * const inbuf, const int size ) - { - if( rdec->at_stream_end || size <= 0 ) return 0; - return Cb_write_data( &rdec->cb, inbuf, size ); - } - -static inline uint8_t Rd_get_byte( struct Range_decoder * const rdec ) - { - ++rdec->member_position; - return Cb_get_byte( &rdec->cb ); - } - - -static bool Rd_try_reload( struct Range_decoder * const rdec, const bool force ) - { - if( force ) rdec->reload_pending = true; - if( rdec->reload_pending && Rd_available_bytes( rdec ) >= 5 ) - { - int i; - rdec->reload_pending = false; - rdec->code = 0; - rdec->range = 0xFFFFFFFFU; - for( i = 0; i < 5; ++i ) - rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); - } - return !rdec->reload_pending; - } - - -static inline int Rd_decode( struct Range_decoder * const rdec, - const int num_bits ) - { - int symbol = 0; - int i; - for( i = num_bits; i > 0; --i ) - { - symbol <<= 1; - if( rdec->range <= 0x00FFFFFFU ) - { - rdec->range <<= 7; - rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); - if( rdec->code >= rdec->range ) - { rdec->code -= rdec->range; symbol |= 1; } - } - else - { - rdec->range >>= 1; - if( rdec->code >= rdec->range ) - { rdec->code -= rdec->range; symbol |= 1; } - } - } - return symbol; - } - - -static inline void Rd_normalize( struct Range_decoder * const rdec ) - { - if( rdec->range <= 0x00FFFFFFU ) - { - rdec->range <<= 8; - rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); - } - } - - -static inline int Rd_decode_bit( struct Range_decoder * const rdec, - Bit_model * const probability ) - { - uint32_t bound; - Rd_normalize( rdec ); - bound = ( rdec->range >> bit_model_total_bits ) * *probability; - if( rdec->code < bound ) - { - rdec->range = bound; - *probability += (bit_model_total - *probability) >> bit_model_move_bits; - return 0; - } - else - { - rdec->range -= bound; - rdec->code -= bound; - *probability -= *probability >> bit_model_move_bits; - return 1; - } - } - - -static inline int Rd_decode_tree( struct Range_decoder * const rdec, - Bit_model bm[], const int num_bits ) - { - int model = 1; - int i; - for( i = num_bits; i > 0; --i ) - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - return model - (1 << num_bits); - } - - -static inline int Rd_decode_matched( struct Range_decoder * const rdec, - Bit_model bm[], const int match_byte ) - { - Bit_model * const bm1 = bm + 0x100; - int symbol = 1; - int i; - for( i = 7; i >= 0; --i ) - { - const int match_bit = ( match_byte >> i ) & 1; - const int bit = Rd_decode_bit( rdec, &bm1[(match_bit<<8)+symbol] ); - symbol = ( symbol << 1 ) | bit; - if( match_bit != bit ) - { - while( --i >= 0 ) - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - break; - } - } - return symbol & 0xFF; - } - - -static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec, - Bit_model bm[], const int num_bits ) - { - int model = 1; - int symbol = 0; - int i; - for( i = 0; i < num_bits; ++i ) - { - const int bit = Rd_decode_bit( rdec, &bm[model] ); - model <<= 1; - if( bit ) { model |= 1; symbol |= (1 << i); } - } - return symbol; - } - - -static inline bool Rd_enough_available_bytes( const struct Range_decoder * const rdec ) - { - return ( Cb_used_bytes( &rdec->cb ) >= rd_min_available_bytes || - ( rdec->at_stream_end && Cb_used_bytes( &rdec->cb ) > 0 ) ); - } - - -static inline int Rd_read_data( struct Range_decoder * const rdec, - uint8_t * const outbuf, const int size ) - { - const int sz = Cb_read_data( &rdec->cb, outbuf, size ); - if( sz > 0 ) rdec->member_position += sz; - return sz; - } - - -struct Len_decoder - { - Bit_model choice1; - Bit_model choice2; - Bit_model bm_low[pos_states][len_low_symbols]; - Bit_model bm_mid[pos_states][len_mid_symbols]; - Bit_model bm_high[len_high_symbols]; - }; - -static inline void Led_init( struct Len_decoder * const len_decoder ) - { - int i, j; - Bm_init( &len_decoder->choice1 ); - Bm_init( &len_decoder->choice2 ); - for( i = 0; i < pos_states; ++i ) - for( j = 0; j < len_low_symbols; ++j ) - Bm_init( &len_decoder->bm_low[i][j] ); - for( i = 0; i < pos_states; ++i ) - for( j = 0; j < len_mid_symbols; ++j ) - Bm_init( &len_decoder->bm_mid[i][j] ); - for( i = 0; i < len_high_symbols; ++i ) - Bm_init( &len_decoder->bm_high[i] ); - } - -static inline int Led_decode( struct Len_decoder * const len_decoder, - struct Range_decoder * const rdec, - const int pos_state ) - { - if( Rd_decode_bit( rdec, &len_decoder->choice1 ) == 0 ) - return Rd_decode_tree( rdec, len_decoder->bm_low[pos_state], len_low_bits ); - if( Rd_decode_bit( rdec, &len_decoder->choice2 ) == 0 ) - return len_low_symbols + - Rd_decode_tree( rdec, len_decoder->bm_mid[pos_state], len_mid_bits ); - return len_low_symbols + len_mid_symbols + - Rd_decode_tree( rdec, len_decoder->bm_high, len_high_bits ); - } - - -struct Literal_decoder - { - Bit_model bm_literal[1<bm_literal[i][j] ); - } - -static inline int Lid_state( const uint8_t prev_byte ) - { return ( prev_byte >> ( 8 - literal_context_bits ) ); } - -static inline uint8_t Lid_decode( struct Literal_decoder * const lidec, - struct Range_decoder * const rdec, - const uint8_t prev_byte ) - { return Rd_decode_tree( rdec, lidec->bm_literal[Lid_state(prev_byte)], 8 ); } - -static inline uint8_t Lid_decode_matched( struct Literal_decoder * const lidec, - struct Range_decoder * const rdec, - const uint8_t prev_byte, - const uint8_t match_byte ) - { return Rd_decode_matched( rdec, lidec->bm_literal[Lid_state(prev_byte)], match_byte ); } - - -enum { lzd_min_free_bytes = max_match_len }; - -struct LZ_decoder - { - struct Circular_buffer cb; - long long partial_data_pos; - int dictionary_size; - uint32_t crc; - int member_version; - bool member_finished; - bool verify_trailer_pending; - unsigned int rep0; /* rep[0-3] latest four distances */ - unsigned int rep1; /* used for efficient coding of */ - unsigned int rep2; /* repeated distances */ - unsigned int rep3; - State state; - - Bit_model bm_match[states][pos_states]; - Bit_model bm_rep[states]; - Bit_model bm_rep0[states]; - Bit_model bm_rep1[states]; - Bit_model bm_rep2[states]; - Bit_model bm_len[states][pos_states]; - Bit_model bm_dis_slot[max_dis_states][1<cb, max( 65536, Fh_get_dictionary_size( header ) ) + lzd_min_free_bytes ) ) - return false; - decoder->partial_data_pos = 0; - decoder->dictionary_size = Fh_get_dictionary_size( header ); - decoder->crc = 0xFFFFFFFFU; - decoder->member_version = Fh_version( header ); - decoder->member_finished = false; - decoder->verify_trailer_pending = false; - decoder->rep0 = 0; - decoder->rep1 = 0; - decoder->rep2 = 0; - decoder->rep3 = 0; - decoder->state = 0; - - for( i = 0; i < states; ++i ) - { - for( j = 0; j < pos_states; ++j ) - { - Bm_init( &decoder->bm_match[i][j] ); - Bm_init( &decoder->bm_len[i][j] ); - } - Bm_init( &decoder->bm_rep[i] ); - Bm_init( &decoder->bm_rep0[i] ); - Bm_init( &decoder->bm_rep1[i] ); - Bm_init( &decoder->bm_rep2[i] ); - } - for( i = 0; i < max_dis_states; ++i ) - for( j = 0; j < 1<bm_dis_slot[i][j] ); - for( i = 0; i < modeled_distances-end_dis_model+1; ++i ) - Bm_init( &decoder->bm_dis[i] ); - for( i = 0; i < dis_align_size; ++i ) - Bm_init( &decoder->bm_align[i] ); - - decoder->range_decoder = rdec; - Led_init( &decoder->len_decoder ); - Led_init( &decoder->rep_match_len_decoder ); - Lid_init( &decoder->literal_decoder ); - decoder->cb.buffer[decoder->cb.buffer_size-1] = 0; /* prev_byte of first_byte */ - return true; - } - -static inline void LZd_free( struct LZ_decoder * const decoder ) - { Cb_free( &decoder->cb ); } - -static inline bool LZd_member_finished( const struct LZ_decoder * const decoder ) - { return ( decoder->member_finished && !Cb_used_bytes( &decoder->cb ) ); } - -static inline uint32_t LZd_crc( const struct LZ_decoder * const decoder ) - { return decoder->crc ^ 0xFFFFFFFFU; } - -static inline long long LZd_data_position( const struct LZ_decoder * const decoder ) - { return decoder->partial_data_pos + decoder->cb.put; } - - static bool LZd_verify_trailer( struct LZ_decoder * const decoder ) { File_trailer trailer; const int trailer_size = Ft_versioned_size( decoder->member_version ); - const long long member_size = + const unsigned long long member_size = decoder->range_decoder->member_position + trailer_size; int size = Rd_read_data( decoder->range_decoder, trailer, trailer_size ); - if( size < trailer_size ) return false; + if( size < trailer_size ) + return false; if( decoder->member_version == 0 ) Ft_set_member_size( trailer, member_size ); - if( decoder->range_decoder->code != 0 || - Ft_get_data_crc( trailer ) != LZd_crc( decoder ) || - Ft_get_data_size( trailer ) != LZd_data_position( decoder ) || - Ft_get_member_size( trailer ) != member_size ) return false; - return true; - } - - -static inline void LZd_copy_block( struct LZ_decoder * const decoder, - const int distance, int len ) - { - int i = decoder->cb.put - distance - 1; - if( i < 0 ) i += decoder->cb.buffer_size; - if( len < decoder->cb.buffer_size - max( decoder->cb.put, i ) && - len <= abs( decoder->cb.put - i ) ) - { - CRC32_update_buf( &decoder->crc, decoder->cb.buffer + i, len ); - memcpy( decoder->cb.buffer + decoder->cb.put, decoder->cb.buffer + i, len ); - decoder->cb.put += len; - } - else for( ; len > 0; --len ) - { - CRC32_update_byte( &decoder->crc, decoder->cb.buffer[i] ); - decoder->cb.buffer[decoder->cb.put] = decoder->cb.buffer[i]; - if( ++decoder->cb.put >= decoder->cb.buffer_size ) - { decoder->partial_data_pos += decoder->cb.put; decoder->cb.put = 0; } - if( ++i >= decoder->cb.buffer_size ) i = 0; - } - } - - -static inline bool LZd_enough_free_bytes( const struct LZ_decoder * const decoder ) - { return Cb_free_bytes( &decoder->cb ) >= lzd_min_free_bytes; } - - -static inline uint8_t LZd_get_byte( const struct LZ_decoder * const decoder, - const int distance ) - { - int i = decoder->cb.put - distance - 1; - if( i < 0 ) i += decoder->cb.buffer_size; - return decoder->cb.buffer[i]; - } - -static inline uint8_t LZd_get_prev_byte( const struct LZ_decoder * const decoder ) - { - const int i = - ( ( decoder->cb.put > 0 ) ? decoder->cb.put : decoder->cb.buffer_size ) - 1; - return decoder->cb.buffer[i]; - } - -static inline void LZd_put_byte( struct LZ_decoder * const decoder, - const uint8_t b ) - { - CRC32_update_byte( &decoder->crc, b ); - decoder->cb.buffer[decoder->cb.put] = b; - if( ++decoder->cb.put >= decoder->cb.buffer_size ) - { decoder->partial_data_pos += decoder->cb.put; decoder->cb.put = 0; } + return ( decoder->range_decoder->code == 0 && + Ft_get_data_crc( trailer ) == LZd_crc( decoder ) && + Ft_get_data_size( trailer ) == LZd_data_position( decoder ) && + Ft_get_member_size( trailer ) == member_size ); } @@ -642,12 +72,17 @@ static int LZd_decode_member( struct LZ_decoder * const decoder ) { const uint8_t prev_byte = LZd_get_prev_byte( decoder ); if( St_is_char( *state ) ) - LZd_put_byte( decoder, Lid_decode( &decoder->literal_decoder, - decoder->range_decoder, prev_byte ) ); + { + *state -= ( *state < 4 ) ? *state : 3; + LZd_put_byte( decoder, Rd_decode_tree( decoder->range_decoder, + decoder->bm_literal[get_lit_state(prev_byte)], 8 ) ); + } else - LZd_put_byte( decoder, Lid_decode_matched( &decoder->literal_decoder, - decoder->range_decoder, prev_byte, LZd_get_byte( decoder, decoder->rep0 ) ) ); - St_set_char( state ); + { + *state -= ( *state < 10 ) ? 3 : 6; + LZd_put_byte( decoder, Rd_decode_matched( decoder->range_decoder, + decoder->bm_literal[get_lit_state(prev_byte)], LZd_get_byte( decoder, decoder->rep0 ) ) ); + } } else { @@ -657,7 +92,7 @@ static int LZd_decode_member( struct LZ_decoder * const decoder ) len = 0; if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep0[*state] ) == 1 ) { - unsigned int distance; + unsigned distance; if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep1[*state] ) == 0 ) distance = decoder->rep1; else @@ -673,31 +108,33 @@ static int LZd_decode_member( struct LZ_decoder * const decoder ) else { if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_len[*state][pos_state] ) == 0 ) - { St_set_short_rep( state ); len = 1; } + { *state = St_set_short_rep( *state ); len = 1; } } if( len == 0 ) { - St_set_rep( state ); + *state = St_set_rep( *state ); len = min_match_len + Led_decode( &decoder->rep_match_len_decoder, decoder->range_decoder, pos_state ); } } else { int dis_slot; - const unsigned int rep0_saved = decoder->rep0; + const unsigned rep0_saved = decoder->rep0; len = min_match_len + Led_decode( &decoder->len_decoder, decoder->range_decoder, pos_state ); - dis_slot = Rd_decode_tree( decoder->range_decoder, decoder->bm_dis_slot[get_dis_state(len)], dis_slot_bits ); + dis_slot = Rd_decode_tree6( decoder->range_decoder, decoder->bm_dis_slot[get_dis_state(len)] ); if( dis_slot < start_dis_model ) decoder->rep0 = dis_slot; else { const int direct_bits = ( dis_slot >> 1 ) - 1; decoder->rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; if( dis_slot < end_dis_model ) - decoder->rep0 += Rd_decode_tree_reversed( decoder->range_decoder, decoder->bm_dis + decoder->rep0 - dis_slot, direct_bits ); + decoder->rep0 += Rd_decode_tree_reversed( decoder->range_decoder, + decoder->bm_dis + decoder->rep0 - dis_slot - 1, + direct_bits ); else { decoder->rep0 += Rd_decode( decoder->range_decoder, direct_bits - dis_align_bits ) << dis_align_bits; - decoder->rep0 += Rd_decode_tree_reversed( decoder->range_decoder, decoder->bm_align, dis_align_bits ); + decoder->rep0 += Rd_decode_tree_reversed4( decoder->range_decoder, decoder->bm_align ); if( decoder->rep0 == 0xFFFFFFFFU ) /* Marker found */ { decoder->rep0 = rep0_saved; @@ -721,9 +158,9 @@ static int LZd_decode_member( struct LZ_decoder * const decoder ) } decoder->rep3 = decoder->rep2; decoder->rep2 = decoder->rep1; decoder->rep1 = rep0_saved; - St_set_match( state ); - if( decoder->rep0 >= (unsigned int)decoder->dictionary_size || - ( decoder->rep0 >= (unsigned int)decoder->cb.put && + *state = St_set_match( *state ); + if( decoder->rep0 >= (unsigned)decoder->dictionary_size || + ( decoder->rep0 >= (unsigned)decoder->cb.put && !decoder->partial_data_pos ) ) return 1; } diff --git a/decoder.h b/decoder.h new file mode 100644 index 0000000..49e2bef --- /dev/null +++ b/decoder.h @@ -0,0 +1,451 @@ +/* Lzlib - A compression library for lzip files + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + + This library is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this library. If not, see . + + As a special exception, you may use this file as part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this + file, or you compile this file and link it with other files to + produce an executable, this file does not by itself cause the + resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. +*/ + +enum { rd_min_available_bytes = 8 }; + +struct Range_decoder + { + struct Circular_buffer cb; /* input buffer */ + unsigned long long member_position; + uint32_t code; + uint32_t range; + bool reload_pending; + bool at_stream_end; + }; + +static inline bool Rd_init( struct Range_decoder * const rdec ) + { + if( !Cb_init( &rdec->cb, 65536 + rd_min_available_bytes ) ) return false; + rdec->member_position = 0; + rdec->code = 0; + rdec->range = 0xFFFFFFFFU; + rdec->reload_pending = false; + rdec->at_stream_end = false; + return true; + } + +static inline void Rd_free( struct Range_decoder * const rdec ) + { Cb_free( &rdec->cb ); } + +static inline bool Rd_finished( const struct Range_decoder * const rdec ) + { return rdec->at_stream_end && !Cb_used_bytes( &rdec->cb ); } + +static inline void Rd_finish( struct Range_decoder * const rdec ) + { rdec->at_stream_end = true; } + +static inline bool Rd_enough_available_bytes( const struct Range_decoder * const rdec ) + { + return ( Cb_used_bytes( &rdec->cb ) >= rd_min_available_bytes || + ( rdec->at_stream_end && Cb_used_bytes( &rdec->cb ) > 0 ) ); + } + +static inline int Rd_available_bytes( const struct Range_decoder * const rdec ) + { return Cb_used_bytes( &rdec->cb ); } + +static inline int Rd_free_bytes( const struct Range_decoder * const rdec ) + { if( rdec->at_stream_end ) return 0; return Cb_free_bytes( &rdec->cb ); } + +static inline void Rd_purge( struct Range_decoder * const rdec ) + { rdec->at_stream_end = true; Cb_reset( &rdec->cb ); } + +static inline void Rd_reset( struct Range_decoder * const rdec ) + { rdec->at_stream_end = false; Cb_reset( &rdec->cb ); } + + +/* Seeks a member header and updates 'get'. + Returns true if it finds a valid header. +*/ +static bool Rd_find_header( struct Range_decoder * const rdec ) + { + while( rdec->cb.get != rdec->cb.put ) + { + if( rdec->cb.buffer[rdec->cb.get] == magic_string[0] ) + { + int get = rdec->cb.get; + int i; + File_header header; + for( i = 0; i < Fh_size; ++i ) + { + if( get == rdec->cb.put ) return false; /* not enough data */ + header[i] = rdec->cb.buffer[get]; + if( ++get >= rdec->cb.buffer_size ) get = 0; + } + if( Fh_verify( header ) ) return true; + } + if( ++rdec->cb.get >= rdec->cb.buffer_size ) rdec->cb.get = 0; + } + return false; + } + + +/* Returns true, fills 'header', and updates 'get' if 'get' points to a + valid header. + Else returns false and leaves 'get' unmodified. +*/ +static bool Rd_read_header( struct Range_decoder * const rdec, + File_header header ) + { + int get = rdec->cb.get; + int i; + for( i = 0; i < Fh_size; ++i ) + { + if( get == rdec->cb.put ) return false; /* not enough data */ + header[i] = rdec->cb.buffer[get]; + if( ++get >= rdec->cb.buffer_size ) get = 0; + } + if( Fh_verify( header ) ) + { + rdec->cb.get = get; + rdec->member_position = Fh_size; + rdec->reload_pending = true; + return true; + } + return false; + } + +static inline int Rd_write_data( struct Range_decoder * const rdec, + const uint8_t * const inbuf, const int size ) + { + if( rdec->at_stream_end || size <= 0 ) return 0; + return Cb_write_data( &rdec->cb, inbuf, size ); + } + +static inline uint8_t Rd_get_byte( struct Range_decoder * const rdec ) + { + ++rdec->member_position; + return Cb_get_byte( &rdec->cb ); + } + +static inline int Rd_read_data( struct Range_decoder * const rdec, + uint8_t * const outbuf, const int size ) + { + const int sz = Cb_read_data( &rdec->cb, outbuf, size ); + if( sz > 0 ) rdec->member_position += sz; + return sz; + } + +static bool Rd_try_reload( struct Range_decoder * const rdec, const bool force ) + { + if( force ) rdec->reload_pending = true; + if( rdec->reload_pending && Rd_available_bytes( rdec ) >= 5 ) + { + int i; + rdec->reload_pending = false; + rdec->code = 0; + for( i = 0; i < 5; ++i ) + rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); + rdec->range = 0xFFFFFFFFU; + } + return !rdec->reload_pending; + } + +static inline void Rd_normalize( struct Range_decoder * const rdec ) + { + if( rdec->range <= 0x00FFFFFFU ) + { + rdec->range <<= 8; + rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); + } + } + +static inline int Rd_decode( struct Range_decoder * const rdec, + const int num_bits ) + { + int symbol = 0; + int i; + for( i = num_bits; i > 0; --i ) + { + uint32_t mask; + Rd_normalize( rdec ); + rdec->range >>= 1; +/* symbol <<= 1; */ +/* if( rdec->code >= rdec->range ) { rdec->code -= rdec->range; symbol |= 1; } */ + mask = 0U - (rdec->code < rdec->range); + rdec->code -= rdec->range; + rdec->code += rdec->range & mask; + symbol = (symbol << 1) + (mask + 1); + } + return symbol; + } + +static inline int Rd_decode_bit( struct Range_decoder * const rdec, + Bit_model * const probability ) + { + uint32_t bound; + Rd_normalize( rdec ); + bound = ( rdec->range >> bit_model_total_bits ) * *probability; + if( rdec->code < bound ) + { + rdec->range = bound; + *probability += (bit_model_total - *probability) >> bit_model_move_bits; + return 0; + } + else + { + rdec->range -= bound; + rdec->code -= bound; + *probability -= *probability >> bit_model_move_bits; + return 1; + } + } + +static inline int Rd_decode_tree( struct Range_decoder * const rdec, + Bit_model bm[], const int num_bits ) + { + int model = 1; + int i; + for( i = num_bits; i > 0; --i ) + model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); + return model - (1 << num_bits); + } + +static inline int Rd_decode_tree6( struct Range_decoder * const rdec, + Bit_model bm[] ) + { + int model = 1; + model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); + model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); + model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); + model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); + model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); + model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); + return model - (1 << 6); + } + +static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec, + Bit_model bm[], const int num_bits ) + { + int model = 1; + int symbol = 0; + int i; + for( i = 0; i < num_bits; ++i ) + { + const bool bit = Rd_decode_bit( rdec, &bm[model] ); + model <<= 1; + if( bit ) { ++model; symbol |= (1 << i); } + } + return symbol; + } + +static inline int Rd_decode_tree_reversed4( struct Range_decoder * const rdec, + Bit_model bm[] ) + { + int model = 1; + int symbol = 0; + int bit = Rd_decode_bit( rdec, &bm[model] ); + model = (model << 1) + bit; symbol |= bit; + bit = Rd_decode_bit( rdec, &bm[model] ); + model = (model << 1) + bit; symbol |= (bit << 1); + bit = Rd_decode_bit( rdec, &bm[model] ); + model = (model << 1) + bit; symbol |= (bit << 2); + if( Rd_decode_bit( rdec, &bm[model] ) ) symbol |= 8; + return symbol; + } + +static inline int Rd_decode_matched( struct Range_decoder * const rdec, + Bit_model bm[], int match_byte ) + { + Bit_model * const bm1 = bm + 0x100; + int symbol = 1; + int i; + for( i = 7; i >= 0; --i ) + { + int match_bit, bit; + match_byte <<= 1; + match_bit = match_byte & 0x100; + bit = Rd_decode_bit( rdec, &bm1[match_bit+symbol] ); + symbol = ( symbol << 1 ) | bit; + if( match_bit != bit << 8 ) + { + while( symbol < 0x100 ) + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + break; + } + } + return symbol - 0x100; + } + + +struct Len_decoder + { + Bit_model choice1; + Bit_model choice2; + Bit_model bm_low[pos_states][len_low_symbols]; + Bit_model bm_mid[pos_states][len_mid_symbols]; + Bit_model bm_high[len_high_symbols]; + }; + +static inline void Led_init( struct Len_decoder * const len_decoder ) + { + Bm_init( &len_decoder->choice1 ); + Bm_init( &len_decoder->choice2 ); + Bm_array_init( len_decoder->bm_low[0], pos_states * len_low_symbols ); + Bm_array_init( len_decoder->bm_mid[0], pos_states * len_mid_symbols ); + Bm_array_init( len_decoder->bm_high, len_high_symbols ); + } + +static inline int Led_decode( struct Len_decoder * const len_decoder, + struct Range_decoder * const rdec, + const int pos_state ) + { + if( Rd_decode_bit( rdec, &len_decoder->choice1 ) == 0 ) + return Rd_decode_tree( rdec, len_decoder->bm_low[pos_state], len_low_bits ); + if( Rd_decode_bit( rdec, &len_decoder->choice2 ) == 0 ) + return len_low_symbols + + Rd_decode_tree( rdec, len_decoder->bm_mid[pos_state], len_mid_bits ); + return len_low_symbols + len_mid_symbols + + Rd_decode_tree( rdec, len_decoder->bm_high, len_high_bits ); + } + + +enum { lzd_min_free_bytes = max_match_len }; + +struct LZ_decoder + { + struct Circular_buffer cb; + unsigned long long partial_data_pos; + int dictionary_size; + uint32_t crc; + int member_version; + bool member_finished; + bool verify_trailer_pending; + unsigned rep0; /* rep[0-3] latest four distances */ + unsigned rep1; /* used for efficient coding of */ + unsigned rep2; /* repeated distances */ + unsigned rep3; + State state; + + Bit_model bm_literal[1<cb ) >= lzd_min_free_bytes; } + +static inline uint8_t LZd_get_prev_byte( const struct LZ_decoder * const decoder ) + { + const int i = + ( ( decoder->cb.put > 0 ) ? decoder->cb.put : decoder->cb.buffer_size ) - 1; + return decoder->cb.buffer[i]; + } + +static inline uint8_t LZd_get_byte( const struct LZ_decoder * const decoder, + const int distance ) + { + int i = decoder->cb.put - distance - 1; + if( i < 0 ) i += decoder->cb.buffer_size; + return decoder->cb.buffer[i]; + } + +static inline void LZd_put_byte( struct LZ_decoder * const decoder, + const uint8_t b ) + { + CRC32_update_byte( &decoder->crc, b ); + decoder->cb.buffer[decoder->cb.put] = b; + if( ++decoder->cb.put >= decoder->cb.buffer_size ) + { decoder->partial_data_pos += decoder->cb.put; decoder->cb.put = 0; } + } + +static inline void LZd_copy_block( struct LZ_decoder * const decoder, + const int distance, int len ) + { + int i = decoder->cb.put - distance - 1; + if( i < 0 ) i += decoder->cb.buffer_size; + if( len < decoder->cb.buffer_size - max( decoder->cb.put, i ) && + len <= abs( decoder->cb.put - i ) ) /* no wrap, no overlap */ + { + CRC32_update_buf( &decoder->crc, decoder->cb.buffer + i, len ); + memcpy( decoder->cb.buffer + decoder->cb.put, decoder->cb.buffer + i, len ); + decoder->cb.put += len; + } + else for( ; len > 0; --len ) + { + LZd_put_byte( decoder, decoder->cb.buffer[i] ); + if( ++i >= decoder->cb.buffer_size ) i = 0; + } + } + +static inline bool LZd_init( struct LZ_decoder * const decoder, + const File_header header, + struct Range_decoder * const rdec ) + { + decoder->dictionary_size = Fh_get_dictionary_size( header ); + if( !Cb_init( &decoder->cb, max( 65536, decoder->dictionary_size ) + lzd_min_free_bytes ) ) + return false; + decoder->partial_data_pos = 0; + decoder->crc = 0xFFFFFFFFU; + decoder->member_version = Fh_version( header ); + decoder->member_finished = false; + decoder->verify_trailer_pending = false; + decoder->rep0 = 0; + decoder->rep1 = 0; + decoder->rep2 = 0; + decoder->rep3 = 0; + decoder->state = 0; + + Bm_array_init( decoder->bm_literal[0], (1 << literal_context_bits) * 0x300 ); + Bm_array_init( decoder->bm_match[0], states * pos_states ); + Bm_array_init( decoder->bm_rep, states ); + Bm_array_init( decoder->bm_rep0, states ); + Bm_array_init( decoder->bm_rep1, states ); + Bm_array_init( decoder->bm_rep2, states ); + Bm_array_init( decoder->bm_len[0], states * pos_states ); + Bm_array_init( decoder->bm_dis_slot[0], max_dis_states * (1 << dis_slot_bits) ); + Bm_array_init( decoder->bm_dis, modeled_distances - end_dis_model ); + Bm_array_init( decoder->bm_align, dis_align_size ); + + decoder->range_decoder = rdec; + Led_init( &decoder->len_decoder ); + Led_init( &decoder->rep_match_len_decoder ); + decoder->cb.buffer[decoder->cb.buffer_size-1] = 0; /* prev_byte of first_byte */ + return true; + } + +static inline void LZd_free( struct LZ_decoder * const decoder ) + { Cb_free( &decoder->cb ); } + +static inline bool LZd_member_finished( const struct LZ_decoder * const decoder ) + { return ( decoder->member_finished && !Cb_used_bytes( &decoder->cb ) ); } + +static inline unsigned LZd_crc( const struct LZ_decoder * const decoder ) + { return decoder->crc ^ 0xFFFFFFFFU; } + +static inline unsigned long long +LZd_data_position( const struct LZ_decoder * const decoder ) + { return decoder->partial_data_pos + decoder->cb.put; } diff --git a/doc/lzlib.info b/doc/lzlib.info index bfa5a82..511fda5 100644 --- a/doc/lzlib.info +++ b/doc/lzlib.info @@ -12,7 +12,7 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir) Lzlib Manual ************ -This manual is for Lzlib (version 1.3, 29 February 2012). +This manual is for Lzlib (version 1.4-rc2, 7 February 2013). * Menu: @@ -30,7 +30,7 @@ This manual is for Lzlib (version 1.3, 29 February 2012). * Concept Index:: Index of concepts - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -113,7 +113,7 @@ minimum sizes: * Input compression buffer. Written to by the `LZ_compress_write' function. Its size is two times the dictionary size set with the - `LZ_compress_open' function or 128KiB, whichever is larger. + `LZ_compress_open' function or 64KiB, whichever is larger. * Output compression buffer. Read from by the `LZ_compress_read' function. Its size is 64KiB. @@ -161,12 +161,13 @@ File: lzlib.info, Node: Compression Functions, Next: Decompression Functions, *********************** These are the functions used to compress data. In case of error, all of -them return -1, except `LZ_compress_open' whose return value must be -verified by calling `LZ_compress_errno' before using it. +them return -1 or 0, for signed and unsigned return values respectively, +except `LZ_compress_open' whose return value must be verified by +calling `LZ_compress_errno' before using it. -- Function: struct LZ_Encoder * LZ_compress_open ( const int - DICTIONARY_SIZE, const int MATCH_LEN_LIMIT, const long long - MEMBER_SIZE ) + DICTIONARY_SIZE, const int MATCH_LEN_LIMIT, const unsigned + long long MEMBER_SIZE ) Initializes the internal stream state for compression and returns a pointer that can only be used as the ENCODER argument for the other LZ_compress functions, or a null pointer if the encoder @@ -191,7 +192,7 @@ verified by calling `LZ_compress_errno' before using it. size limit is 100kB. Small member size may degrade compression ratio, so use it only when needed. To produce a single-member data stream, give MEMBER_SIZE a value larger than the amount of data to - be produced, for example LLONG_MAX. + be produced, for example INT64_MAX. -- Function: int LZ_compress_close ( struct LZ_Encoder * const ENCODER ) @@ -209,7 +210,7 @@ verified by calling `LZ_compress_errno' before using it. new member can be started with `LZ_compress_restart_member'. -- Function: int LZ_compress_restart_member ( struct LZ_Encoder * - const ENCODER, const long long MEMBER_SIZE ) + const ENCODER, const unsigned long long MEMBER_SIZE ) Use this function to start a new member, in a multi-member data stream. Call this function only after `LZ_compress_member_finished' indicates that the current member @@ -245,10 +246,10 @@ verified by calling `LZ_compress_errno' before using it. -- Function: int LZ_compress_write_size ( struct LZ_Encoder * const ENCODER ) The `LZ_compress_write_size' function returns the maximum number of - bytes that can be inmediately written through the + bytes that can be immediately written through the `LZ_compress_write' function. - It is guaranteed that an inmediate call to `LZ_compress_write' will + It is guaranteed that an immediate call to `LZ_compress_write' will accept a SIZE up to the returned number of bytes. -- Function: enum LZ_Errno LZ_compress_errno ( struct LZ_Encoder * @@ -266,22 +267,22 @@ verified by calling `LZ_compress_errno' before using it. has been fully read and `LZ_compress_restart_member' can be safely called. Otherwise it returns 0. - -- Function: long long LZ_compress_data_position ( struct LZ_Encoder * - const ENCODER ) + -- Function: unsigned long long LZ_compress_data_position ( struct + LZ_Encoder * const ENCODER ) Returns the number of input bytes already compressed in the current member. - -- Function: long long LZ_compress_member_position ( struct LZ_Encoder - * const ENCODER ) + -- Function: unsigned long long LZ_compress_member_position ( struct + LZ_Encoder * const ENCODER ) Returns the number of compressed bytes already produced, but perhaps not yet read, in the current member. - -- Function: long long LZ_compress_total_in_size ( struct LZ_Encoder * - const ENCODER ) + -- Function: unsigned long long LZ_compress_total_in_size ( struct + LZ_Encoder * const ENCODER ) Returns the total number of input bytes already compressed. - -- Function: long long LZ_compress_total_out_size ( struct LZ_Encoder - * const ENCODER ) + -- Function: unsigned long long LZ_compress_total_out_size ( struct + LZ_Encoder * const ENCODER ) Returns the total number of compressed bytes already produced, but perhaps not yet read. @@ -292,8 +293,9 @@ File: lzlib.info, Node: Decompression Functions, Next: Error Codes, Prev: Com ************************* These are the functions used to decompress data. In case of error, all -of them return -1, except `LZ_decompress_open' whose return value must -be verified by calling `LZ_decompress_errno' before using it. +of them return -1 or 0, for signed and unsigned return values +respectively, except `LZ_decompress_open' whose return value must be +verified by calling `LZ_decompress_errno' before using it. -- Function: struct LZ_Decoder * LZ_decompress_open ( void ) Initializes the internal stream state for decompression and @@ -360,10 +362,10 @@ be verified by calling `LZ_decompress_errno' before using it. -- Function: int LZ_decompress_write_size ( struct LZ_Decoder * const DECODER ) The `LZ_decompress_write_size' function returns the maximum number - of bytes that can be inmediately written through the + of bytes that can be immediately written through the `LZ_decompress_write' function. - It is guaranteed that an inmediate call to `LZ_decompress_write' + It is guaranteed that an immediate call to `LZ_decompress_write' will accept a SIZE up to the returned number of bytes. -- Function: enum LZ_Errno LZ_decompress_errno ( struct LZ_Decoder * @@ -391,27 +393,27 @@ be verified by calling `LZ_decompress_errno' before using it. const DECODER ) Returns the dictionary size of current member from member header. - -- Function: unsigned int LZ_decompress_data_crc ( struct LZ_Decoder * + -- Function: unsigned LZ_decompress_data_crc ( struct LZ_Decoder * const DECODER ) Returns the 32 bit Cyclic Redundancy Check of the data decompressed from the current member. The returned value is valid only when `LZ_decompress_member_finished' returns 1. - -- Function: long long LZ_decompress_data_position ( struct LZ_Decoder - * const DECODER ) + -- Function: unsigned long long LZ_decompress_data_position ( struct + LZ_Decoder * const DECODER ) Returns the number of decompressed bytes already produced, but perhaps not yet read, in the current member. - -- Function: long long LZ_decompress_member_position ( struct + -- Function: unsigned long long LZ_decompress_member_position ( struct LZ_Decoder * const DECODER ) Returns the number of input bytes already decompressed in the current member. - -- Function: long long LZ_decompress_total_in_size ( struct LZ_Decoder - * const DECODER ) + -- Function: unsigned long long LZ_decompress_total_in_size ( struct + LZ_Decoder * const DECODER ) Returns the total number of input bytes already decompressed. - -- Function: long long LZ_decompress_total_out_size ( struct + -- Function: unsigned long long LZ_decompress_total_out_size ( struct LZ_Decoder * const DECODER ) Returns the total number of decompressed bytes already produced, but perhaps not yet read. @@ -490,7 +492,12 @@ File: lzlib.info, Node: Data Format, Next: Examples, Prev: Error Messages, U 9 Data Format ************* -In the diagram below, a box like this: +Perfection is reached, not when there is no longer anything to add, but +when there is no longer anything to take away. +-- Antoine de Saint-Exupery + + + In the diagram below, a box like this: +---+ | | <-- the vertical bars might be missing +---+ @@ -519,9 +526,8 @@ with no additional information before, between, or after them. "LZIP". `VN (version number, 1 byte)' - Just in case something needs to be modified in the future. Valid - values are 0 and 1. Version 0 files are deprecated. They can - contain only one member and lack the `Member size' field. + Just in case something needs to be modified in the future. 1 for + now. `DS (coded dictionary size, 1 byte)' Bits 4-0 contain the base 2 logarithm of the base dictionary size. @@ -541,9 +547,9 @@ with no additional information before, between, or after them. Size of the uncompressed original data. `Member size (8 bytes)' - Total size of the member, including header and trailer. This - facilitates safe recovery of undamaged members from multi-member - files. + Total size of the member, including header and trailer. This field + acts as a distributed index, and facilitates safe recovery of + undamaged members from multi-member files.  @@ -709,18 +715,18 @@ Concept Index  Tag Table: Node: Top219 -Node: Introduction1318 -Node: Library Version3164 -Node: Buffering3809 -Node: Parameter Limits4929 -Node: Compression Functions5886 -Node: Decompression Functions11985 -Node: Error Codes18057 -Node: Error Messages19996 -Node: Data Format20575 -Node: Examples22584 -Node: Problems26667 -Node: Concept Index27239 +Node: Introduction1327 +Node: Library Version3173 +Node: Buffering3818 +Node: Parameter Limits4937 +Node: Compression Functions5894 +Node: Decompression Functions12104 +Node: Error Codes18265 +Node: Error Messages20204 +Node: Data Format20783 +Node: Examples22864 +Node: Problems26947 +Node: Concept Index27519  End Tag Table diff --git a/doc/lzlib.texinfo b/doc/lzlib.texinfo index 17b78f1..eeae174 100644 --- a/doc/lzlib.texinfo +++ b/doc/lzlib.texinfo @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 29 February 2012 -@set VERSION 1.3 +@set UPDATED 7 February 2013 +@set VERSION 1.4-rc2 @dircategory Data Compression @direntry @@ -50,7 +50,7 @@ This manual is for Lzlib (version @value{VERSION}, @value{UPDATED}). @end menu @sp 1 -Copyright @copyright{} 2009, 2010, 2011, 2012 Antonio Diaz Diaz. +Copyright @copyright{} 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -134,7 +134,7 @@ sizes: @itemize @bullet @item Input compression buffer. Written to by the @samp{LZ_compress_write} function. Its size is two times the dictionary -size set with the @samp{LZ_compress_open} function or 128KiB, whichever +size set with the @samp{LZ_compress_open} function or 64KiB, whichever is larger. @item Output compression buffer. Read from by the @@ -187,11 +187,12 @@ Returns the largest valid match length limit [273]. @cindex compression functions These are the functions used to compress data. In case of error, all of -them return -1, except @samp{LZ_compress_open} whose return value must -be verified by calling @samp{LZ_compress_errno} before using it. +them return -1 or 0, for signed and unsigned return values respectively, +except @samp{LZ_compress_open} whose return value must be verified by +calling @samp{LZ_compress_errno} before using it. -@deftypefun {struct LZ_Encoder *} LZ_compress_open ( const int @var{dictionary_size}, const int @var{match_len_limit}, const long long @var{member_size} ) +@deftypefun {struct LZ_Encoder *} LZ_compress_open ( const int @var{dictionary_size}, const int @var{match_len_limit}, const unsigned long long @var{member_size} ) Initializes the internal stream state for compression and returns a pointer that can only be used as the @var{encoder} argument for the other LZ_compress functions, or a null pointer if the encoder could not @@ -216,7 +217,7 @@ ratios but longer compression times. size limit is 100kB. Small member size may degrade compression ratio, so use it only when needed. To produce a single-member data stream, give @var{member_size} a value larger than the amount of data to be produced, -for example LLONG_MAX. +for example INT64_MAX. @end deftypefun @@ -237,7 +238,7 @@ After all the produced compressed data has been read with @end deftypefun -@deftypefun int LZ_compress_restart_member ( struct LZ_Encoder * const @var{encoder}, const long long @var{member_size} ) +@deftypefun int LZ_compress_restart_member ( struct LZ_Encoder * const @var{encoder}, const unsigned long long @var{member_size} ) Use this function to start a new member, in a multi-member data stream. Call this function only after @samp{LZ_compress_member_finished} indicates that the current member has been fully read (with the @@ -278,10 +279,10 @@ not an error. @deftypefun int LZ_compress_write_size ( struct LZ_Encoder * const @var{encoder} ) The @samp{LZ_compress_write_size} function returns the maximum number of -bytes that can be inmediately written through the @samp{LZ_compress_write} +bytes that can be immediately written through the @samp{LZ_compress_write} function. -It is guaranteed that an inmediate call to @samp{LZ_compress_write} will +It is guaranteed that an immediate call to @samp{LZ_compress_write} will accept a @var{size} up to the returned number of bytes. @end deftypefun @@ -304,24 +305,24 @@ Otherwise it returns 0. @end deftypefun -@deftypefun {long long} LZ_compress_data_position ( struct LZ_Encoder * const @var{encoder} ) +@deftypefun {unsigned long long} LZ_compress_data_position ( struct LZ_Encoder * const @var{encoder} ) Returns the number of input bytes already compressed in the current member. @end deftypefun -@deftypefun {long long} LZ_compress_member_position ( struct LZ_Encoder * const @var{encoder} ) +@deftypefun {unsigned long long} LZ_compress_member_position ( struct LZ_Encoder * const @var{encoder} ) Returns the number of compressed bytes already produced, but perhaps not yet read, in the current member. @end deftypefun -@deftypefun {long long} LZ_compress_total_in_size ( struct LZ_Encoder * const @var{encoder} ) +@deftypefun {unsigned long long} LZ_compress_total_in_size ( struct LZ_Encoder * const @var{encoder} ) Returns the total number of input bytes already compressed. @end deftypefun -@deftypefun {long long} LZ_compress_total_out_size ( struct LZ_Encoder * const @var{encoder} ) +@deftypefun {unsigned long long} LZ_compress_total_out_size ( struct LZ_Encoder * const @var{encoder} ) Returns the total number of compressed bytes already produced, but perhaps not yet read. @end deftypefun @@ -332,8 +333,9 @@ perhaps not yet read. @cindex decompression functions These are the functions used to decompress data. In case of error, all -of them return -1, except @samp{LZ_decompress_open} whose return value -must be verified by calling @samp{LZ_decompress_errno} before using it. +of them return -1 or 0, for signed and unsigned return values +respectively, except @samp{LZ_decompress_open} whose return value must +be verified by calling @samp{LZ_decompress_errno} before using it. @deftypefun {struct LZ_Decoder *} LZ_decompress_open ( void ) @@ -410,10 +412,10 @@ not an error. @deftypefun int LZ_decompress_write_size ( struct LZ_Decoder * const @var{decoder} ) The @samp{LZ_decompress_write_size} function returns the maximum number -of bytes that can be inmediately written through the +of bytes that can be immediately written through the @samp{LZ_decompress_write} function. -It is guaranteed that an inmediate call to @samp{LZ_decompress_write} +It is guaranteed that an immediate call to @samp{LZ_decompress_write} will accept a @var{size} up to the returned number of bytes. @end deftypefun @@ -448,31 +450,31 @@ Returns the dictionary size of current member from member header. @end deftypefun -@deftypefun {unsigned int} LZ_decompress_data_crc ( struct LZ_Decoder * const @var{decoder} ) +@deftypefun {unsigned} LZ_decompress_data_crc ( struct LZ_Decoder * const @var{decoder} ) Returns the 32 bit Cyclic Redundancy Check of the data decompressed from the current member. The returned value is valid only when @samp{LZ_decompress_member_finished} returns 1. @end deftypefun -@deftypefun {long long} LZ_decompress_data_position ( struct LZ_Decoder * const @var{decoder} ) +@deftypefun {unsigned long long} LZ_decompress_data_position ( struct LZ_Decoder * const @var{decoder} ) Returns the number of decompressed bytes already produced, but perhaps not yet read, in the current member. @end deftypefun -@deftypefun {long long} LZ_decompress_member_position ( struct LZ_Decoder * const @var{decoder} ) +@deftypefun {unsigned long long} LZ_decompress_member_position ( struct LZ_Decoder * const @var{decoder} ) Returns the number of input bytes already decompressed in the current member. @end deftypefun -@deftypefun {long long} LZ_decompress_total_in_size ( struct LZ_Decoder * const @var{decoder} ) +@deftypefun {unsigned long long} LZ_decompress_total_in_size ( struct LZ_Decoder * const @var{decoder} ) Returns the total number of input bytes already decompressed. @end deftypefun -@deftypefun {long long} LZ_decompress_total_out_size ( struct LZ_Decoder * const @var{decoder} ) +@deftypefun {unsigned long long} LZ_decompress_total_out_size ( struct LZ_Decoder * const @var{decoder} ) Returns the total number of decompressed bytes already produced, but perhaps not yet read. @end deftypefun @@ -555,6 +557,11 @@ The value of @var{lz_errno} normally comes from a call to @chapter Data Format @cindex data format +Perfection is reached, not when there is no longer anything to add, but +when there is no longer anything to take away.@* +--- Antoine de Saint-Exupery + +@sp 1 In the diagram below, a box like this: @verbatim +---+ @@ -590,9 +597,7 @@ All multibyte values are stored in little endian order. A four byte string, identifying the lzip format, with the value "LZIP". @item VN (version number, 1 byte) -Just in case something needs to be modified in the future. Valid values -are 0 and 1. Version 0 files are deprecated. They can contain only one -member and lack the @samp{Member size} field. +Just in case something needs to be modified in the future. 1 for now. @item DS (coded dictionary size, 1 byte) Bits 4-0 contain the base 2 logarithm of the base dictionary size.@* @@ -612,8 +617,9 @@ CRC of the uncompressed original data. Size of the uncompressed original data. @item Member size (8 bytes) -Total size of the member, including header and trailer. This facilitates -safe recovery of undamaged members from multi-member files. +Total size of the member, including header and trailer. This field acts +as a distributed index, and facilitates safe recovery of undamaged +members from multi-member files. @end table diff --git a/doc/minilzip.1 b/doc/minilzip.1 new file mode 100644 index 0000000..a7f6d21 --- /dev/null +++ b/doc/minilzip.1 @@ -0,0 +1,84 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. +.TH MINILZIP "1" "February 2013" "Minilzip 1.4-rc2" "User Commands" +.SH NAME +Minilzip \- reduces the size of files +.SH SYNOPSIS +.B minilzip +[\fIoptions\fR] [\fIfiles\fR] +.SH DESCRIPTION +Minilzip \- A test program for the lzlib library. +.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-b\fR, \fB\-\-member\-size=\fR +set member size limit in bytes +.TP +\fB\-c\fR, \fB\-\-stdout\fR +send output to standard output +.TP +\fB\-d\fR, \fB\-\-decompress\fR +decompress +.TP +\fB\-f\fR, \fB\-\-force\fR +overwrite existing output files +.TP +\fB\-F\fR, \fB\-\-recompress\fR +force recompression of compressed files +.TP +\fB\-k\fR, \fB\-\-keep\fR +keep (don't delete) input files +.TP +\fB\-m\fR, \fB\-\-match\-length=\fR +set match length limit in bytes [36] +.TP +\fB\-o\fR, \fB\-\-output=\fR +if reading stdin, place the output into +.TP +\fB\-q\fR, \fB\-\-quiet\fR +suppress all messages +.TP +\fB\-s\fR, \fB\-\-dictionary\-size=\fR +set dictionary size limit in bytes [8MiB] +.TP +\fB\-S\fR, \fB\-\-volume\-size=\fR +set volume size limit in bytes +.TP +\fB\-t\fR, \fB\-\-test\fR +test compressed file integrity +.TP +\fB\-v\fR, \fB\-\-verbose\fR +be verbose (a 2nd \fB\-v\fR gives more) +.TP +\fB\-1\fR .. \fB\-9\fR +set compression level [default 6] +.TP +\fB\-\-fast\fR +alias for \fB\-1\fR +.TP +\fB\-\-best\fR +alias for \fB\-9\fR +.PP +If no file names are given, minilzip compresses or decompresses +from standard input to standard output. +Numbers may be followed by a multiplier: k = kB = 10^3 = 1000, +Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... +The bidimensional parameter space of LZMA can't be mapped to a linear +scale optimal for all files. If your files are large, very repetitive, +etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR +options directly to achieve optimal performance. +.SH "REPORTING BUGS" +Report bugs to lzip\-bug@nongnu.org +.br +Lzlib home page: http://www.nongnu.org/lzip/lzlib.html +.SH COPYRIGHT +Copyright \(co 2013 Antonio Diaz Diaz. +Using Lzlib 1.4\-rc2 +License GPLv3+: GNU GPL version 3 or later +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. diff --git a/encoder.c b/encoder.c index bf0dafa..7b417ba 100644 --- a/encoder.c +++ b/encoder.c @@ -1,5 +1,5 @@ /* Lzlib - A compression library for lzip files - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,121 +25,6 @@ Public License. */ -enum { max_num_trials = 1 << 12, - price_shift = 6 }; - -static inline int price0( const Bit_model probability ) - { return get_price( probability ); } - -static inline int price1( const Bit_model probability ) - { return get_price( bit_model_total-probability ); } - -static inline int price_bit( const Bit_model bm, const int bit ) - { if( bit ) return price1( bm ); else return price0( bm ); } - - -static inline int price_symbol( const Bit_model bm[], int symbol, - const int num_bits ) - { - int price = 0; - symbol |= ( 1 << num_bits ); - while( symbol > 1 ) - { - const int bit = symbol & 1; - symbol >>= 1; - price += price_bit( bm[symbol], bit ); - } - return price; - } - - -static inline int price_symbol_reversed( const Bit_model bm[], int symbol, - const int num_bits ) - { - int price = 0; - int model = 1; - int i; - for( i = num_bits; i > 0; --i ) - { - const int bit = symbol & 1; - symbol >>= 1; - price += price_bit( bm[model], bit ); - model = ( model << 1 ) | bit; - } - return price; - } - - -static inline int price_matched( const Bit_model bm[], const int symbol, - const int match_byte ) - { - int price = 0; - int model = 1; - int i; - - for( i = 7; i >= 0; --i ) - { - const int match_bit = ( match_byte >> i ) & 1; - int bit = ( symbol >> i ) & 1; - price += price_bit( bm[(match_bit<<8)+model+0x100], bit ); - model = ( model << 1 ) | bit; - if( match_bit != bit ) - { - while( --i >= 0 ) - { - bit = ( symbol >> i ) & 1; - price += price_bit( bm[model], bit ); - model = ( model << 1 ) | bit; - } - break; - } - } - return price; - } - - -enum { /* bytes to keep in buffer before dictionary */ - before_size = max_num_trials + 1, - /* bytes to keep in buffer after pos */ - after_size = max_num_trials + max_match_len, - num_prev_positions4 = 1 << 20, - num_prev_positions3 = 1 << 18, - num_prev_positions2 = 1 << 16, - num_prev_positions = num_prev_positions4 + num_prev_positions3 + - num_prev_positions2 }; - -struct Matchfinder - { - long long partial_data_pos; - uint8_t * buffer; /* input buffer */ - int32_t * prev_positions; /* last seen position of key */ - int32_t * prev_pos_tree; - int dictionary_size; /* bytes to keep in buffer before pos */ - int buffer_size; - int pos; /* current pos in buffer */ - int cyclic_pos; /* current pos in dictionary */ - int stream_pos; /* first byte not yet read from file */ - int pos_limit; /* when reached, a new block must be read */ - int match_len_limit; - int cycles; - bool at_stream_end; /* stream_pos shows real end of file */ - bool been_flushed; - }; - - -static int Mf_write_data( struct Matchfinder * const mf, - const uint8_t * const inbuf, const int size ) - { - const int sz = min( mf->buffer_size - mf->stream_pos, size ); - if( !mf->at_stream_end && sz > 0 ) - { - memcpy( mf->buffer + mf->stream_pos, inbuf, sz ); - mf->stream_pos += sz; - } - return sz; - } - - static bool Mf_normalize_pos( struct Matchfinder * const mf ) { if( mf->pos > mf->stream_pos ) @@ -153,9 +38,9 @@ static bool Mf_normalize_pos( struct Matchfinder * const mf ) mf->partial_data_pos += offset; mf->pos -= offset; mf->stream_pos -= offset; - for( i = 0; i < num_prev_positions; ++i ) + for( i = 0; i < mf->num_prev_positions; ++i ) if( mf->prev_positions[i] >= 0 ) mf->prev_positions[i] -= offset; - for( i = 0; i < 2 * mf->dictionary_size; ++i ) + for( i = 0; i < 2 * ( mf->dictionary_size + 1 ); ++i ) if( mf->prev_pos_tree[i] >= 0 ) mf->prev_pos_tree[i] -= offset; } return true; @@ -163,101 +48,60 @@ static bool Mf_normalize_pos( struct Matchfinder * const mf ) static bool Mf_init( struct Matchfinder * const mf, - const int dict_size, const int len_limit ) + const int dict_size, const int match_len_limit ) { - int i; + const int buffer_size_limit = ( 2 * dict_size ) + before_size + after_size; + int i, size; + mf->partial_data_pos = 0; - mf->dictionary_size = dict_size; - mf->buffer_size = ( 2 * max( 65536, dict_size ) ) + before_size + after_size; - mf->buffer = (uint8_t *)malloc( mf->buffer_size ); - mf->prev_positions = (int32_t *)malloc( num_prev_positions * sizeof (int32_t) ); - mf->prev_pos_tree = (int32_t *)malloc( 2 * dict_size * sizeof (int32_t) ); + mf->match_len_limit = match_len_limit; mf->pos = 0; mf->cyclic_pos = 0; mf->stream_pos = 0; - mf->pos_limit = mf->buffer_size - after_size; - mf->match_len_limit = len_limit; - mf->cycles = ( len_limit < max_match_len ) ? 16 + ( len_limit / 2 ) : 256; + mf->cycles = ( match_len_limit < max_match_len ) ? + 16 + ( match_len_limit / 2 ) : 256; mf->at_stream_end = false; mf->been_flushed = false; - if( !mf->buffer || !mf->prev_positions || !mf->prev_pos_tree ) - { - if( mf->prev_pos_tree ) - { free( mf->prev_pos_tree ); mf->prev_pos_tree = 0; } - if( mf->prev_positions ) - { free( mf->prev_positions ); mf->prev_positions = 0; } - if( mf->buffer ) { free( mf->buffer ); mf->buffer = 0; } - return false; - } - for( i = 0; i < num_prev_positions; ++i ) mf->prev_positions[i] = -1; + mf->buffer_size = max( 65536, buffer_size_limit ); + mf->buffer = (uint8_t *)malloc( mf->buffer_size ); + if( !mf->buffer ) return false; + mf->dictionary_size = dict_size; + mf->pos_limit = mf->buffer_size - after_size; + size = 1 << max( 16, real_bits( mf->dictionary_size - 1 ) - 2 ); + if( mf->dictionary_size > 1 << 26 ) + size >>= 1; + mf->key4_mask = size - 1; + size += num_prev_positions2; + size += num_prev_positions3; + + mf->num_prev_positions = size; + size += ( 2 * ( mf->dictionary_size + 1 ) ); + mf->prev_positions = (int32_t *)malloc( size * sizeof (int32_t) ); + if( !mf->prev_positions ) { free( mf->buffer ); return false; } + mf->prev_pos_tree = mf->prev_positions + mf->num_prev_positions; + for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = -1; return true; } -static inline void Mf_free( struct Matchfinder * const mf ) +static void Mf_adjust_distionary_size( struct Matchfinder * const mf ) { - free( mf->prev_pos_tree ); mf->prev_pos_tree = 0; - free( mf->prev_positions ); mf->prev_positions = 0; - free( mf->buffer ); mf->buffer = 0; - } - -static inline uint8_t Mf_peek( const struct Matchfinder * const mf, const int i ) - { return mf->buffer[mf->pos+i]; } - -static inline int Mf_available_bytes( const struct Matchfinder * const mf ) - { return mf->stream_pos - mf->pos; } - -static inline long long Mf_data_position( const struct Matchfinder * const mf ) - { return mf->partial_data_pos + mf->pos; } - -static inline bool Mf_finished( const struct Matchfinder * const mf ) - { return mf->at_stream_end && mf->pos >= mf->stream_pos; } - -static inline const uint8_t * Mf_ptr_to_current_pos( const struct Matchfinder * const mf ) - { return mf->buffer + mf->pos; } - -static inline void Mf_set_flushing( struct Matchfinder * const mf, - const bool flushing ) - { mf->at_stream_end = flushing; } - -static inline int Mf_free_bytes( const struct Matchfinder * const mf ) - { if( mf->at_stream_end ) return 0; return mf->buffer_size - mf->stream_pos; } - -static inline bool Mf_enough_available_bytes( const struct Matchfinder * const mf ) - { - return ( mf->pos + after_size <= mf->stream_pos || - ( mf->at_stream_end && mf->pos < mf->stream_pos ) ); - } - -static inline bool Mf_dec_pos( struct Matchfinder * const mf, - const int ahead ) - { - if( ahead < 0 || mf->pos < ahead ) return false; - mf->pos -= ahead; - mf->cyclic_pos -= ahead; - if( mf->cyclic_pos < 0 ) mf->cyclic_pos += mf->dictionary_size; - return true; - } - -static inline int Mf_true_match_len( const struct Matchfinder * const mf, - const int index, const int distance, - int len_limit ) - { - const uint8_t * const data = mf->buffer + mf->pos + index; - int i = 0; - - if( index + len_limit > Mf_available_bytes( mf ) ) - len_limit = Mf_available_bytes( mf ) - index; - while( i < len_limit && data[i-distance] == data[i] ) ++i; - return i; - } - -static inline bool Mf_move_pos( struct Matchfinder * const mf ) - { - if( ++mf->cyclic_pos >= mf->dictionary_size ) mf->cyclic_pos = 0; - if( ++mf->pos >= mf->pos_limit ) return Mf_normalize_pos( mf ); - return true; + if( mf->stream_pos < mf->dictionary_size ) + { + int size; + mf->buffer_size = + mf->dictionary_size = + mf->pos_limit = max( min_dictionary_size, mf->stream_pos ); + size = 1 << max( 16, real_bits( mf->dictionary_size - 1 ) - 2 ); + if( mf->dictionary_size > 1 << 26 ) + size >>= 1; + mf->key4_mask = size - 1; + size += num_prev_positions2; + size += num_prev_positions3; + mf->num_prev_positions = size; + mf->prev_pos_tree = mf->prev_positions + mf->num_prev_positions; + } } @@ -272,23 +116,23 @@ static void Mf_reset( struct Matchfinder * const mf ) mf->cyclic_pos = 0; mf->at_stream_end = false; mf->been_flushed = false; - for( i = 0; i < num_prev_positions; ++i ) mf->prev_positions[i] = -1; + for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = -1; } -static int Mf_longest_match_len( struct Matchfinder * const mf, - int * const distances ) +static int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs ) { int32_t * ptr0 = mf->prev_pos_tree + ( mf->cyclic_pos << 1 ); int32_t * ptr1 = ptr0 + 1; int32_t * newptr; - const uint8_t * newdata; int len = 0, len0 = 0, len1 = 0; int maxlen = min_match_len - 1; - const int min_pos = (mf->pos >= mf->dictionary_size) ? - (mf->pos - mf->dictionary_size + 1) : 0; + int num_pairs = 0; + const int min_pos = (mf->pos > mf->dictionary_size) ? + mf->pos - mf->dictionary_size : 0; const uint8_t * const data = mf->buffer + mf->pos; - int count, delta, key2, key3, key4, newpos, tmp; + int count, delta, key2, key3, key4, newpos; + unsigned tmp; int len_limit = mf->match_len_limit; if( len_limit > Mf_available_bytes( mf ) ) @@ -298,24 +142,39 @@ static int Mf_longest_match_len( struct Matchfinder * const mf, if( len_limit < 4 ) { *ptr0 = *ptr1 = -1; return 0; } } - key2 = num_prev_positions4 + num_prev_positions3 + - ( ( (int)data[0] << 8 ) | data[1] ); - tmp = crc32[data[0]] ^ data[1] ^ ( (uint32_t)data[2] << 8 ); - key3 = num_prev_positions4 + (int)( tmp & ( num_prev_positions3 - 1 ) ); - key4 = (int)( ( tmp ^ ( crc32[data[3]] << 5 ) ) & - ( num_prev_positions4 - 1 ) ); + tmp = crc32[data[0]] ^ data[1]; + key2 = tmp & ( num_prev_positions2 - 1 ); + tmp ^= (uint32_t)data[2] << 8; + key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) ); + key4 = num_prev_positions2 + num_prev_positions3 + + ( ( tmp ^ ( crc32[data[3]] << 5 ) ) & mf->key4_mask ); - if( distances ) + if( pairs ) { - int np = mf->prev_positions[key2]; - if( np >= min_pos ) - { distances[2] = mf->pos - np - 1; maxlen = 2; } - else distances[2] = 0x7FFFFFFF; - np = mf->prev_positions[key3]; - if( np >= min_pos && mf->buffer[np] == data[0] ) - { distances[3] = mf->pos - np - 1; maxlen = 3; } - else distances[3] = 0x7FFFFFFF; - distances[4] = 0x7FFFFFFF; + int np2 = mf->prev_positions[key2]; + int np3 = mf->prev_positions[key3]; + if( np2 >= min_pos && mf->buffer[np2] == data[0] ) + { + pairs[0].dis = mf->pos - np2 - 1; + pairs[0].len = maxlen = 2; + num_pairs = 1; + } + if( np2 != np3 && np3 >= min_pos && mf->buffer[np3] == data[0] ) + { + maxlen = 3; + pairs[num_pairs].dis = mf->pos - np3 - 1; + ++num_pairs; + np2 = np3; + } + if( num_pairs > 0 ) + { + delta = mf->pos - np2; + while( maxlen < len_limit && data[maxlen-delta] == data[maxlen] ) + ++maxlen; + pairs[num_pairs-1].len = maxlen; + if( maxlen >= len_limit ) pairs = 0; + } + if( maxlen < 3 ) maxlen = 3; } mf->prev_positions[key2] = mf->pos; @@ -326,249 +185,44 @@ static int Mf_longest_match_len( struct Matchfinder * const mf, for( count = mf->cycles; ; ) { if( newpos < min_pos || --count < 0 ) { *ptr0 = *ptr1 = -1; break; } - newdata = mf->buffer + newpos; - if( mf->been_flushed ) len = 0; - while( len < len_limit && newdata[len] == data[len] ) ++len; + if( mf->been_flushed ) len = 0; delta = mf->pos - newpos; - if( distances ) while( maxlen < len ) distances[++maxlen] = delta - 1; - newptr = mf->prev_pos_tree + ( ( mf->cyclic_pos - delta + - ( ( mf->cyclic_pos >= delta ) ? 0 : mf->dictionary_size ) ) << 1 ); - - if( len < len_limit ) + ( (mf->cyclic_pos >= delta) ? 0 : mf->dictionary_size + 1 ) ) << 1 ); + if( data[len-delta] == data[len] ) { - if( newdata[len] < data[len] ) + while( ++len < len_limit && data[len-delta] == data[len] ) {} + if( pairs && maxlen < len ) { - *ptr0 = newpos; - ptr0 = newptr + 1; - newpos = *ptr0; - len0 = len; if( len1 < len ) len = len1; + pairs[num_pairs].dis = delta - 1; + pairs[num_pairs].len = maxlen = len; + ++num_pairs; } - else + if( len >= len_limit ) { - *ptr1 = newpos; - ptr1 = newptr; - newpos = *ptr1; - len1 = len; if( len0 < len ) len = len0; + *ptr0 = newptr[0]; + *ptr1 = newptr[1]; + break; } } - else + if( data[len-delta] < data[len] ) { - *ptr0 = newptr[0]; - *ptr1 = newptr[1]; - break; + *ptr0 = newpos; + ptr0 = newptr + 1; + newpos = *ptr0; + len0 = len; if( len1 < len ) len = len1; } - } - if( distances ) - { - if( distances[3] > distances[4] ) distances[3] = distances[4]; - if( distances[2] > distances[3] ) distances[2] = distances[3]; - } - return maxlen; - } - - -enum { re_min_free_bytes = 2 * max_num_trials }; - -struct Range_encoder - { - struct Circular_buffer cb; - uint64_t low; - long long partial_member_pos; - uint32_t range; - int ff_count; - uint8_t cache; - }; - -static inline void Re_shift_low( struct Range_encoder * const renc ) - { - const bool carry = ( renc->low > 0xFFFFFFFFU ); - if( carry || renc->low < 0xFF000000U ) - { - Cb_put_byte( &renc->cb, renc->cache + carry ); - for( ; renc->ff_count > 0; --renc->ff_count ) - Cb_put_byte( &renc->cb, 0xFF + carry ); - renc->cache = renc->low >> 24; - } - else ++renc->ff_count; - renc->low = ( renc->low & 0x00FFFFFFU ) << 8; - } - -static inline bool Re_init( struct Range_encoder * const renc ) - { - if( !Cb_init( &renc->cb, 65536 + re_min_free_bytes ) ) return false; - renc->low = 0; - renc->partial_member_pos = 0; - renc->range = 0xFFFFFFFFU; - renc->ff_count = 0; - renc->cache = 0; - return true; - } - -static inline void Re_free( struct Range_encoder * const renc ) - { Cb_free( &renc->cb ); } - -static inline long long Re_member_position( const struct Range_encoder * const renc ) - { return renc->partial_member_pos + Cb_used_bytes( &renc->cb ) + renc->ff_count; } - -static inline bool Re_enough_free_bytes( const struct Range_encoder * const renc ) - { return Cb_free_bytes( &renc->cb ) >= re_min_free_bytes; } - -static inline int Re_read_data( struct Range_encoder * const renc, - uint8_t * const out_buffer, const int out_size ) - { - const int size = Cb_read_data( &renc->cb, out_buffer, out_size ); - if( size > 0 ) renc->partial_member_pos += size; - return size; - } - -static inline void Re_flush( struct Range_encoder * const renc ) - { - int i; for( i = 0; i < 5; ++i ) Re_shift_low( renc ); - renc->low = 0; - renc->range = 0xFFFFFFFFU; - renc->ff_count = 0; - renc->cache = 0; - } - -static inline void Re_encode( struct Range_encoder * const renc, - const int symbol, const int num_bits ) - { - int i; - for( i = num_bits - 1; i >= 0; --i ) - { - renc->range >>= 1; - if( (symbol >> i) & 1 ) renc->low += renc->range; - if( renc->range <= 0x00FFFFFFU ) - { renc->range <<= 8; Re_shift_low( renc ); } - } - } - -static inline void Re_encode_bit( struct Range_encoder * const renc, - Bit_model * const probability, const int bit ) - { - const uint32_t bound = ( renc->range >> bit_model_total_bits ) * *probability; - if( !bit ) - { - renc->range = bound; - *probability += (bit_model_total - *probability) >> bit_model_move_bits; - } - else - { - renc->low += bound; - renc->range -= bound; - *probability -= *probability >> bit_model_move_bits; - } - if( renc->range <= 0x00FFFFFFU ) - { renc->range <<= 8; Re_shift_low( renc ); } - } - -static inline void Re_encode_tree( struct Range_encoder * const renc, - Bit_model bm[], const int symbol, const int num_bits ) - { - int mask = ( 1 << ( num_bits - 1 ) ); - int model = 1; - int i; - for( i = num_bits; i > 0; --i, mask >>= 1 ) - { - const int bit = ( symbol & mask ); - Re_encode_bit( renc, &bm[model], bit ); - model <<= 1; - if( bit ) model |= 1; - } - } - -static inline void Re_encode_tree_reversed( struct Range_encoder * const renc, - Bit_model bm[], int symbol, const int num_bits ) - { - int model = 1; - int i; - for( i = num_bits; i > 0; --i ) - { - const int bit = symbol & 1; - Re_encode_bit( renc, &bm[model], bit ); - model = ( model << 1 ) | bit; - symbol >>= 1; - } - } - -static inline void Re_encode_matched( struct Range_encoder * const renc, - Bit_model bm[], int symbol, int match_byte ) - { - int model = 1; - int i; - for( i = 7; i >= 0; --i ) - { - const int match_bit = ( match_byte >> i ) & 1; - int bit = ( symbol >> i ) & 1; - Re_encode_bit( renc, &bm[(match_bit<<8)+model+0x100], bit ); - model = ( model << 1 ) | bit; - if( match_bit != bit ) + else { - while( --i >= 0 ) - { - bit = ( symbol >> i ) & 1; - Re_encode_bit( renc, &bm[model], bit ); - model = ( model << 1 ) | bit; - } - break; + *ptr1 = newpos; + ptr1 = newptr; + newpos = *ptr1; + len1 = len; if( len0 < len ) len = len0; } } - } - - -struct Len_encoder - { - Bit_model choice1; - Bit_model choice2; - Bit_model bm_low[pos_states][len_low_symbols]; - Bit_model bm_mid[pos_states][len_mid_symbols]; - Bit_model bm_high[len_high_symbols]; - int prices[pos_states][max_len_symbols]; - int len_symbols; - int counters[pos_states]; - }; - -static void Lee_update_prices( struct Len_encoder * const len_encoder, - const int pos_state ) - { - int * const pps = len_encoder->prices[pos_state]; - int tmp = price0( len_encoder->choice1 ); - int len = 0; - for( ; len < len_low_symbols && len < len_encoder->len_symbols; ++len ) - pps[len] = tmp + - price_symbol( len_encoder->bm_low[pos_state], len, len_low_bits ); - tmp = price1( len_encoder->choice1 ); - for( ; len < len_low_symbols + len_mid_symbols && len < len_encoder->len_symbols; ++len ) - pps[len] = tmp + price0( len_encoder->choice2 ) + - price_symbol( len_encoder->bm_mid[pos_state], len - len_low_symbols, len_mid_bits ); - for( ; len < len_encoder->len_symbols; ++len ) - /* using 4 slots per value makes "Lee_price" faster */ - len_encoder->prices[3][len] = len_encoder->prices[2][len] = - len_encoder->prices[1][len] = len_encoder->prices[0][len] = - tmp + price1( len_encoder->choice2 ) + - price_symbol( len_encoder->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); - len_encoder->counters[pos_state] = len_encoder->len_symbols; - } - -static void Lee_init( struct Len_encoder * const len_encoder, - const int len_limit ) - { - int i, j; - Bm_init( &len_encoder->choice1 ); - Bm_init( &len_encoder->choice2 ); - for( i = 0; i < pos_states; ++i ) - for( j = 0; j < len_low_symbols; ++j ) - Bm_init( &len_encoder->bm_low[i][j] ); - for( i = 0; i < pos_states; ++i ) - for( j = 0; j < len_mid_symbols; ++j ) - Bm_init( &len_encoder->bm_mid[i][j] ); - for( i = 0; i < len_high_symbols; ++i ) - Bm_init( &len_encoder->bm_high[i] ); - len_encoder->len_symbols = len_limit + 1 - min_match_len; - for( i = 0; i < pos_states; ++i ) Lee_update_prices( len_encoder, i ); + return num_pairs; } @@ -603,112 +257,42 @@ static void Lee_encode( struct Len_encoder * const len_encoder, } -static inline int Lee_price( const struct Len_encoder * const len_encoder, - const int symbol, const int pos_state ) - { return len_encoder->prices[pos_state][symbol - min_match_len]; } - - -struct Literal_encoder - { - Bit_model bm_literal[1<bm_literal[i][j] ); - } - -static inline int Lie_state( const uint8_t prev_byte ) - { return ( prev_byte >> ( 8 - literal_context_bits ) ); } - -static inline void Lie_encode( struct Literal_encoder * const lienc, - struct Range_encoder * const renc, - uint8_t prev_byte, uint8_t symbol ) - { Re_encode_tree( renc, lienc->bm_literal[Lie_state(prev_byte)], symbol, 8 ); } - -static inline void Lie_encode_matched( struct Literal_encoder * const lienc, - struct Range_encoder * const renc, - uint8_t prev_byte, uint8_t symbol, - uint8_t match_byte ) - { Re_encode_matched( renc, lienc->bm_literal[Lie_state(prev_byte)], - symbol, match_byte ); } - -static inline int Lie_price_symbol( const struct Literal_encoder * const lienc, - uint8_t prev_byte, uint8_t symbol ) - { return price_symbol( lienc->bm_literal[Lie_state(prev_byte)], symbol, 8 ); } - -static inline int Lie_price_matched( const struct Literal_encoder * const lienc, - uint8_t prev_byte, uint8_t symbol, - uint8_t match_byte ) - { return price_matched( lienc->bm_literal[Lie_state(prev_byte)], - symbol, match_byte ); } - - -enum { infinite_price = 0x0FFFFFFF, - max_marker_size = 16, - num_rep_distances = 4 }; /* must be 4 */ - -struct Trial - { - State state; - int dis; - int prev_index; /* index of prev trial in trials[] */ - int price; /* dual use var; cumulative price, match length */ - int reps[num_rep_distances]; - }; - -static inline void Tr_update( struct Trial * const trial, - const int d, const int p_i, const int pr ) + /* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */ +static bool LZe_full_flush( struct LZ_encoder * const encoder, const State state ) { - if( pr < trial->price ) - { trial->dis = d; trial->prev_index = p_i; trial->price = pr; } + int i; + const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; + File_trailer trailer; + if( encoder->member_finished || + Cb_free_bytes( &encoder->range_encoder.cb ) < max_marker_size + Ft_size ) + return false; + Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][pos_state], 1 ); + Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep[state], 0 ); + LZe_encode_pair( encoder, 0xFFFFFFFFU, min_match_len, pos_state ); + Re_flush( &encoder->range_encoder ); + Ft_set_data_crc( trailer, LZe_crc( encoder ) ); + Ft_set_data_size( trailer, Mf_data_position( encoder->matchfinder ) ); + Ft_set_member_size( trailer, Re_member_position( &encoder->range_encoder ) + + Ft_size ); + for( i = 0; i < Ft_size; ++i ) + Cb_put_byte( &encoder->range_encoder.cb, trailer[i] ); + return true; } -struct LZ_encoder - { - long long member_size_limit; - int longest_match_found; - uint32_t crc; - - Bit_model bm_match[states][pos_states]; - Bit_model bm_rep[states]; - Bit_model bm_rep0[states]; - Bit_model bm_rep1[states]; - Bit_model bm_rep2[states]; - Bit_model bm_len[states][pos_states]; - Bit_model bm_dis_slot[max_dis_states][1< (dis == 0xFFFFFFFFU, len == min_match_len + 1) */ +static bool LZe_sync_flush( struct LZ_encoder * const encoder ) { - return ( encoder->member_finished && - !Cb_used_bytes( &encoder->range_encoder.cb ) ); + const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; + const State state = encoder->state; + if( encoder->member_finished || + Cb_free_bytes( &encoder->range_encoder.cb ) < max_marker_size ) + return false; + Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][pos_state], 1 ); + Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep[state], 0 ); + LZe_encode_pair( encoder, 0xFFFFFFFFU, min_match_len + 1, pos_state ); + Re_flush( &encoder->range_encoder ); + return true; } @@ -731,7 +315,7 @@ static void LZe_fill_distance_prices( struct LZ_encoder * const encoder ) const int direct_bits = ( dis_slot >> 1 ) - 1; const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits; const int price = - price_symbol_reversed( encoder->bm_dis + base - dis_slot, + price_symbol_reversed( encoder->bm_dis + base - dis_slot - 1, dis - base, direct_bits ); for( dis_state = 0; dis_state < max_dis_states; ++dis_state ) encoder->dis_prices[dis_state][dis] = price; @@ -747,7 +331,7 @@ static void LZe_fill_distance_prices( struct LZ_encoder * const encoder ) dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ); for( ; slot < encoder->num_dis_slots; ++slot ) dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ) + - (((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift ); + (((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift_bits ); for( dis = 0; dis < start_dis_model; ++dis ) dp[dis] = dsp[dis]; @@ -759,229 +343,44 @@ static void LZe_fill_distance_prices( struct LZ_encoder * const encoder ) static bool LZe_init( struct LZ_encoder * const encoder, struct Matchfinder * const mf, - const File_header header, const long long member_size ) + const File_header header, + const unsigned long long member_size ) { - int i, j; + int i; encoder->member_size_limit = member_size - Ft_size - max_marker_size; - encoder->longest_match_found = 0; + encoder->pending_num_pairs = 0; encoder->crc = 0xFFFFFFFFU; - for( i = 0; i < states; ++i ) - { - for( j = 0; j < pos_states; ++j ) - { - Bm_init( &encoder->bm_match[i][j] ); - Bm_init( &encoder->bm_len[i][j] ); - } - Bm_init( &encoder->bm_rep[i] ); - Bm_init( &encoder->bm_rep0[i] ); - Bm_init( &encoder->bm_rep1[i] ); - Bm_init( &encoder->bm_rep2[i] ); - } - for( i = 0; i < max_dis_states; ++i ) - for( j = 0; j < 1<bm_dis_slot[i][j] ); - for( i = 0; i < modeled_distances-end_dis_model+1; ++i ) - Bm_init( &encoder->bm_dis[i] ); - for( i = 0; i < dis_align_size; ++i ) - Bm_init( &encoder->bm_align[i] ); + Bm_array_init( encoder->bm_literal[0], (1 << literal_context_bits) * 0x300 ); + Bm_array_init( encoder->bm_match[0], states * pos_states ); + Bm_array_init( encoder->bm_rep, states ); + Bm_array_init( encoder->bm_rep0, states ); + Bm_array_init( encoder->bm_rep1, states ); + Bm_array_init( encoder->bm_rep2, states ); + Bm_array_init( encoder->bm_len[0], states * pos_states ); + Bm_array_init( encoder->bm_dis_slot[0], max_dis_states * (1 << dis_slot_bits) ); + Bm_array_init( encoder->bm_dis, modeled_distances - end_dis_model ); + Bm_array_init( encoder->bm_align, dis_align_size ); encoder->matchfinder = mf; if( !Re_init( &encoder->range_encoder ) ) return false; Lee_init( &encoder->len_encoder, encoder->matchfinder->match_len_limit ); Lee_init( &encoder->rep_match_len_encoder, encoder->matchfinder->match_len_limit ); - Lie_init( &encoder->literal_encoder ); encoder->num_dis_slots = 2 * real_bits( encoder->matchfinder->dictionary_size - 1 ); + for( i = 0; i < num_rep_distances; ++i ) encoder->rep_distances[i] = 0; + encoder->align_price_count = 0; encoder->fill_counter = 0; encoder->state = 0; encoder->member_finished = false; - LZe_fill_align_prices( encoder ); - for( i = 0; i < Fh_size; ++i ) Cb_put_byte( &encoder->range_encoder.cb, header[i] ); return true; } -static inline void LZe_free( struct LZ_encoder * const encoder ) - { - Re_free( &encoder->range_encoder ); - } - -static inline uint32_t LZe_crc( const struct LZ_encoder * const encoder ) - { return encoder->crc ^ 0xFFFFFFFFU; } - - /* move-to-front dis in/into reps */ -static inline void LZe_mtf_reps( const int dis, int reps[num_rep_distances] ) - { - int i; - if( dis >= num_rep_distances ) - { - for( i = num_rep_distances - 1; i > 0; --i ) reps[i] = reps[i-1]; - reps[0] = dis - num_rep_distances; - } - else if( dis > 0 ) - { - const int distance = reps[dis]; - for( i = dis; i > 0; --i ) reps[i] = reps[i-1]; - reps[0] = distance; - } - } - -static inline int LZe_price_rep_len1( const struct LZ_encoder * const encoder, - const State state, const int pos_state ) - { - return price0( encoder->bm_rep0[state] ) + - price0( encoder->bm_len[state][pos_state] ); - } - -static inline int LZe_price_rep( const struct LZ_encoder * const encoder, - const int rep, - const State state, const int pos_state ) - { - int price; - if( rep == 0 ) return price0( encoder->bm_rep0[state] ) + - price1( encoder->bm_len[state][pos_state] ); - price = price1( encoder->bm_rep0[state] ); - if( rep == 1 ) - price += price0( encoder->bm_rep1[state] ); - else - { - price += price1( encoder->bm_rep1[state] ); - price += price_bit( encoder->bm_rep2[state], rep - 2 ); - } - return price; - } - -static inline int LZe_price_dis( const struct LZ_encoder * const encoder, - const int dis, const int dis_state ) - { - if( dis < modeled_distances ) - return encoder->dis_prices[dis_state][dis]; - else - return encoder->dis_slot_prices[dis_state][get_slot( dis )] + - encoder->align_prices[dis & (dis_align_size - 1)]; - } - -static inline int LZe_price_pair( const struct LZ_encoder * const encoder, - const int dis, const int len, - const int pos_state ) - { - if( len <= min_match_len && dis >= modeled_distances ) - return infinite_price; - return Lee_price( &encoder->len_encoder, len, pos_state ) + - LZe_price_dis( encoder, dis, get_dis_state( len ) ); - } - -static inline void LZe_encode_pair( struct LZ_encoder * const encoder, - const uint32_t dis, const int len, - const int pos_state ) - { - const int dis_slot = get_slot( dis ); - Lee_encode( &encoder->len_encoder, &encoder->range_encoder, len, pos_state ); - Re_encode_tree( &encoder->range_encoder, - encoder->bm_dis_slot[get_dis_state(len)], - dis_slot, dis_slot_bits ); - - if( dis_slot >= start_dis_model ) - { - const int direct_bits = ( dis_slot >> 1 ) - 1; - const uint32_t base = ( 2 | ( dis_slot & 1 ) ) << direct_bits; - const uint32_t direct_dis = dis - base; - - if( dis_slot < end_dis_model ) - Re_encode_tree_reversed( &encoder->range_encoder, - encoder->bm_dis + base - dis_slot, - direct_dis, direct_bits ); - else - { - Re_encode( &encoder->range_encoder, direct_dis >> dis_align_bits, - direct_bits - dis_align_bits ); - Re_encode_tree_reversed( &encoder->range_encoder, encoder->bm_align, - direct_dis, dis_align_bits ); - if( --encoder->align_price_count <= 0 ) LZe_fill_align_prices( encoder ); - } - } - } - - /* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */ -static bool LZe_full_flush( struct LZ_encoder * const encoder, const State state ) - { - int i; - const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; - File_trailer trailer; - if( encoder->member_finished || - Cb_free_bytes( &encoder->range_encoder.cb ) < Ft_size + max_marker_size ) - return false; - Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][pos_state], 1 ); - Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep[state], 0 ); - LZe_encode_pair( encoder, 0xFFFFFFFFU, min_match_len, pos_state ); - Re_flush( &encoder->range_encoder ); - Ft_set_data_crc( trailer, LZe_crc( encoder ) ); - Ft_set_data_size( trailer, Mf_data_position( encoder->matchfinder ) ); - Ft_set_member_size( trailer, Re_member_position( &encoder->range_encoder ) + - Ft_size ); - for( i = 0; i < Ft_size; ++i ) - Cb_put_byte( &encoder->range_encoder.cb, trailer[i] ); - return true; - } - - - /* Sync Flush mark => (dis == 0xFFFFFFFFU, len == min_match_len + 1) */ -static bool LZe_sync_flush( struct LZ_encoder * const encoder ) - { - const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; - const State state = encoder->state; - if( encoder->member_finished || - Cb_free_bytes( &encoder->range_encoder.cb ) < max_marker_size ) - return false; - Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][pos_state], 1 ); - Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep[state], 0 ); - LZe_encode_pair( encoder, 0xFFFFFFFFU, min_match_len + 1, pos_state ); - Re_flush( &encoder->range_encoder ); - return true; - } - - -static inline int LZe_read_match_distances( struct LZ_encoder * const encoder ) - { - int len = Mf_longest_match_len( encoder->matchfinder, - encoder->match_distances ); - if( len == encoder->matchfinder->match_len_limit && len < max_match_len ) - len += Mf_true_match_len( encoder->matchfinder, len, - encoder->match_distances[len] + 1, - max_match_len - len ); - return len; - } - -static inline bool LZe_move_pos( struct LZ_encoder * const encoder, int n ) - { - if( --n >= 0 && !Mf_move_pos( encoder->matchfinder ) ) return false; - while( --n >= 0 ) - { - Mf_longest_match_len( encoder->matchfinder, 0 ); - if( !Mf_move_pos( encoder->matchfinder ) ) return false; - } - return true; - } - -static inline void LZe_backward( struct LZ_encoder * const encoder, int cur ) - { - int * const dis = &encoder->trials[cur].dis; - while( cur > 0 ) - { - const int prev_index = encoder->trials[cur].prev_index; - struct Trial * const prev_trial = &encoder->trials[prev_index]; - prev_trial->price = cur - prev_index; /* len */ - cur = *dis; *dis = prev_trial->dis; prev_trial->dis = cur; - cur = prev_index; - } - } - - /* Return value == number of bytes advanced (ahead). trials[0]..trials[retval-1] contain the steps to encode. ( trials[0].dis == -1 && trials[0].price == 1 ) means literal. @@ -990,16 +389,18 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, const int reps[num_rep_distances], const State state ) { - int main_len, i, rep, cur = 0, num_trials; + int main_len, num_pairs, i, rep, cur = 0, num_trials, len; int replens[num_rep_distances]; int rep_index = 0; - if( encoder->longest_match_found > 0 ) /* from previous call */ + if( encoder->pending_num_pairs > 0 ) /* from previous call */ { - main_len = encoder->longest_match_found; - encoder->longest_match_found = 0; + num_pairs = encoder->pending_num_pairs; + encoder->pending_num_pairs = 0; } - else main_len = LZe_read_match_distances( encoder ); + else + num_pairs = LZe_read_match_distances( encoder ); + main_len = ( num_pairs > 0 ) ? encoder->pairs[num_pairs-1].len : 0; for( i = 0; i < num_rep_distances; ++i ) { @@ -1017,9 +418,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, if( main_len >= encoder->matchfinder->match_len_limit ) { - encoder->trials[0].dis = - encoder->match_distances[encoder->matchfinder->match_len_limit] + - num_rep_distances; + encoder->trials[0].dis = encoder->pairs[num_pairs-1].dis + num_rep_distances; encoder->trials[0].price = main_len; if( !LZe_move_pos( encoder, main_len ) ) return 0; return main_len; @@ -1034,23 +433,22 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, const uint8_t match_byte = Mf_peek( encoder->matchfinder, -reps[0]-1 ); encoder->trials[0].state = state; - for( i = 0; i < num_rep_distances; ++i ) - encoder->trials[0].reps[i] = reps[i]; encoder->trials[1].dis = -1; - encoder->trials[1].prev_index = 0; encoder->trials[1].price = price0( encoder->bm_match[state][pos_state] ); if( St_is_char( state ) ) encoder->trials[1].price += - Lie_price_symbol( &encoder->literal_encoder, prev_byte, cur_byte ); + LZe_price_literal( encoder, prev_byte, cur_byte ); else encoder->trials[1].price += - Lie_price_matched( &encoder->literal_encoder, prev_byte, cur_byte, match_byte ); + LZe_price_matched( encoder, prev_byte, cur_byte, match_byte ); if( match_byte == cur_byte ) - Tr_update( &encoder->trials[1], 0, 0, rep_match_price + - LZe_price_rep_len1( encoder, state, pos_state ) ); + Tr_update( &encoder->trials[1], rep_match_price + + LZe_price_rep_len1( encoder, state, pos_state ), 0, 0 ); - if( main_len < min_match_len ) + num_trials = max( main_len, replens[rep_index] ); + + if( num_trials < min_match_len ) { encoder->trials[0].dis = encoder->trials[1].dis; encoder->trials[0].price = 1; @@ -1058,45 +456,51 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, return 1; } - if( main_len <= replens[rep_index] ) + for( i = 0; i < num_rep_distances; ++i ) + encoder->trials[0].reps[i] = reps[i]; + encoder->trials[1].prev_index = 0; + encoder->trials[1].prev_index2 = single_step_trial; + + for( len = min_match_len; len <= num_trials; ++len ) + encoder->trials[len].price = infinite_price; + + for( rep = 0; rep < num_rep_distances; ++rep ) { - int len; - main_len = replens[rep_index]; - for( len = min_match_len; len <= main_len; ++len ) - encoder->trials[len].price = infinite_price; + int price; + if( replens[rep] < min_match_len ) continue; + price = rep_match_price + LZe_price_rep( encoder, rep, state, pos_state ); + + for( len = min_match_len; len <= replens[rep]; ++len ) + Tr_update( &encoder->trials[len], price + + Lee_price( &encoder->rep_match_len_encoder, len, pos_state ), + rep, 0 ); } - else + + if( main_len > replens[0] ) { - int len; const int normal_match_price = match_price + price0( encoder->bm_rep[state] ); - for( len = min_match_len; len <= main_len; ++len ) + i = 0, len = max( replens[0] + 1, min_match_len ); + while( len > encoder->pairs[i].len ) ++i; + while( true ) { - encoder->trials[len].dis = encoder->match_distances[len] + num_rep_distances; - encoder->trials[len].prev_index = 0; - encoder->trials[len].price = normal_match_price + - LZe_price_pair( encoder, encoder->match_distances[len], len, pos_state ); + const int dis = encoder->pairs[i].dis; + Tr_update( &encoder->trials[len], normal_match_price + + LZe_price_pair( encoder, dis, len, pos_state ), + dis + num_rep_distances, 0 ); + if( ++len > encoder->pairs[i].len && ++i >= num_pairs ) break; } } - - for( rep = 0; rep < num_rep_distances; ++rep ) - { - const int price = rep_match_price + - LZe_price_rep( encoder, rep, state, pos_state ); - int len; - for( len = min_match_len; len <= replens[rep]; ++len ) - Tr_update( &encoder->trials[len], rep, 0, price + - Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) ); - } } - num_trials = main_len; if( !Mf_move_pos( encoder->matchfinder ) ) return 0; - while( true ) + while( true ) /* price optimization loop */ { struct Trial *cur_trial, *next_trial; - int newlen, pos_state, prev_index, len_limit; + int newlen, pos_state, prev_index, prev_index2, available_bytes, len_limit; + int start_len = min_match_len; int next_price, match_price, rep_match_price; + State cur_state; uint8_t prev_byte, cur_byte, match_byte; if( ++cur >= num_trials ) /* no more initialized trials */ @@ -1104,33 +508,70 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, LZe_backward( encoder, cur ); return cur; } - newlen = LZe_read_match_distances( encoder ); + + num_pairs = LZe_read_match_distances( encoder ); + newlen = ( num_pairs > 0 ) ? encoder->pairs[num_pairs-1].len : 0; if( newlen >= encoder->matchfinder->match_len_limit ) { - encoder->longest_match_found = newlen; + encoder->pending_num_pairs = num_pairs; LZe_backward( encoder, cur ); return cur; } + /* give final values to current trial */ cur_trial = &encoder->trials[cur]; prev_index = cur_trial->prev_index; + prev_index2 = cur_trial->prev_index2; - cur_trial->state = encoder->trials[prev_index].state; - - for( i = 0; i < num_rep_distances; ++i ) - cur_trial->reps[i] = encoder->trials[prev_index].reps[i]; + if( prev_index2 != single_step_trial ) + { + --prev_index; + if( prev_index2 >= 0 ) + { + cur_state = encoder->trials[prev_index2].state; + if( cur_trial->dis2 < num_rep_distances ) + cur_state = St_set_rep( cur_state ); + else + cur_state = St_set_match( cur_state ); + } + else + cur_state = encoder->trials[prev_index].state; + cur_state = St_set_char( cur_state ); + } + else + cur_state = encoder->trials[prev_index].state; if( prev_index == cur - 1 ) { - if( cur_trial->dis == 0 ) St_set_short_rep( &cur_trial->state ); - else St_set_char( &cur_trial->state ); + if( cur_trial->dis == 0 ) + cur_state = St_set_short_rep( cur_state ); + else + cur_state = St_set_char( cur_state ); + for( i = 0; i < num_rep_distances; ++i ) + cur_trial->reps[i] = encoder->trials[prev_index].reps[i]; } else { - if( cur_trial->dis < num_rep_distances ) St_set_rep( &cur_trial->state ); - else St_set_match( &cur_trial->state ); - LZe_mtf_reps( cur_trial->dis, cur_trial->reps ); + int dis; + if( prev_index2 >= 0 ) + { + dis = cur_trial->dis2; + prev_index = prev_index2; + cur_state = St_set_rep( cur_state ); + } + else + { + dis = cur_trial->dis; + if( dis < num_rep_distances ) + cur_state = St_set_rep( cur_state ); + else + cur_state = St_set_match( cur_state ); + } + for( i = 0; i < num_rep_distances; ++i ) + cur_trial->reps[i] = encoder->trials[prev_index].reps[i]; + LZe_mtf_reps( dis, cur_trial->reps ); } + cur_trial->state = cur_state; pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; prev_byte = Mf_peek( encoder->matchfinder, -1 ); @@ -1138,81 +579,162 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, match_byte = Mf_peek( encoder->matchfinder, -cur_trial->reps[0]-1 ); next_price = cur_trial->price + - price0( encoder->bm_match[cur_trial->state][pos_state] ); - if( St_is_char( cur_trial->state ) ) - next_price += Lie_price_symbol( &encoder->literal_encoder, - prev_byte, cur_byte ); + price0( encoder->bm_match[cur_state][pos_state] ); + if( St_is_char( cur_state ) ) + next_price += LZe_price_literal( encoder, prev_byte, cur_byte ); else - next_price += Lie_price_matched( &encoder->literal_encoder, + next_price += LZe_price_matched( encoder, prev_byte, cur_byte, match_byte ); if( !Mf_move_pos( encoder->matchfinder ) ) return 0; + /* try last updates to next trial */ next_trial = &encoder->trials[cur+1]; - Tr_update( next_trial, -1, cur, next_price ); + Tr_update( next_trial, next_price, -1, cur ); - match_price = cur_trial->price + price1( encoder->bm_match[cur_trial->state][pos_state] ); - rep_match_price = match_price + price1( encoder->bm_rep[cur_trial->state] ); + match_price = cur_trial->price + price1( encoder->bm_match[cur_state][pos_state] ); + rep_match_price = match_price + price1( encoder->bm_rep[cur_state] ); if( match_byte == cur_byte && next_trial->dis != 0 ) - Tr_update( next_trial, 0, cur, rep_match_price + - LZe_price_rep_len1( encoder, cur_trial->state, pos_state ) ); + { + const int price = rep_match_price + + LZe_price_rep_len1( encoder, cur_state, pos_state ); + if( price <= next_trial->price ) + { + next_trial->price = price; + next_trial->dis = 0; + next_trial->prev_index = cur; + next_trial->prev_index2 = single_step_trial; + } + } - len_limit = min( min( max_num_trials - 1 - cur, - Mf_available_bytes( encoder->matchfinder ) ), - encoder->matchfinder->match_len_limit ); - if( len_limit < min_match_len ) continue; + available_bytes = min( Mf_available_bytes( encoder->matchfinder ) + 1, + max_num_trials - 1 - cur ); + if( available_bytes < min_match_len ) continue; - for( rep = 0; rep < num_rep_distances; ++rep ) + len_limit = min( encoder->matchfinder->match_len_limit, available_bytes ); + + /* try literal + rep0 */ + if( match_byte != cur_byte && next_trial->prev_index != cur ) { - const int dis = cur_trial->reps[rep] + 1; - int len = 0; const uint8_t * const data = Mf_ptr_to_current_pos( encoder->matchfinder ) - 1; - while( len < len_limit && data[len] == data[len-dis] ) ++len; - if( len >= min_match_len ) + const int dis = cur_trial->reps[0] + 1; + const int limit = min( encoder->matchfinder->match_len_limit + 1, + available_bytes ); + len = 1; + while( len < limit && data[len-dis] == data[len] ) ++len; + if( --len >= min_match_len ) { - const int price = rep_match_price + - LZe_price_rep( encoder, rep, cur_trial->state, pos_state ); - while( num_trials < cur + len ) + const int pos_state2 = ( pos_state + 1 ) & pos_state_mask; + const State state2 = St_set_char( cur_state ); + const int price = next_price + + price1( encoder->bm_match[state2][pos_state2] ) + + price1( encoder->bm_rep[state2] ) + + LZe_price_rep0_len( encoder, len, state2, pos_state2 ); + while( num_trials < cur + 1 + len ) encoder->trials[++num_trials].price = infinite_price; - for( ; len >= min_match_len; --len ) - Tr_update( &encoder->trials[cur+len], rep, cur, price + - Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) ); + Tr_update2( &encoder->trials[cur+1+len], price, 0, cur + 1 ); } } - if( newlen <= len_limit && - ( newlen > min_match_len || - ( newlen == min_match_len && - encoder->match_distances[min_match_len] < modeled_distances ) ) ) + /* try rep distances */ + for( rep = 0; rep < num_rep_distances; ++rep ) { + const uint8_t * const data = Mf_ptr_to_current_pos( encoder->matchfinder ) - 1; + int price; + const int dis = cur_trial->reps[rep] + 1; + + if( data[-dis] != data[0] || data[1-dis] != data[1] ) continue; + for( len = min_match_len; len < len_limit; ++len ) + if( data[len-dis] != data[len] ) break; + while( num_trials < cur + len ) + encoder->trials[++num_trials].price = infinite_price; + price = rep_match_price + + LZe_price_rep( encoder, rep, cur_state, pos_state ); + for( i = min_match_len; i <= len; ++i ) + Tr_update( &encoder->trials[cur+i], price + + Lee_price( &encoder->rep_match_len_encoder, i, pos_state ), + rep, cur ); + + if( rep == 0 ) start_len = len + 1; /* discard shorter matches */ + + /* try rep + literal + rep0 */ + { + int len2 = len + 1, pos_state2; + const int limit = min( encoder->matchfinder->match_len_limit + len2, + available_bytes ); + State state2; + while( len2 < limit && data[len2-dis] == data[len2] ) ++len2; + len2 -= len + 1; + if( len2 < min_match_len ) continue; + + pos_state2 = ( pos_state + len ) & pos_state_mask; + state2 = St_set_rep( cur_state ); + price += Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) + + price0( encoder->bm_match[state2][pos_state2] ) + + LZe_price_matched( encoder, data[len-1], data[len], data[len-dis] ); + pos_state2 = ( pos_state2 + 1 ) & pos_state_mask; + state2 = St_set_char( state2 ); + price += price1( encoder->bm_match[state2][pos_state2] ) + + price1( encoder->bm_rep[state2] ) + + LZe_price_rep0_len( encoder, len2, state2, pos_state2 ); + while( num_trials < cur + len + 1 + len2 ) + encoder->trials[++num_trials].price = infinite_price; + Tr_update3( &encoder->trials[cur+len+1+len2], price, 0, cur + len + 1, + rep, cur ); + } + } + + /* try matches */ + if( newlen >= start_len && newlen <= len_limit ) + { + int dis; const int normal_match_price = match_price + - price0( encoder->bm_rep[cur_trial->state] ); - int len; - int dis = encoder->match_distances[min_match_len]; - int dis_state = get_dis_state( min_match_len ); - int dis_price = infinite_price; + price0( encoder->bm_rep[cur_state] ); while( num_trials < cur + newlen ) encoder->trials[++num_trials].price = infinite_price; - if( dis < modeled_distances ) - Tr_update( &encoder->trials[cur+min_match_len], dis + num_rep_distances, - cur, normal_match_price + encoder->dis_prices[dis_state][dis] + - Lee_price( &encoder->len_encoder, min_match_len, pos_state ) ); - - for( len = min_match_len + 1; len <= newlen; ++len ) + i = 0; + while( start_len > encoder->pairs[i].len ) ++i; + dis = encoder->pairs[i].dis; + for( len = start_len; ; ++len ) { - if( dis != encoder->match_distances[len] || - dis_state < max_dis_states - 1 ) + int price = normal_match_price + + LZe_price_pair( encoder, dis, len, pos_state ); + + Tr_update( &encoder->trials[cur+len], price, dis + num_rep_distances, cur ); + + /* try match + literal + rep0 */ + if( len == encoder->pairs[i].len ) { - dis = encoder->match_distances[len]; - dis_state = get_dis_state( len ); - dis_price = LZe_price_dis( encoder, dis, dis_state ); + const uint8_t * const data = Mf_ptr_to_current_pos( encoder->matchfinder ) - 1; + const int dis2 = dis + 1; + int len2 = len + 1; + const int limit = min( encoder->matchfinder->match_len_limit + len2, + available_bytes ); + while( len2 < limit && data[len2-dis2] == data[len2] ) ++len2; + len2 -= len + 1; + if( len2 >= min_match_len ) + { + int pos_state2 = ( pos_state + len ) & pos_state_mask; + State state2 = St_set_match( cur_state ); + price += price0( encoder->bm_match[state2][pos_state2] ) + + LZe_price_matched( encoder, data[len-1], data[len], data[len-dis2] ); + pos_state2 = ( pos_state2 + 1 ) & pos_state_mask; + state2 = St_set_char( state2 ); + price += price1( encoder->bm_match[state2][pos_state2] ) + + price1( encoder->bm_rep[state2] ) + + LZe_price_rep0_len( encoder, len2, state2, pos_state2 ); + + while( num_trials < cur + len + 1 + len2 ) + encoder->trials[++num_trials].price = infinite_price; + Tr_update3( &encoder->trials[cur+len+1+len2], price, 0, + cur + len + 1, dis + num_rep_distances, cur ); + } + if( ++i >= num_pairs ) break; + dis = encoder->pairs[i].dis; } - Tr_update( &encoder->trials[cur+len], dis + num_rep_distances, cur, - normal_match_price + dis_price + - Lee_price( &encoder->len_encoder, len, pos_state ) ); } } } @@ -1223,7 +745,7 @@ static bool LZe_encode_member( struct LZ_encoder * const encoder, const bool finish ) { const int fill_count = - ( encoder->matchfinder->match_len_limit > 12 ) ? 512 : 2048; + ( encoder->matchfinder->match_len_limit > 12 ) ? 128 : 512; int ahead, i; State * const state = &encoder->state; if( encoder->member_finished ) return true; @@ -1233,19 +755,19 @@ static bool LZe_encode_member( struct LZ_encoder * const encoder, return true; } - /* encode first byte */ if( Mf_data_position( encoder->matchfinder ) == 0 && - !Mf_finished( encoder->matchfinder ) ) + !Mf_finished( encoder->matchfinder ) ) /* encode first byte */ { + const uint8_t prev_byte = 0; + uint8_t cur_byte; if( Mf_available_bytes( encoder->matchfinder ) < max_match_len && !encoder->matchfinder->at_stream_end ) return true; - const uint8_t prev_byte = 0; - const uint8_t cur_byte = Mf_peek( encoder->matchfinder, 0 ); + cur_byte = Mf_peek( encoder->matchfinder, 0 ); Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[*state][0], 0 ); - Lie_encode( &encoder->literal_encoder, &encoder->range_encoder, prev_byte, cur_byte ); + LZe_encode_literal( encoder, prev_byte, cur_byte ); CRC32_update_byte( &encoder->crc, cur_byte ); - Mf_longest_match_len( encoder->matchfinder, 0 ); + Mf_get_match_pairs( encoder->matchfinder, 0 ); if( !Mf_move_pos( encoder->matchfinder ) ) return false; } @@ -1253,12 +775,16 @@ static bool LZe_encode_member( struct LZ_encoder * const encoder, { if( !Mf_enough_available_bytes( encoder->matchfinder ) || !Re_enough_free_bytes( &encoder->range_encoder ) ) return true; - if( encoder->fill_counter <= 0 ) - { LZe_fill_distance_prices( encoder ); encoder->fill_counter = fill_count; } + if( encoder->pending_num_pairs == 0 ) + { + if( encoder->fill_counter <= 0 ) + { LZe_fill_distance_prices( encoder ); encoder->fill_counter = fill_count; } + if( encoder->align_price_count <= 0 ) + LZe_fill_align_prices( encoder ); + } ahead = LZe_sequence_optimizer( encoder, encoder->rep_distances, *state ); if( ahead <= 0 ) return false; /* can't happen */ - encoder->fill_counter -= ahead; for( i = 0; ; ) { @@ -1276,16 +802,14 @@ static bool LZe_encode_member( struct LZ_encoder * const encoder, const uint8_t cur_byte = Mf_peek( encoder->matchfinder, -ahead ); CRC32_update_byte( &encoder->crc, cur_byte ); if( St_is_char( *state ) ) - Lie_encode( &encoder->literal_encoder, &encoder->range_encoder, - prev_byte, cur_byte ); + LZe_encode_literal( encoder, prev_byte, cur_byte ); else { const uint8_t match_byte = Mf_peek( encoder->matchfinder, -ahead-encoder->rep_distances[0]-1 ); - Lie_encode_matched( &encoder->literal_encoder, &encoder->range_encoder, - prev_byte, cur_byte, match_byte ); + LZe_encode_matched( encoder, prev_byte, cur_byte, match_byte ); } - St_set_char( state ); + *state = St_set_char( *state ); } else /* match or repeated match */ { @@ -1305,17 +829,18 @@ static bool LZe_encode_member( struct LZ_encoder * const encoder, if( dis > 1 ) Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep2[*state], dis > 2 ); } - if( len == 1 ) St_set_short_rep( state ); + if( len == 1 ) *state = St_set_short_rep( *state ); else { Lee_encode( &encoder->rep_match_len_encoder, &encoder->range_encoder, len, pos_state ); - St_set_rep( state ); + *state = St_set_rep( *state ); } } else { LZe_encode_pair( encoder, dis - num_rep_distances, len, pos_state ); - St_set_match( state ); + --encoder->fill_counter; + *state = St_set_match( *state ); } } ahead -= len; i += len; diff --git a/encoder.h b/encoder.h new file mode 100644 index 0000000..90fa91c --- /dev/null +++ b/encoder.h @@ -0,0 +1,808 @@ +/* Lzlib - A compression library for lzip files + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + + This library is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this library. If not, see . + + As a special exception, you may use this file as part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this + file, or you compile this file and link it with other files to + produce an executable, this file does not by itself cause the + resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. +*/ + +enum { max_num_trials = 1 << 12, + price_shift_bits = 6, + price_step_bits = 2 }; + +static const uint8_t dis_slots[1<<10] = + { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19 }; + +static inline uint8_t get_slot( const uint32_t dis ) + { + if( dis < (1 << 10) ) return dis_slots[dis]; + if( dis < (1 << 19) ) return dis_slots[dis>> 9] + 18; + if( dis < (1 << 28) ) return dis_slots[dis>>18] + 36; + return dis_slots[dis>>27] + 54; + } + + +static const short prob_prices[bit_model_total >> price_step_bits] = +{ +640, 539, 492, 461, 438, 419, 404, 390, 379, 369, 359, 351, 343, 336, 330, 323, +318, 312, 307, 302, 298, 293, 289, 285, 281, 277, 274, 270, 267, 264, 261, 258, +255, 252, 250, 247, 244, 242, 239, 237, 235, 232, 230, 228, 226, 224, 222, 220, +218, 216, 214, 213, 211, 209, 207, 206, 204, 202, 201, 199, 198, 196, 195, 193, +192, 190, 189, 188, 186, 185, 184, 182, 181, 180, 178, 177, 176, 175, 174, 172, +171, 170, 169, 168, 167, 166, 165, 164, 163, 162, 161, 159, 158, 157, 157, 156, +155, 154, 153, 152, 151, 150, 149, 148, 147, 146, 145, 145, 144, 143, 142, 141, +140, 140, 139, 138, 137, 136, 136, 135, 134, 133, 133, 132, 131, 130, 130, 129, +128, 127, 127, 126, 125, 125, 124, 123, 123, 122, 121, 121, 120, 119, 119, 118, +117, 117, 116, 115, 115, 114, 114, 113, 112, 112, 111, 111, 110, 109, 109, 108, +108, 107, 106, 106, 105, 105, 104, 104, 103, 103, 102, 101, 101, 100, 100, 99, + 99, 98, 98, 97, 97, 96, 96, 95, 95, 94, 94, 93, 93, 92, 92, 91, + 91, 90, 90, 89, 89, 88, 88, 88, 87, 87, 86, 86, 85, 85, 84, 84, + 83, 83, 83, 82, 82, 81, 81, 80, 80, 80, 79, 79, 78, 78, 77, 77, + 77, 76, 76, 75, 75, 75, 74, 74, 73, 73, 73, 72, 72, 71, 71, 71, + 70, 70, 70, 69, 69, 68, 68, 68, 67, 67, 67, 66, 66, 65, 65, 65, + 64, 64, 64, 63, 63, 63, 62, 62, 61, 61, 61, 60, 60, 60, 59, 59, + 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 55, 55, 55, 54, 54, 54, + 53, 53, 53, 53, 52, 52, 52, 51, 51, 51, 50, 50, 50, 49, 49, 49, + 48, 48, 48, 48, 47, 47, 47, 46, 46, 46, 45, 45, 45, 45, 44, 44, + 44, 43, 43, 43, 43, 42, 42, 42, 41, 41, 41, 41, 40, 40, 40, 40, + 39, 39, 39, 38, 38, 38, 38, 37, 37, 37, 37, 36, 36, 36, 35, 35, + 35, 35, 34, 34, 34, 34, 33, 33, 33, 33, 32, 32, 32, 32, 31, 31, + 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 27, 27, + 27, 27, 26, 26, 26, 26, 26, 25, 25, 25, 25, 24, 24, 24, 24, 23, + 23, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, 20, + 20, 19, 19, 19, 19, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, 16, + 16, 16, 16, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 13, 13, 13, + 13, 13, 12, 12, 12, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 10, + 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, + 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1 }; + +static inline int get_price( const int probability ) + { return prob_prices[probability >> price_step_bits]; } + + +static inline int price0( const Bit_model probability ) + { return get_price( probability ); } + +static inline int price1( const Bit_model probability ) + { return get_price( bit_model_total - probability ); } + +static inline int price_bit( const Bit_model bm, const int bit ) + { if( bit ) return price1( bm ); else return price0( bm ); } + + +static inline int price_symbol( const Bit_model bm[], int symbol, + const int num_bits ) + { + int price = 0; + symbol |= ( 1 << num_bits ); + while( symbol > 1 ) + { + const int bit = symbol & 1; + symbol >>= 1; + price += price_bit( bm[symbol], bit ); + } + return price; + } + + +static inline int price_symbol_reversed( const Bit_model bm[], int symbol, + const int num_bits ) + { + int price = 0; + int model = 1; + int i; + for( i = num_bits; i > 0; --i ) + { + const int bit = symbol & 1; + symbol >>= 1; + price += price_bit( bm[model], bit ); + model = ( model << 1 ) | bit; + } + return price; + } + + +static inline int price_matched( const Bit_model bm[], unsigned symbol, + unsigned match_byte ) + { + int price = 0; + unsigned mask = 0x100; + symbol |= 0x100; + + do { + unsigned bit, match_bit; + match_byte <<= 1; + match_bit = match_byte & mask; + symbol <<= 1; + bit = symbol & 0x100; + price += price_bit( bm[match_bit+(symbol>>9)+mask], bit ); + mask &= ~(match_byte ^ symbol); /* if( match_bit != bit ) mask = 0; */ + } + while( symbol < 0x10000 ); + return price; + } + + +struct Pair /* distance-length pair */ + { + int dis; + int len; + }; + + +enum { /* bytes to keep in buffer before dictionary */ + before_size = max_num_trials + 1, + /* bytes to keep in buffer after pos */ + after_size = max_num_trials + ( 2 * max_match_len ) + 1, + num_prev_positions3 = 1 << 16, + num_prev_positions2 = 1 << 10 }; + +struct Matchfinder + { + unsigned long long partial_data_pos; + uint8_t * buffer; /* input buffer */ + int32_t * prev_positions; /* last seen position of key */ + int32_t * prev_pos_tree; /* previous positions of key */ + int match_len_limit; + int buffer_size; + int dictionary_size; /* bytes to keep in buffer before pos */ + int pos; /* current pos in buffer */ + int cyclic_pos; /* cycles through [0, dictionary_size] */ + int pos_limit; /* when reached, a new block must be read */ + int stream_pos; /* first byte not yet read from file */ + int cycles; + unsigned key4_mask; + int num_prev_positions; /* size of prev_positions */ + bool at_stream_end; /* stream_pos shows real end of file */ + bool been_flushed; + }; + +static bool Mf_normalize_pos( struct Matchfinder * const mf ); + +static int Mf_write_data( struct Matchfinder * const mf, + const uint8_t * const inbuf, const int size ) + { + const int sz = min( mf->buffer_size - mf->stream_pos, size ); + if( !mf->at_stream_end && sz > 0 ) + { + memcpy( mf->buffer + mf->stream_pos, inbuf, sz ); + mf->stream_pos += sz; + } + return sz; + } + +static inline void Mf_free( struct Matchfinder * const mf ) + { + free( mf->prev_positions ); + free( mf->buffer ); + } + +static inline uint8_t Mf_peek( const struct Matchfinder * const mf, const int i ) + { return mf->buffer[mf->pos+i]; } + +static inline int Mf_available_bytes( const struct Matchfinder * const mf ) + { return mf->stream_pos - mf->pos; } + +static inline unsigned long long +Mf_data_position( const struct Matchfinder * const mf ) + { return mf->partial_data_pos + mf->pos; } + +static inline bool Mf_finished( const struct Matchfinder * const mf ) + { return mf->at_stream_end && mf->pos >= mf->stream_pos; } + +static inline const uint8_t * +Mf_ptr_to_current_pos( const struct Matchfinder * const mf ) + { return mf->buffer + mf->pos; } + +static inline void Mf_set_flushing( struct Matchfinder * const mf, + const bool flushing ) + { mf->at_stream_end = flushing; } + +static inline int Mf_free_bytes( const struct Matchfinder * const mf ) + { if( mf->at_stream_end ) return 0; return mf->buffer_size - mf->stream_pos; } + +static inline bool Mf_enough_available_bytes( const struct Matchfinder * const mf ) + { + return ( mf->pos + after_size <= mf->stream_pos || + ( mf->at_stream_end && mf->pos < mf->stream_pos ) ); + } + +static inline bool Mf_dec_pos( struct Matchfinder * const mf, + const int ahead ) + { + if( ahead < 0 || mf->pos < ahead ) return false; + mf->pos -= ahead; + mf->cyclic_pos -= ahead; + if( mf->cyclic_pos < 0 ) mf->cyclic_pos += mf->dictionary_size + 1; + return true; + } + +static inline int Mf_true_match_len( const struct Matchfinder * const mf, + const int index, const int distance, + int len_limit ) + { + const uint8_t * const data = mf->buffer + mf->pos + index; + int i = 0; + + if( index + len_limit > Mf_available_bytes( mf ) ) + len_limit = Mf_available_bytes( mf ) - index; + while( i < len_limit && data[i-distance] == data[i] ) ++i; + return i; + } + +static inline bool Mf_move_pos( struct Matchfinder * const mf ) + { + if( ++mf->cyclic_pos > mf->dictionary_size ) mf->cyclic_pos = 0; + if( ++mf->pos >= mf->pos_limit ) return Mf_normalize_pos( mf ); + return true; + } + +static void Mf_reset( struct Matchfinder * const mf ); +static int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs ); + + +enum { re_min_free_bytes = 2 * max_num_trials }; + +struct Range_encoder + { + struct Circular_buffer cb; + uint64_t low; + unsigned long long partial_member_pos; + uint32_t range; + int ff_count; + uint8_t cache; + }; + +static inline void Re_shift_low( struct Range_encoder * const renc ) + { + const bool carry = ( renc->low > 0xFFFFFFFFU ); + if( carry || renc->low < 0xFF000000U ) + { + Cb_put_byte( &renc->cb, renc->cache + carry ); + for( ; renc->ff_count > 0; --renc->ff_count ) + Cb_put_byte( &renc->cb, 0xFF + carry ); + renc->cache = renc->low >> 24; + } + else ++renc->ff_count; + renc->low = ( renc->low & 0x00FFFFFFU ) << 8; + } + +static inline bool Re_init( struct Range_encoder * const renc ) + { + if( !Cb_init( &renc->cb, 65536 + re_min_free_bytes ) ) return false; + renc->low = 0; + renc->partial_member_pos = 0; + renc->range = 0xFFFFFFFFU; + renc->ff_count = 0; + renc->cache = 0; + return true; + } + +static inline void Re_free( struct Range_encoder * const renc ) + { Cb_free( &renc->cb ); } + +static inline unsigned long long +Re_member_position( const struct Range_encoder * const renc ) + { return renc->partial_member_pos + Cb_used_bytes( &renc->cb ) + renc->ff_count; } + +static inline bool Re_enough_free_bytes( const struct Range_encoder * const renc ) + { return Cb_free_bytes( &renc->cb ) >= re_min_free_bytes; } + +static inline int Re_read_data( struct Range_encoder * const renc, + uint8_t * const out_buffer, const int out_size ) + { + const int size = Cb_read_data( &renc->cb, out_buffer, out_size ); + if( size > 0 ) renc->partial_member_pos += size; + return size; + } + +static inline void Re_flush( struct Range_encoder * const renc ) + { + int i; for( i = 0; i < 5; ++i ) Re_shift_low( renc ); + renc->low = 0; + renc->range = 0xFFFFFFFFU; + renc->ff_count = 0; + renc->cache = 0; + } + +static inline void Re_encode( struct Range_encoder * const renc, + const int symbol, const int num_bits ) + { + int i; + for( i = num_bits - 1; i >= 0; --i ) + { + renc->range >>= 1; + if( (symbol >> i) & 1 ) renc->low += renc->range; + if( renc->range <= 0x00FFFFFFU ) + { renc->range <<= 8; Re_shift_low( renc ); } + } + } + +static inline void Re_encode_bit( struct Range_encoder * const renc, + Bit_model * const probability, const int bit ) + { + const uint32_t bound = ( renc->range >> bit_model_total_bits ) * *probability; + if( !bit ) + { + renc->range = bound; + *probability += (bit_model_total - *probability) >> bit_model_move_bits; + } + else + { + renc->low += bound; + renc->range -= bound; + *probability -= *probability >> bit_model_move_bits; + } + if( renc->range <= 0x00FFFFFFU ) + { renc->range <<= 8; Re_shift_low( renc ); } + } + +static inline void Re_encode_tree( struct Range_encoder * const renc, + Bit_model bm[], const int symbol, const int num_bits ) + { + int mask = ( 1 << ( num_bits - 1 ) ); + int model = 1; + int i; + for( i = num_bits; i > 0; --i, mask >>= 1 ) + { + const int bit = ( symbol & mask ); + Re_encode_bit( renc, &bm[model], bit ); + model <<= 1; + if( bit ) model |= 1; + } + } + +static inline void Re_encode_tree_reversed( struct Range_encoder * const renc, + Bit_model bm[], int symbol, const int num_bits ) + { + int model = 1; + int i; + for( i = num_bits; i > 0; --i ) + { + const int bit = symbol & 1; + Re_encode_bit( renc, &bm[model], bit ); + model = ( model << 1 ) | bit; + symbol >>= 1; + } + } + +static inline void Re_encode_matched( struct Range_encoder * const renc, + Bit_model bm[], unsigned symbol, + unsigned match_byte ) + { + unsigned mask = 0x100; + symbol |= 0x100; + + do { + unsigned bit, match_bit; + match_byte <<= 1; + match_bit = match_byte & mask; + symbol <<= 1; + bit = symbol & 0x100; + Re_encode_bit( renc, &bm[match_bit+(symbol>>9)+mask], bit ); + mask &= ~(match_byte ^ symbol); /* if( match_bit != bit ) mask = 0; */ + } + while( symbol < 0x10000 ); + } + + +struct Len_encoder + { + Bit_model choice1; + Bit_model choice2; + Bit_model bm_low[pos_states][len_low_symbols]; + Bit_model bm_mid[pos_states][len_mid_symbols]; + Bit_model bm_high[len_high_symbols]; + int prices[pos_states][max_len_symbols]; + int len_symbols; + int counters[pos_states]; + }; + +static void Lee_update_prices( struct Len_encoder * const len_encoder, + const int pos_state ) + { + int * const pps = len_encoder->prices[pos_state]; + int tmp = price0( len_encoder->choice1 ); + int len = 0; + for( ; len < len_low_symbols && len < len_encoder->len_symbols; ++len ) + pps[len] = tmp + + price_symbol( len_encoder->bm_low[pos_state], len, len_low_bits ); + tmp = price1( len_encoder->choice1 ); + for( ; len < len_low_symbols + len_mid_symbols && len < len_encoder->len_symbols; ++len ) + pps[len] = tmp + price0( len_encoder->choice2 ) + + price_symbol( len_encoder->bm_mid[pos_state], len - len_low_symbols, len_mid_bits ); + for( ; len < len_encoder->len_symbols; ++len ) + /* using 4 slots per value makes "Lee_price" faster */ + len_encoder->prices[3][len] = len_encoder->prices[2][len] = + len_encoder->prices[1][len] = len_encoder->prices[0][len] = + tmp + price1( len_encoder->choice2 ) + + price_symbol( len_encoder->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); + len_encoder->counters[pos_state] = len_encoder->len_symbols; + } + +static void Lee_init( struct Len_encoder * const len_encoder, + const int match_len_limit ) + { + int i; + Bm_init( &len_encoder->choice1 ); + Bm_init( &len_encoder->choice2 ); + Bm_array_init( len_encoder->bm_low[0], pos_states * len_low_symbols ); + Bm_array_init( len_encoder->bm_mid[0], pos_states * len_mid_symbols ); + Bm_array_init( len_encoder->bm_high, len_high_symbols ); + len_encoder->len_symbols = match_len_limit + 1 - min_match_len; + for( i = 0; i < pos_states; ++i ) Lee_update_prices( len_encoder, i ); + } + +static void Lee_encode( struct Len_encoder * const len_encoder, + struct Range_encoder * const renc, + int symbol, const int pos_state ); + +static inline int Lee_price( const struct Len_encoder * const len_encoder, + const int symbol, const int pos_state ) + { return len_encoder->prices[pos_state][symbol - min_match_len]; } + + +enum { infinite_price = 0x0FFFFFFF, + max_marker_size = 16, + num_rep_distances = 4, /* must be 4 */ + single_step_trial = -2, + dual_step_trial = -1 }; + +struct Trial + { + State state; + int price; /* dual use var; cumulative price, match length */ + int dis; /* rep index or match distance */ + int prev_index; /* index of prev trial in trials[] */ + int dis2; + int prev_index2; /* -2 trial is single step */ + /* -1 literal + rep0 */ + /* >= 0 rep or match + literal + rep0 */ + int reps[num_rep_distances]; + }; + +static inline void Tr_update( struct Trial * const trial, const int pr, + const int d, const int p_i ) + { + if( pr < trial->price ) + { + trial->price = pr; + trial->dis = d; trial->prev_index = p_i; + trial->prev_index2 = single_step_trial; + } + } + +static inline void Tr_update2( struct Trial * const trial, const int pr, + const int d, const int p_i ) + { + if( pr < trial->price ) + { + trial->price = pr; + trial->dis = d; trial->prev_index = p_i; + trial->prev_index2 = dual_step_trial; + } + } + +static inline void Tr_update3( struct Trial * const trial, const int pr, + const int d, const int p_i, + const int d2, const int p_i2 ) + { + if( pr < trial->price ) + { + trial->price = pr; + trial->dis = d; trial->prev_index = p_i; + trial->dis2 = d2; trial->prev_index2 = p_i2; + } + } + + +struct LZ_encoder + { + unsigned long long member_size_limit; + int pending_num_pairs; + uint32_t crc; + + Bit_model bm_literal[1<member_finished && + !Cb_used_bytes( &encoder->range_encoder.cb ) ); + } + + +static inline void LZe_free( struct LZ_encoder * const encoder ) + { Re_free( &encoder->range_encoder ); } + +static inline unsigned LZe_crc( const struct LZ_encoder * const encoder ) + { return encoder->crc ^ 0xFFFFFFFFU; } + + /* move-to-front dis in/into reps */ +static inline void LZe_mtf_reps( const int dis, int reps[num_rep_distances] ) + { + int i; + if( dis >= num_rep_distances ) + { + for( i = num_rep_distances - 1; i > 0; --i ) reps[i] = reps[i-1]; + reps[0] = dis - num_rep_distances; + } + else if( dis > 0 ) + { + const int distance = reps[dis]; + for( i = dis; i > 0; --i ) reps[i] = reps[i-1]; + reps[0] = distance; + } + } + +static inline int LZe_price_rep_len1( const struct LZ_encoder * const encoder, + const State state, const int pos_state ) + { + return price0( encoder->bm_rep0[state] ) + + price0( encoder->bm_len[state][pos_state] ); + } + +static inline int LZe_price_rep( const struct LZ_encoder * const encoder, + const int rep, + const State state, const int pos_state ) + { + int price; + if( rep == 0 ) return price0( encoder->bm_rep0[state] ) + + price1( encoder->bm_len[state][pos_state] ); + price = price1( encoder->bm_rep0[state] ); + if( rep == 1 ) + price += price0( encoder->bm_rep1[state] ); + else + { + price += price1( encoder->bm_rep1[state] ); + price += price_bit( encoder->bm_rep2[state], rep - 2 ); + } + return price; + } + +static inline int LZe_price_rep0_len( const struct LZ_encoder * const encoder, + const int len, + const State state, const int pos_state ) + { + return LZe_price_rep( encoder, 0, state, pos_state ) + + Lee_price( &encoder->rep_match_len_encoder, len, pos_state ); + } + +static inline int LZe_price_dis( const struct LZ_encoder * const encoder, + const int dis, const int dis_state ) + { + if( dis < modeled_distances ) + return encoder->dis_prices[dis_state][dis]; + else + return encoder->dis_slot_prices[dis_state][get_slot( dis )] + + encoder->align_prices[dis & (dis_align_size - 1)]; + } + +static inline int LZe_price_pair( const struct LZ_encoder * const encoder, + const int dis, const int len, + const int pos_state ) + { + return Lee_price( &encoder->len_encoder, len, pos_state ) + + LZe_price_dis( encoder, dis, get_dis_state( len ) ); + } + +static inline int LZe_price_literal( const struct LZ_encoder * const encoder, + uint8_t prev_byte, uint8_t symbol ) + { return price_symbol( encoder->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); } + +static inline int LZe_price_matched( const struct LZ_encoder * const encoder, + uint8_t prev_byte, uint8_t symbol, + uint8_t match_byte ) + { return price_matched( encoder->bm_literal[get_lit_state(prev_byte)], + symbol, match_byte ); } + +static inline void LZe_encode_literal( struct LZ_encoder * const encoder, + uint8_t prev_byte, uint8_t symbol ) + { Re_encode_tree( &encoder->range_encoder, + encoder->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); } + +static inline void LZe_encode_matched( struct LZ_encoder * const encoder, + uint8_t prev_byte, uint8_t symbol, + uint8_t match_byte ) + { Re_encode_matched( &encoder->range_encoder, + encoder->bm_literal[get_lit_state(prev_byte)], + symbol, match_byte ); } + +static inline void LZe_encode_pair( struct LZ_encoder * const encoder, + const uint32_t dis, const int len, + const int pos_state ) + { + const int dis_slot = get_slot( dis ); + Lee_encode( &encoder->len_encoder, &encoder->range_encoder, len, pos_state ); + Re_encode_tree( &encoder->range_encoder, + encoder->bm_dis_slot[get_dis_state(len)], + dis_slot, dis_slot_bits ); + + if( dis_slot >= start_dis_model ) + { + const int direct_bits = ( dis_slot >> 1 ) - 1; + const uint32_t base = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + const uint32_t direct_dis = dis - base; + + if( dis_slot < end_dis_model ) + Re_encode_tree_reversed( &encoder->range_encoder, + encoder->bm_dis + base - dis_slot - 1, + direct_dis, direct_bits ); + else + { + Re_encode( &encoder->range_encoder, direct_dis >> dis_align_bits, + direct_bits - dis_align_bits ); + Re_encode_tree_reversed( &encoder->range_encoder, encoder->bm_align, + direct_dis, dis_align_bits ); + --encoder->align_price_count; + } + } + } + +static inline int LZe_read_match_distances( struct LZ_encoder * const encoder ) + { + const int num_pairs = + Mf_get_match_pairs( encoder->matchfinder, encoder->pairs ); + if( num_pairs > 0 ) + { + int len = encoder->pairs[num_pairs-1].len; + if( len == encoder->matchfinder->match_len_limit && len < max_match_len ) + { + len += Mf_true_match_len( encoder->matchfinder, len, + encoder->pairs[num_pairs-1].dis + 1, + max_match_len - len ); + encoder->pairs[num_pairs-1].len = len; + } + } + return num_pairs; + } + +static inline bool LZe_move_pos( struct LZ_encoder * const encoder, int n ) + { + if( --n >= 0 && !Mf_move_pos( encoder->matchfinder ) ) return false; + while( --n >= 0 ) + { + Mf_get_match_pairs( encoder->matchfinder, 0 ); + if( !Mf_move_pos( encoder->matchfinder ) ) return false; + } + return true; + } + +static inline void LZe_backward( struct LZ_encoder * const encoder, int cur ) + { + int * const dis = &encoder->trials[cur].dis; + while( cur > 0 ) + { + const int prev_index = encoder->trials[cur].prev_index; + struct Trial * const prev_trial = &encoder->trials[prev_index]; + + if( encoder->trials[cur].prev_index2 != single_step_trial ) + { + prev_trial->dis = -1; + prev_trial->prev_index = prev_index - 1; + prev_trial->prev_index2 = single_step_trial; + if( encoder->trials[cur].prev_index2 >= 0 ) + { + struct Trial * const prev_trial2 = &encoder->trials[prev_index-1]; + prev_trial2->dis = encoder->trials[cur].dis2; + prev_trial2->prev_index = encoder->trials[cur].prev_index2; + prev_trial2->prev_index2 = single_step_trial; + } + } + prev_trial->price = cur - prev_index; /* len */ + cur = *dis; *dis = prev_trial->dis; prev_trial->dis = cur; + cur = prev_index; + } + } diff --git a/lzcheck.c b/lzcheck.c index 19a1887..6a1eed9 100644 --- a/lzcheck.c +++ b/lzcheck.c @@ -1,5 +1,5 @@ /* Lzcheck - A test program for the lzlib library - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you have unlimited permission to copy, distribute and modify it. @@ -23,20 +23,11 @@ #include "lzlib.h" -#ifndef LLONG_MAX -#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL -#endif -#ifndef LLONG_MIN -#define LLONG_MIN (-LLONG_MAX - 1LL) -#endif -#ifndef ULLONG_MAX -#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL -#endif - #ifndef min #define min(x,y) ((x) <= (y) ? (x) : (y)) #endif + enum { buffer_size = 32768 }; uint8_t in_buffer[buffer_size]; uint8_t mid_buffer[buffer_size]; @@ -45,13 +36,21 @@ uint8_t out_buffer[buffer_size]; int main( const int argc, const char * const argv[] ) { + const int dictionary_size = 1 << 20; + const int match_len_limit = 36; + const unsigned long long member_size = INT64_MAX; + struct LZ_Encoder * encoder; + struct LZ_Decoder * decoder; + FILE * file; + int retval = 0; + if( argc < 2 ) { fprintf( stderr, "Usage: lzcheck filename.txt\n" ); return 1; } - FILE *file = fopen( argv[1], "rb" ); + file = fopen( argv[1], "rb" ); if( !file ) { fprintf( stderr, "lzcheck: Can't open file '%s' for reading\n", argv[1] ); @@ -59,11 +58,7 @@ int main( const int argc, const char * const argv[] ) } /* fprintf( stderr, "lzcheck: Testing file '%s'\n", argv[1] ); */ - const int dictionary_size = 1 << 20; - const int match_len_limit = 36; - const long long member_size = LLONG_MAX; - struct LZ_Encoder * const encoder = - LZ_compress_open( dictionary_size, match_len_limit, member_size ); + encoder = LZ_compress_open( dictionary_size, match_len_limit, member_size ); if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) { const bool mem_error = ( LZ_compress_errno( encoder ) == LZ_mem_error ); @@ -77,7 +72,7 @@ int main( const int argc, const char * const argv[] ) return 3; } - struct LZ_Decoder * const decoder = LZ_decompress_open(); + decoder = LZ_decompress_open(); if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) { LZ_decompress_close( decoder ); @@ -85,7 +80,6 @@ int main( const int argc, const char * const argv[] ) return 1; } - int retval = 0; while( retval <= 1 ) { int i, l, r; @@ -94,11 +88,12 @@ int main( const int argc, const char * const argv[] ) for( l = 0, r = 1; r <= read_size; l = r, ++r ) { + int in_size, mid_size, out_size; while( r < read_size && in_buffer[r-1] != '\n' ) ++r; - const int in_size = LZ_compress_write( encoder, in_buffer + l, r - l ); + in_size = LZ_compress_write( encoder, in_buffer + l, r - l ); if( in_size < r - l ) r = l + in_size; LZ_compress_sync_flush( encoder ); - const int mid_size = LZ_compress_read( encoder, mid_buffer, buffer_size ); + mid_size = LZ_compress_read( encoder, mid_buffer, buffer_size ); if( mid_size < 0 ) { fprintf( stderr, "lzcheck: LZ_compress_read error: %s.\n", @@ -106,7 +101,7 @@ int main( const int argc, const char * const argv[] ) retval = 3; break; } LZ_decompress_write( decoder, mid_buffer, mid_size ); - const int out_size = LZ_decompress_read( decoder, out_buffer, buffer_size ); + out_size = LZ_decompress_read( decoder, out_buffer, buffer_size ); if( out_size < 0 ) { fprintf( stderr, "lzcheck: LZ_decompress_read error: %s.\n", @@ -146,22 +141,22 @@ int main( const int argc, const char * const argv[] ) while( retval <= 1 ) { - int i, l, r; + int i, l, r, size; const int read_size = fread( in_buffer, 1, buffer_size / 2, file ); if( read_size <= 0 ) break; /* end of file */ for( l = 0, r = 1; r <= read_size; l = r, ++r ) { + int leading_garbage, in_size, mid_size, out_size; while( r < read_size && in_buffer[r-1] != '\n' ) ++r; - const int leading_garbage = (l == 0) ? min( r, read_size / 2 ) : 0; - const int in_size = LZ_compress_write( encoder, in_buffer + l, r - l ); + leading_garbage = (l == 0) ? min( r, read_size / 2 ) : 0; + in_size = LZ_compress_write( encoder, in_buffer + l, r - l ); if( in_size < r - l ) r = l + in_size; LZ_compress_sync_flush( encoder ); if( leading_garbage ) memset( mid_buffer, in_buffer[0], leading_garbage ); - const int mid_size = LZ_compress_read( encoder, - mid_buffer + leading_garbage, - buffer_size - leading_garbage ); + mid_size = LZ_compress_read( encoder, mid_buffer + leading_garbage, + buffer_size - leading_garbage ); if( mid_size < 0 ) { fprintf( stderr, "lzcheck: LZ_compress_read error: %s.\n", @@ -169,7 +164,7 @@ int main( const int argc, const char * const argv[] ) retval = 3; break; } LZ_decompress_write( decoder, mid_buffer, mid_size + leading_garbage ); - int out_size = LZ_decompress_read( decoder, out_buffer, buffer_size ); + out_size = LZ_decompress_read( decoder, out_buffer, buffer_size ); if( out_size < 0 ) { if( LZ_decompress_errno( decoder ) == LZ_header_error || @@ -213,7 +208,7 @@ int main( const int argc, const char * const argv[] ) retval = 3; break; } - const int size = min( 100, read_size ); + size = min( 100, read_size ); if( LZ_compress_write( encoder, in_buffer, size ) != size || LZ_compress_finish( encoder ) < 0 || LZ_decompress_write( decoder, mid_buffer, LZ_compress_read( encoder, mid_buffer, buffer_size ) ) < 0 || diff --git a/lzlib.c b/lzlib.c index 3ee8ce3..1046bf7 100644 --- a/lzlib.c +++ b/lzlib.c @@ -1,5 +1,5 @@ /* Lzlib - A compression library for lzip files - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,15 +32,17 @@ #include "lzlib.h" #include "clzip.h" -#include "tables.c" +#include "cbuffer.c" +#include "decoder.h" #include "decoder.c" +#include "encoder.h" #include "encoder.c" struct LZ_Encoder { - long long partial_in_size; - long long partial_out_size; + unsigned long long partial_in_size; + unsigned long long partial_out_size; struct Matchfinder * matchfinder; struct LZ_encoder * lz_encoder; enum LZ_Errno lz_errno; @@ -66,8 +68,8 @@ static void LZ_Encoder_init( struct LZ_Encoder * const e, struct LZ_Decoder { - long long partial_in_size; - long long partial_out_size; + unsigned long long partial_in_size; + unsigned long long partial_out_size; struct Range_decoder * rdec; struct LZ_decoder * lz_decoder; enum LZ_Errno lz_errno; @@ -142,36 +144,42 @@ int LZ_max_match_len_limit( void ) { return max_match_len; } struct LZ_Encoder * LZ_compress_open( const int dictionary_size, const int match_len_limit, - const long long member_size ) + const unsigned long long member_size ) { File_header header; - Fh_set_magic( header ); const bool error = ( !Fh_set_dictionary_size( header, dictionary_size ) || match_len_limit < min_match_len_limit || - match_len_limit > max_match_len ); + match_len_limit > max_match_len || + member_size < min_dictionary_size ); struct LZ_Encoder * const e = (struct LZ_Encoder *)malloc( sizeof (struct LZ_Encoder) ); if( !e ) return 0; + Fh_set_magic( header ); LZ_Encoder_init( e, header ); if( error ) e->lz_errno = LZ_bad_argument; else { e->matchfinder = (struct Matchfinder *)malloc( sizeof (struct Matchfinder) ); - e->lz_encoder = (struct LZ_encoder *)malloc( sizeof (struct LZ_encoder) ); - if( !e->matchfinder || !e->lz_encoder || - !Mf_init( e->matchfinder, - Fh_get_dictionary_size( header ), match_len_limit ) || - !LZe_init( e->lz_encoder, e->matchfinder, header, member_size ) ) + if( e->matchfinder ) { - if( e->matchfinder ) - { Mf_free( e->matchfinder ); free( e->matchfinder ); e->matchfinder = 0; } + e->lz_encoder = (struct LZ_encoder *)malloc( sizeof (struct LZ_encoder) ); if( e->lz_encoder ) - { LZe_free( e->lz_encoder ); free( e->lz_encoder ); e->lz_encoder = 0; } - e->lz_errno = LZ_mem_error; + { + if( Mf_init( e->matchfinder, + Fh_get_dictionary_size( header ), match_len_limit ) ) + { + if( LZe_init( e->lz_encoder, e->matchfinder, header, member_size ) ) + return e; + Mf_free( e->matchfinder ); + } + free( e->lz_encoder ); e->lz_encoder = 0; + } + free( e->matchfinder ); e->matchfinder = 0; } + e->lz_errno = LZ_mem_error; } - if( e->lz_errno != LZ_ok ) e->fatal = true; + e->fatal = true; return e; } @@ -193,16 +201,23 @@ int LZ_compress_finish( struct LZ_Encoder * const e ) if( !verify_encoder( e ) || e->fatal ) return -1; Mf_set_flushing( e->matchfinder, true ); e->flush_pending = 0; + /* if (open --> write --> finish) use same dictionary size as lzip. */ + /* this does not save any memory. */ + if( Mf_data_position( e->matchfinder ) == 0 && + LZ_compress_total_out_size( e ) == Fh_size ) + Mf_adjust_distionary_size( e->matchfinder ); return 0; } int LZ_compress_restart_member( struct LZ_Encoder * const e, - const long long member_size ) + const unsigned long long member_size ) { if( !verify_encoder( e ) || e->fatal ) return -1; if( !LZe_member_finished( e->lz_encoder ) ) { e->lz_errno = LZ_sequence_error; return -1; } + if( member_size < min_dictionary_size ) + { e->lz_errno = LZ_bad_argument; return -1; } e->partial_in_size += Mf_data_position( e->matchfinder ); e->partial_out_size += Re_member_position( &e->lz_encoder->range_encoder ); @@ -290,30 +305,30 @@ int LZ_compress_member_finished( struct LZ_Encoder * const e ) } -long long LZ_compress_data_position( struct LZ_Encoder * const e ) +unsigned long long LZ_compress_data_position( struct LZ_Encoder * const e ) { - if( !verify_encoder( e ) ) return -1; + if( !verify_encoder( e ) ) return 0; return Mf_data_position( e->matchfinder ); } -long long LZ_compress_member_position( struct LZ_Encoder * const e ) +unsigned long long LZ_compress_member_position( struct LZ_Encoder * const e ) { - if( !verify_encoder( e ) ) return -1; + if( !verify_encoder( e ) ) return 0; return Re_member_position( &e->lz_encoder->range_encoder ); } -long long LZ_compress_total_in_size( struct LZ_Encoder * const e ) +unsigned long long LZ_compress_total_in_size( struct LZ_Encoder * const e ) { - if( !verify_encoder( e ) ) return -1; + if( !verify_encoder( e ) ) return 0; return e->partial_in_size + Mf_data_position( e->matchfinder ); } -long long LZ_compress_total_out_size( struct LZ_Encoder * const e ) +unsigned long long LZ_compress_total_out_size( struct LZ_Encoder * const e ) { - if( !verify_encoder( e ) ) return -1; + if( !verify_encoder( e ) ) return 0; return e->partial_out_size + Re_member_position( &e->lz_encoder->range_encoder ); } @@ -393,11 +408,13 @@ int LZ_decompress_sync_to_member( struct LZ_Decoder * const d ) int LZ_decompress_read( struct LZ_Decoder * const d, uint8_t * const buffer, const int size ) { + int result; if( !verify_decoder( d ) || d->fatal ) return -1; if( d->seeking ) return 0; + if( d->lz_decoder && LZd_member_finished( d->lz_decoder ) ) { - d->partial_in_size += d->rdec->member_position ; + d->partial_in_size += d->rdec->member_position; d->partial_out_size += LZd_data_position( d->lz_decoder ); LZd_free( d->lz_decoder ); free( d->lz_decoder ); d->lz_decoder = 0; } @@ -406,8 +423,13 @@ int LZ_decompress_read( struct LZ_Decoder * const d, if( Rd_available_bytes( d->rdec ) < 5 + Fh_size ) { if( !d->rdec->at_stream_end || Rd_finished( d->rdec ) ) return 0; - Rd_purge( d->rdec ); /* remove trailing garbage */ - d->lz_errno = LZ_header_error; + /* set position at EOF */ + d->partial_in_size += Rd_available_bytes( d->rdec ); + if( Rd_available_bytes( d->rdec ) <= Fh_size || + !Rd_read_header( d->rdec, d->member_header ) ) + d->lz_errno = LZ_header_error; + else + d->lz_errno = LZ_unexpected_eof; d->fatal = true; return -1; } @@ -427,7 +449,7 @@ int LZ_decompress_read( struct LZ_Decoder * const d, return -1; } } - const int result = LZd_decode_member( d->lz_decoder ); + result = LZd_decode_member( d->lz_decoder ); if( result != 0 ) { if( result == 2 ) d->lz_errno = LZ_unexpected_eof; @@ -442,13 +464,16 @@ int LZ_decompress_read( struct LZ_Decoder * const d, int LZ_decompress_write( struct LZ_Decoder * const d, const uint8_t * const buffer, const int size ) { + int result; if( !verify_decoder( d ) || d->fatal ) return -1; - int result = Rd_write_data( d->rdec, buffer, size ); + + result = Rd_write_data( d->rdec, buffer, size ); while( d->seeking ) { + int size2; if( Rd_find_header( d->rdec ) ) d->seeking = false; if( result >= size ) break; - const int size2 = Rd_write_data( d->rdec, buffer + result, size - result ); + size2 = Rd_write_data( d->rdec, buffer + result, size - result ); if( size2 > 0 ) result += size2; else break; } @@ -499,44 +524,42 @@ int LZ_decompress_dictionary_size( struct LZ_Decoder * const d ) } -unsigned int LZ_decompress_data_crc( struct LZ_Decoder * const d ) +unsigned LZ_decompress_data_crc( struct LZ_Decoder * const d ) { if( verify_decoder( d ) && d->lz_decoder ) return LZd_crc( d->lz_decoder ); - else return 0; + return 0; } -long long LZ_decompress_data_position( struct LZ_Decoder * const d ) +unsigned long long LZ_decompress_data_position( struct LZ_Decoder * const d ) { - if( !verify_decoder( d ) ) return -1; - if( d->lz_decoder ) + if( verify_decoder( d ) && d->lz_decoder ) return LZd_data_position( d->lz_decoder ); - else return 0; + return 0; } -long long LZ_decompress_member_position( struct LZ_Decoder * const d ) +unsigned long long LZ_decompress_member_position( struct LZ_Decoder * const d ) { - if( !verify_decoder( d ) ) return -1; - if( d->lz_decoder ) - return d->rdec->member_position ; - else return 0; + if( verify_decoder( d ) && d->lz_decoder ) + return d->rdec->member_position; + return 0; } -long long LZ_decompress_total_in_size( struct LZ_Decoder * const d ) +unsigned long long LZ_decompress_total_in_size( struct LZ_Decoder * const d ) { - if( !verify_decoder( d ) ) return -1; + if( !verify_decoder( d ) ) return 0; if( d->lz_decoder ) - return d->partial_in_size + d->rdec->member_position ; + return d->partial_in_size + d->rdec->member_position; return d->partial_in_size; } -long long LZ_decompress_total_out_size( struct LZ_Decoder * const d ) +unsigned long long LZ_decompress_total_out_size( struct LZ_Decoder * const d ) { - if( !verify_decoder( d ) ) return -1; + if( !verify_decoder( d ) ) return 0; if( d->lz_decoder ) return d->partial_out_size + LZd_data_position( d->lz_decoder ); return d->partial_out_size; diff --git a/lzlib.h b/lzlib.h index 1413322..1da08fa 100644 --- a/lzlib.h +++ b/lzlib.h @@ -1,5 +1,5 @@ /* Lzlib - A compression library for lzip files - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,7 +29,7 @@ extern "C" { #endif -static const char * const LZ_version_string = "1.3"; +static const char * const LZ_version_string = "1.4-rc2"; enum LZ_Errno { LZ_ok = 0, LZ_bad_argument, LZ_mem_error, LZ_sequence_error, LZ_header_error, LZ_unexpected_eof, @@ -53,12 +53,12 @@ struct LZ_Encoder; struct LZ_Encoder * LZ_compress_open( const int dictionary_size, const int match_len_limit, - const long long member_size ); + const unsigned long long member_size ); int LZ_compress_close( struct LZ_Encoder * const encoder ); int LZ_compress_finish( struct LZ_Encoder * const encoder ); int LZ_compress_restart_member( struct LZ_Encoder * const encoder, - const long long member_size ); + const unsigned long long member_size ); int LZ_compress_sync_flush( struct LZ_Encoder * const encoder ); int LZ_compress_read( struct LZ_Encoder * const encoder, @@ -71,10 +71,10 @@ enum LZ_Errno LZ_compress_errno( struct LZ_Encoder * const encoder ); int LZ_compress_finished( struct LZ_Encoder * const encoder ); int LZ_compress_member_finished( struct LZ_Encoder * const encoder ); -long long LZ_compress_data_position( struct LZ_Encoder * const encoder ); -long long LZ_compress_member_position( struct LZ_Encoder * const encoder ); -long long LZ_compress_total_in_size( struct LZ_Encoder * const encoder ); -long long LZ_compress_total_out_size( struct LZ_Encoder * const encoder ); +unsigned long long LZ_compress_data_position( struct LZ_Encoder * const encoder ); +unsigned long long LZ_compress_member_position( struct LZ_Encoder * const encoder ); +unsigned long long LZ_compress_total_in_size( struct LZ_Encoder * const encoder ); +unsigned long long LZ_compress_total_out_size( struct LZ_Encoder * const encoder ); /*--------------------- Decompression Functions ---------------------*/ @@ -100,11 +100,12 @@ int LZ_decompress_member_finished( struct LZ_Decoder * const decoder ); int LZ_decompress_member_version( struct LZ_Decoder * const decoder ); int LZ_decompress_dictionary_size( struct LZ_Decoder * const decoder ); -unsigned int LZ_decompress_data_crc( struct LZ_Decoder * const decoder ); -long long LZ_decompress_data_position( struct LZ_Decoder * const decoder ); -long long LZ_decompress_member_position( struct LZ_Decoder * const decoder ); -long long LZ_decompress_total_in_size( struct LZ_Decoder * const decoder ); -long long LZ_decompress_total_out_size( struct LZ_Decoder * const decoder ); +unsigned LZ_decompress_data_crc( struct LZ_Decoder * const decoder ); + +unsigned long long LZ_decompress_data_position( struct LZ_Decoder * const decoder ); +unsigned long long LZ_decompress_member_position( struct LZ_Decoder * const decoder ); +unsigned long long LZ_decompress_total_in_size( struct LZ_Decoder * const decoder ); +unsigned long long LZ_decompress_total_out_size( struct LZ_Decoder * const decoder ); #ifdef __cplusplus } diff --git a/main.c b/main.c index fe31871..9e69828 100644 --- a/main.c +++ b/main.c @@ -1,5 +1,5 @@ /* Minilzip - A test program for the lzlib library - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,6 +35,21 @@ #include #include #include +#if defined(__MSVCRT__) +#include +#define fchmod(x,y) 0 +#define fchown(x,y,z) 0 +#define strtoull strtoul +#define SIGHUP SIGTERM +#define S_ISSOCK(x) 0 +#define S_IRGRP 0 +#define S_IWGRP 0 +#define S_IROTH 0 +#define S_IWOTH 0 +#endif +#if defined(__OS2__) +#include +#endif #include "carg_parser.h" #include "lzlib.h" @@ -43,26 +58,21 @@ #error "Environments where CHAR_BIT != 8 are not supported." #endif -#ifndef LLONG_MAX -#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL +#ifndef max + #define max(x,y) ((x) >= (y) ? (x) : (y)) #endif -#ifndef LLONG_MIN -#define LLONG_MIN (-LLONG_MAX - 1LL) -#endif -#ifndef ULLONG_MAX -#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL -#endif - #ifndef min #define min(x,y) ((x) <= (y) ? (x) : (y)) #endif -long long int llabs( long long int number ); +void cleanup_and_fail( const int retval ); +void show_error( const char * const msg, const int errcode, const bool help ); +void internal_error( const char * const msg ); const char * const Program_name = "Minilzip"; const char * const program_name = "minilzip"; -const char * const program_year = "2012"; +const char * const program_year = "2013"; const char * invocation_name = 0; #ifdef O_BINARY @@ -83,6 +93,8 @@ struct Lzma_options }; enum Mode { m_compress, m_decompress, m_test }; +const unsigned long long max_member_size = 0x1000000000000000ULL; +const unsigned long long max_volume_size = 0x7FFFFFFFFFFFFFFFULL; char * output_filename = 0; int outfd = -1; @@ -101,31 +113,10 @@ struct Pretty_print bool first_post; }; -static void Pp_init( struct Pretty_print * const pp, - const char * const filenames[], const int num_filenames ) - { - unsigned int stdin_name_len; - int i; - pp->name = 0; - pp->stdin_name = "(stdin)"; - pp->longest_name = 0; - pp->first_post = false; - stdin_name_len = strlen( pp->stdin_name ); - - for( i = 0; i < num_filenames; ++i ) - { - const char * const s = filenames[i]; - const int len = ( !strcmp( s, "-" ) ? stdin_name_len : strlen( s ) ); - if( len > pp->longest_name ) pp->longest_name = len; - } - if( pp->longest_name == 0 ) pp->longest_name = stdin_name_len; - } - - static inline void Pp_set_name( struct Pretty_print * const pp, const char * const filename ) { - if( filename && filename[0] && strcmp( filename, "-" ) ) + if( filename && filename[0] && strcmp( filename, "-" ) != 0 ) pp->name = filename; else pp->name = pp->stdin_name; pp->first_post = true; @@ -201,56 +192,32 @@ static void show_version( void ) } -void show_error( const char * const msg, const int errcode, const bool help ) - { - if( verbosity >= 0 ) - { - if( msg && msg[0] ) - { - fprintf( stderr, "%s: %s", program_name, msg ); - if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) ); - fprintf( stderr, "\n" ); - } - if( help && invocation_name && invocation_name[0] ) - fprintf( stderr, "Try '%s --help' for more information.\n", - invocation_name ); - } - } - - -void internal_error( const char * const msg ) - { - if( verbosity >= 0 ) - fprintf( stderr, "%s: internal error: %s.\n", program_name, msg ); - exit( 3 ); - } - - -static const char * format_num( long long num ) +void show_header( struct LZ_Decoder * const decoder ) { const char * const prefix[8] = { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; - enum { buf_size = 16, factor = 1024 }; - static char buf[buf_size]; - const char *p = ""; + enum { factor = 1024 }; + const char * p = ""; + const char * np = " "; + unsigned num = LZ_decompress_dictionary_size( decoder ), i; bool exact = ( num % factor == 0 ); - int i; - for( i = 0; i < 8 && ( llabs( num ) > 9999 || - ( exact && llabs( num ) >= factor ) ); ++i ) - { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; } - snprintf( buf, buf_size, "%lld %s", num, p ); - return buf; + for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; + p = prefix[i]; np = ""; } + fprintf( stderr, "version %d, dictionary size %s%4u %sB. ", + LZ_decompress_member_version( decoder ), np, num, p ); } -static long long getnum( const char * const ptr, - const long long llimit, const long long ulimit ) +static unsigned long long getnum( const char * const ptr, + const unsigned long long llimit, + const unsigned long long ulimit ) { - long long result; - char *tail; + unsigned long long result; + char * tail; errno = 0; - result = strtoll( ptr, &tail, 0 ); + result = strtoull( ptr, &tail, 0 ); if( tail == ptr ) { show_error( "Bad or missing numerical argument.", 0, true ); @@ -260,8 +227,7 @@ static long long getnum( const char * const ptr, if( !errno && tail[0] ) { int factor = ( tail[1] == 'i' ) ? 1024 : 1000; - int exponent = 0; - int i; + int exponent = 0, i; bool bad_multiplier = false; switch( tail[0] ) { @@ -286,7 +252,7 @@ static long long getnum( const char * const ptr, } for( i = 0; i < exponent; ++i ) { - if( LLONG_MAX / factor >= llabs( result ) ) result *= factor; + if( ulimit / factor >= result ) result *= factor; else { errno = ERANGE; break; } } } @@ -302,7 +268,7 @@ static long long getnum( const char * const ptr, static int get_dict_size( const char * const arg ) { - char *tail; + char * tail; int bits = strtol( arg, &tail, 0 ); if( bits >= LZ_min_dictionary_bits() && bits <= LZ_max_dictionary_bits() && *tail == 0 ) @@ -311,6 +277,20 @@ static int get_dict_size( const char * const arg ) } +static int extension_index( const char * const name ) + { + int i; + for( i = 0; known_extensions[i].from; ++i ) + { + const char * const ext = known_extensions[i].from; + if( strlen( name ) > strlen( ext ) && + strncmp( name + strlen( name ) - strlen( ext ), ext, strlen( ext ) ) == 0 ) + return i; + } + return -1; + } + + static int open_instream( const char * const name, struct stat * const in_statsp, const enum Mode program_mode, const int eindex, const bool recompress, const bool to_stdout ) @@ -354,22 +334,6 @@ static int open_instream( const char * const name, struct stat * const in_statsp } -void cleanup_and_fail( const int retval ) - { - if( delete_output_on_interrupt ) - { - delete_output_on_interrupt = false; - if( verbosity >= 0 ) - fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n", - program_name, output_filename ); - if( outfd >= 0 ) { close( outfd ); outfd = -1; } - if( remove( output_filename ) != 0 && errno != ENOENT ) - show_error( "WARNING: deletion of output file (apparently) failed.", 0, false ); - } - exit( retval ); - } - - /* assure at least a minimum size for buffer 'buf' */ static void * resize_buffer( void * buf, const int min_size ) { @@ -394,20 +358,6 @@ static void set_c_outname( const char * const name, const bool multifile ) } -static int extension_index( const char * const name ) - { - int i; - for( i = 0; known_extensions[i].from; ++i ) - { - const char * const ext = known_extensions[i].from; - if( strlen( name ) > strlen( ext ) && - strncmp( name + strlen( name ) - strlen( ext ), ext, strlen( ext ) ) == 0 ) - return i; - } - return -1; - } - - static void set_d_outname( const char * const name, const int i ) { if( i >= 0 ) @@ -468,6 +418,22 @@ static bool check_tty( const int infd, const enum Mode program_mode ) } +void cleanup_and_fail( const int retval ) + { + if( delete_output_on_interrupt ) + { + delete_output_on_interrupt = false; + if( verbosity >= 0 ) + fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n", + program_name, output_filename ); + if( outfd >= 0 ) { close( outfd ); outfd = -1; } + if( remove( output_filename ) != 0 && errno != ENOENT ) + show_error( "WARNING: deletion of output file (apparently) failed.", 0, false ); + } + exit( retval ); + } + + /* Set permissions, owner and times. */ static void close_and_set_permissions( const struct stat * const in_statsp ) { @@ -503,13 +469,13 @@ int readblock( const int fd, uint8_t * const buf, const int size ) errno = 0; while( rest > 0 ) { - errno = 0; const int n = read( fd, buf + size - rest, rest ); if( n > 0 ) rest -= n; - else if( n == 0 ) break; + else if( n == 0 ) break; /* EOF */ else if( errno != EINTR && errno != EAGAIN ) break; + errno = 0; } - return ( rest > 0 ) ? size - rest : size; + return size - rest; } @@ -522,18 +488,18 @@ int writeblock( const int fd, const uint8_t * const buf, const int size ) errno = 0; while( rest > 0 ) { - errno = 0; const int n = write( fd, buf + size - rest, rest ); if( n > 0 ) rest -= n; else if( n < 0 && errno != EINTR && errno != EAGAIN ) break; + errno = 0; } - return ( rest > 0 ) ? size - rest : size; + return size - rest; } static bool next_filename( void ) { - const unsigned int len = strlen( known_extensions[0].from ); + const unsigned len = strlen( known_extensions[0].from ); int i, j; if( strlen( output_filename ) >= len + 5 ) /* "*00001.lz" */ @@ -546,19 +512,20 @@ static bool next_filename( void ) } -static int do_compress( struct LZ_Encoder * const encoder, const long long member_size, - const long long volume_size, const int infd, +static int do_compress( struct LZ_Encoder * const encoder, + const unsigned long long member_size, + const unsigned long long volume_size, const int infd, struct Pretty_print * const pp, const struct stat * const in_statsp ) { - long long partial_volume_size = 0; + unsigned long long partial_volume_size = 0; enum { buffer_size = 65536 }; uint8_t buffer[buffer_size]; if( verbosity >= 1 ) Pp_show_msg( pp, 0 ); while( true ) { - int in_size = 0; + int in_size = 0, out_size; while( LZ_compress_write_size( encoder ) > 0 ) { const int size = min( LZ_compress_write_size( encoder ), buffer_size ); @@ -574,7 +541,7 @@ static int do_compress( struct LZ_Encoder * const encoder, const long long membe /* else LZ_compress_sync_flush( encoder ); */ in_size += rd; } - const int out_size = LZ_compress_read( encoder, buffer, buffer_size ); + out_size = LZ_compress_read( encoder, buffer, buffer_size ); if( out_size < 0 ) { Pp_show_msg( pp, 0 ); @@ -595,22 +562,27 @@ static int do_compress( struct LZ_Encoder * const encoder, const long long membe else if( in_size == 0 ) internal_error( "library error (LZ_compress_read)" ); if( LZ_compress_member_finished( encoder ) ) { + unsigned long long size; if( LZ_compress_finished( encoder ) == 1 ) break; - partial_volume_size += LZ_compress_member_position( encoder ); - if( partial_volume_size >= volume_size - LZ_min_dictionary_size() ) + if( volume_size > 0 ) { - partial_volume_size = 0; - if( delete_output_on_interrupt ) + partial_volume_size += LZ_compress_member_position( encoder ); + if( partial_volume_size >= volume_size - LZ_min_dictionary_size() ) { - close_and_set_permissions( in_statsp ); - if( !next_filename() ) - { Pp_show_msg( pp, "Too many volume files" ); return 1; } - if( !open_outstream( true ) ) return 1; - delete_output_on_interrupt = true; + partial_volume_size = 0; + if( delete_output_on_interrupt ) + { + close_and_set_permissions( in_statsp ); + if( !next_filename() ) + { Pp_show_msg( pp, "Too many volume files" ); return 1; } + if( !open_outstream( true ) ) return 1; + delete_output_on_interrupt = true; + } } + size = min( member_size, volume_size - partial_volume_size ); } - const long long size = - min( member_size, volume_size - partial_volume_size ); + else + size = member_size; if( LZ_compress_restart_member( encoder, size ) < 0 ) { Pp_show_msg( pp, 0 ); @@ -624,13 +596,13 @@ static int do_compress( struct LZ_Encoder * const encoder, const long long membe if( verbosity >= 1 ) { - const long long in_size = LZ_compress_total_in_size( encoder ); - const long long out_size = LZ_compress_total_out_size( encoder ); - if( in_size <= 0 || out_size <= 0 ) + const unsigned long long in_size = LZ_compress_total_in_size( encoder ); + const unsigned long long out_size = LZ_compress_total_out_size( encoder ); + if( in_size == 0 || out_size == 0 ) fprintf( stderr, " no data compressed.\n" ); else fprintf( stderr, "%6.3f:1, %6.3f bits/byte, " - "%5.2f%% saved, %lld in, %lld out.\n", + "%5.2f%% saved, %llu in, %llu out.\n", (double)in_size / out_size, ( 8.0 * out_size ) / in_size, 100.0 * ( 1.0 - ( (double)out_size / in_size ) ), @@ -640,14 +612,16 @@ static int do_compress( struct LZ_Encoder * const encoder, const long long membe } -int compress( const long long member_size, const long long volume_size, - const struct Lzma_options * const encoder_options, const int infd, - struct Pretty_print * const pp, const struct stat * const in_statsp ) +int compress( const unsigned long long member_size, + const unsigned long long volume_size, + const struct Lzma_options * const encoder_options, + const int infd, struct Pretty_print * const pp, + const struct stat * const in_statsp ) { struct LZ_Encoder * const encoder = LZ_compress_open( encoder_options->dictionary_size, - encoder_options->match_len_limit, - min( member_size, volume_size ) ); + encoder_options->match_len_limit, ( volume_size > 0 ) ? + min( member_size, volume_size ) : member_size ); int retval; if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) @@ -675,6 +649,7 @@ int do_decompress( struct LZ_Decoder * const decoder, const int infd, for( first_member = true; ; ) { int in_size = min( LZ_decompress_write_size( decoder ), buffer_size ); + int out_size = 0; if( in_size > 0 ) { const int max_in_size = in_size; @@ -688,7 +663,6 @@ int do_decompress( struct LZ_Decoder * const decoder, const int infd, internal_error( "library error (LZ_decompress_write)" ); if( in_size < max_in_size ) LZ_decompress_finish( decoder ); } - int out_size = 0; while( true ) { const int rd = LZ_decompress_read( decoder, buffer, buffer_size ); @@ -706,35 +680,34 @@ int do_decompress( struct LZ_Decoder * const decoder, const int infd, } } else if( rd < 0 ) { out_size = rd; break; } - if( verbosity >= 1 && LZ_decompress_member_finished( decoder ) == 1 ) + if( LZ_decompress_member_finished( decoder ) == 1 ) { - const long long data_position = LZ_decompress_data_position( decoder ); - const long long member_size = LZ_decompress_member_position( decoder ); - Pp_show_msg( pp, 0 ); - if( verbosity >= 2 ) - fprintf( stderr, "version %d, dictionary size %7sB. ", - LZ_decompress_member_version( decoder ), - format_num( LZ_decompress_dictionary_size( decoder ) ) ); - if( verbosity >= 3 && data_position > 0 && member_size > 0 ) - fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", - (double)data_position / member_size, - ( 8.0 * member_size ) / data_position, - 100.0 * ( 1.0 - ( (double)member_size / data_position ) ) ); - if( verbosity >= 4 ) - fprintf( stderr, "data CRC %08X, data size %9lld, member size %8lld. ", - LZ_decompress_data_crc( decoder ), - data_position, member_size ); - if( testing ) fprintf( stderr, "ok\n" ); - else fprintf( stderr, "done\n" ); + if( verbosity >= 1 ) + { + const unsigned long long data_position = LZ_decompress_data_position( decoder ); + const unsigned long long member_size = LZ_decompress_member_position( decoder ); + Pp_show_msg( pp, 0 ); + if( verbosity >= 2 ) show_header( decoder ); + if( verbosity >= 3 && data_position > 0 && member_size > 0 ) + fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", + (double)data_position / member_size, + ( 8.0 * member_size ) / data_position, + 100.0 * ( 1.0 - ( (double)member_size / data_position ) ) ); + if( verbosity >= 4 ) + fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ", + LZ_decompress_data_crc( decoder ), + data_position, member_size ); + if( testing ) fprintf( stderr, "ok\n" ); + else fprintf( stderr, "done\n" ); + } + first_member = false; Pp_reset( pp ); } - if( LZ_decompress_member_finished( decoder ) == 1 ) - { first_member = false; Pp_reset( pp ); } if( rd <= 0 ) break; } - if( out_size < 0 ) + if( out_size < 0 || ( first_member && out_size == 0 ) ) { const enum LZ_Errno lz_errno = LZ_decompress_errno( decoder ); - if( lz_errno == LZ_header_error ) + if( lz_errno == LZ_header_error || ( first_member && out_size == 0 ) ) { if( !first_member ) break; /* trailing garbage */ Pp_show_msg( pp, "Error reading member header" ); @@ -745,18 +718,18 @@ int do_decompress( struct LZ_Decoder * const decoder, const int infd, Pp_show_msg( pp, "Not enough memory. Find a machine with more memory" ); return 1; } - Pp_show_msg( pp, 0 ); - if( lz_errno == LZ_unexpected_eof ) + if( verbosity >= 0 ) { - if( verbosity >= 0 ) - fprintf( stderr, "File ends unexpectedly at pos %lld\n", + Pp_show_msg( pp, 0 ); + if( lz_errno == LZ_unexpected_eof ) + fprintf( stderr, "File ends unexpectedly at pos %llu\n", LZ_decompress_total_in_size( decoder ) ); - return 2; + else + fprintf( stderr, "Decoder error at pos %llu: %s.\n", + LZ_decompress_total_in_size( decoder ), + LZ_strerror( LZ_decompress_errno( decoder ) ) ); } - if( verbosity >= 0 ) - fprintf( stderr, "LZ_decompress_read error: %s.\n", - LZ_strerror( LZ_decompress_errno( decoder ) ) ); - return 1; + return 2; } if( LZ_decompress_finished( decoder ) == 1 ) break; if( in_size == 0 && out_size == 0 ) @@ -786,7 +759,7 @@ int decompress( const int infd, struct Pretty_print * const pp, void signal_handler( int sig ) { - sig = 0; /* keep compiler happy */ + if( sig ) {} /* keep compiler happy */ show_error( "Control-C or similar caught, quitting.", 0, false ); cleanup_and_fail( 1 ); } @@ -800,6 +773,52 @@ static void set_signals( void ) } +static void Pp_init( struct Pretty_print * const pp, + const char * const filenames[], const int num_filenames ) + { + unsigned stdin_name_len; + int i; + pp->name = 0; + pp->stdin_name = "(stdin)"; + pp->longest_name = 0; + pp->first_post = false; + stdin_name_len = strlen( pp->stdin_name ); + + for( i = 0; i < num_filenames; ++i ) + { + const char * const s = filenames[i]; + const int len = ( (strcmp( s, "-" ) == 0) ? stdin_name_len : strlen( s ) ); + if( len > pp->longest_name ) pp->longest_name = len; + } + if( pp->longest_name == 0 ) pp->longest_name = stdin_name_len; + } + + +void show_error( const char * const msg, const int errcode, const bool help ) + { + if( verbosity >= 0 ) + { + if( msg && msg[0] ) + { + fprintf( stderr, "%s: %s", program_name, msg ); + if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) ); + fprintf( stderr, "\n" ); + } + if( help ) + fprintf( stderr, "Try '%s --help' for more information.\n", + invocation_name ); + } + } + + +void internal_error( const char * const msg ) + { + if( verbosity >= 0 ) + fprintf( stderr, "%s: internal error: %s.\n", program_name, msg ); + exit( 3 ); + } + + int main( const int argc, const char * const argv[] ) { /* Mapping from gzip/bzip2 style 1..9 compression modes @@ -817,8 +836,8 @@ int main( const int argc, const char * const argv[] ) { 3 << 23, 132 }, /* -8 */ { 1 << 25, 273 } }; /* -9 */ struct Lzma_options encoder_options = option_mapping[6]; /* default = "-6" */ - long long member_size = LLONG_MAX; - long long volume_size = LLONG_MAX; + unsigned long long member_size = max_member_size; + unsigned long long volume_size = 0; const char * input_filename = ""; const char * default_output_filename = ""; const char ** filenames = 0; @@ -867,9 +886,10 @@ int main( const int argc, const char * const argv[] ) struct Arg_parser parser; invocation_name = argv[0]; + if( LZ_version()[0] != LZ_version_string[0] ) internal_error( "bad library version" ); - if( strcmp( PROGVERSION, LZ_version_string ) ) + if( strcmp( PROGVERSION, LZ_version_string ) != 0 ) internal_error( "bad library version_string" ); if( !ap_init( &parser, argc, argv, options, 0 ) ) @@ -881,14 +901,14 @@ int main( const int argc, const char * const argv[] ) { const int code = ap_code( &parser, argind ); const char * const arg = ap_argument( &parser, argind ); - if( !code ) break; /* no more options */ + if( !code ) break; /* no more options */ switch( code ) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': encoder_options = option_mapping[code-'0']; break; - case 'b': member_size = getnum( arg, 100000, LLONG_MAX / 2 ); break; + case 'b': member_size = getnum( arg, 100000, max_member_size ); break; case 'c': to_stdout = true; break; case 'd': program_mode = m_decompress; break; case 'f': force = true; break; @@ -902,7 +922,7 @@ int main( const int argc, const char * const argv[] ) case 'q': verbosity = -1; break; case 's': encoder_options.dictionary_size = get_dict_size( arg ); break; - case 'S': volume_size = getnum( arg, 100000, LLONG_MAX / 2 ); break; + case 'S': volume_size = getnum( arg, 100000, max_volume_size ); break; case 't': program_mode = m_test; break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; @@ -910,29 +930,24 @@ int main( const int argc, const char * const argv[] ) } } /* end process options */ -#if defined(__OS2__) - _fsetmode( stdin, "b" ); - _fsetmode( stdout, "b" ); +#if defined(__MSVCRT__) || defined(__OS2__) + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); #endif if( program_mode == m_test ) outfd = -1; - for( ; argind < ap_arguments( &parser ); ++argind ) - { - if( strcmp( ap_argument( &parser, argind ), "-" ) ) - filenames_given = true; - ++num_filenames; - filenames = resize_buffer( filenames, num_filenames * sizeof (char *) ); - filenames[num_filenames-1] = ap_argument( &parser, argind ); - } + num_filenames = max( 1, ap_arguments( &parser ) - argind ); + filenames = resize_buffer( filenames, num_filenames * sizeof filenames[0] ); + filenames[0] = "-"; - if( num_filenames == 0 ) + for( i = 0; argind + i < ap_arguments( &parser ); ++i ) { - ++num_filenames; - filenames = resize_buffer( filenames, sizeof (char *) ); - filenames[num_filenames-1] = "-"; + filenames[i] = ap_argument( &parser, argind + i ); + if( strcmp( filenames[i], "-" ) != 0 ) filenames_given = true; } + if( !to_stdout && program_mode != m_test && ( filenames_given || default_output_filename[0] ) ) set_signals(); @@ -947,7 +962,7 @@ int main( const int argc, const char * const argv[] ) const struct stat * in_statsp; output_filename[0] = 0; - if( !filenames[i][0] || !strcmp( filenames[i], "-" ) ) + if( !filenames[i][0] || strcmp( filenames[i], "-" ) == 0 ) { input_filename = ""; infd = STDIN_FILENO; @@ -958,7 +973,7 @@ int main( const int argc, const char * const argv[] ) else { if( program_mode == m_compress ) - set_c_outname( default_output_filename, volume_size != LLONG_MAX ); + set_c_outname( default_output_filename, volume_size > 0 ); else { output_filename = resize_buffer( output_filename, @@ -968,7 +983,7 @@ int main( const int argc, const char * const argv[] ) outfd_mode = all_rw; if( !open_outstream( force ) ) { - if( outfd == -1 && retval < 1 ) retval = 1; + if( retval < 1 ) retval = 1; close( infd ); infd = -1; continue; } @@ -988,12 +1003,12 @@ int main( const int argc, const char * const argv[] ) else { if( program_mode == m_compress ) - set_c_outname( input_filename, volume_size != LLONG_MAX ); + set_c_outname( input_filename, volume_size > 0 ); else set_d_outname( input_filename, eindex ); outfd_mode = usr_rw; if( !open_outstream( force ) ) { - if( outfd == -1 && retval < 1 ) retval = 1; + if( retval < 1 ) retval = 1; close( infd ); infd = -1; continue; } diff --git a/tables.c b/tables.c deleted file mode 100644 index c088706..0000000 --- a/tables.c +++ /dev/null @@ -1,395 +0,0 @@ -/* Lzlib - A compression library for lzip files - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. - - This library is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this library. If not, see . - - As a special exception, you may use this file as part of a free - software library without restriction. Specifically, if other files - instantiate templates or use macros or inline functions from this - file, or you compile this file and link it with other files to - produce an executable, this file does not by itself cause the - resulting executable to be covered by the GNU General Public - License. This exception does not however invalidate any other - reasons why the executable file might be covered by the GNU General - Public License. -*/ - -/* Table of CRCs of all 8-bit messages. */ -static const uint32_t crc32[256] = - { - 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, - 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, - 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, - 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, - 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, - 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, - 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, - 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, - 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, - 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, - 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106, - 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, - 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, - 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, - 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, - 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, - 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, - 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, - 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, - 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, - 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, - 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, - 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, - 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, - 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, - 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, - 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E, - 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, - 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, - 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, - 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, - 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, - 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, - 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, - 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, - 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, - 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, - 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, - 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, - 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, - 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, - 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, - 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D }; - - -static inline void CRC32_update_byte( uint32_t * crc, const uint8_t byte ) - { *crc = crc32[(*crc^byte)&0xFF] ^ ( *crc >> 8 ); } - -static inline void CRC32_update_buf( uint32_t * crc, const uint8_t * const buffer, - const int size ) - { - int i; - for( i = 0; i < size; ++i ) - *crc = crc32[(*crc^buffer[i])&0xFF] ^ ( *crc >> 8 ); - } - - -static const uint8_t dis_slots[1<<12] = - { - 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23 }; - - -static inline int get_slot( const uint32_t dis ) - { - if( dis < (1 << 12) ) return dis_slots[dis]; - if( dis < (1 << 23) ) return dis_slots[dis>>11] + 22; - return dis_slots[dis>>22] + 44; - } - - -static const int prob_prices[bit_model_total >> 2] = -{ -704, 576, 512, 480, 448, 432, 416, 400, 384, 376, 368, 360, 352, 344, 336, 328, -320, 316, 312, 308, 304, 300, 296, 292, 288, 284, 280, 276, 272, 268, 264, 260, -256, 254, 252, 250, 248, 246, 244, 242, 240, 238, 236, 234, 232, 230, 228, 226, -224, 222, 220, 218, 216, 214, 212, 210, 208, 206, 204, 202, 200, 198, 196, 194, -192, 191, 190, 189, 188, 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, -176, 175, 174, 173, 172, 171, 170, 169, 168, 167, 166, 165, 164, 163, 162, 161, -160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146, 145, -144, 143, 142, 141, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131, 130, 129, -128, 127, 127, 126, 126, 125, 125, 124, 124, 123, 123, 122, 122, 121, 121, 120, -120, 119, 119, 118, 118, 117, 117, 116, 116, 115, 115, 114, 114, 113, 113, 112, -112, 111, 111, 110, 110, 109, 109, 108, 108, 107, 107, 106, 106, 105, 105, 104, -104, 103, 103, 102, 102, 101, 101, 100, 100, 99, 99, 98, 98, 97, 97, 96, - 96, 95, 95, 94, 94, 93, 93, 92, 92, 91, 91, 90, 90, 89, 89, 88, - 88, 87, 87, 86, 86, 85, 85, 84, 84, 83, 83, 82, 82, 81, 81, 80, - 80, 79, 79, 78, 78, 77, 77, 76, 76, 75, 75, 74, 74, 73, 73, 72, - 72, 71, 71, 70, 70, 69, 69, 68, 68, 67, 67, 66, 66, 65, 65, 64, - 64, 63, 63, 63, 63, 62, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60, - 60, 59, 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, - 56, 55, 55, 55, 55, 54, 54, 54, 54, 53, 53, 53, 53, 52, 52, 52, - 52, 51, 51, 51, 51, 50, 50, 50, 50, 49, 49, 49, 49, 48, 48, 48, - 48, 47, 47, 47, 47, 46, 46, 46, 46, 45, 45, 45, 45, 44, 44, 44, - 44, 43, 43, 43, 43, 42, 42, 42, 42, 41, 41, 41, 41, 40, 40, 40, - 40, 39, 39, 39, 39, 38, 38, 38, 38, 37, 37, 37, 37, 36, 36, 36, - 36, 35, 35, 35, 35, 34, 34, 34, 34, 33, 33, 33, 33, 32, 32, 32, - 32, 31, 31, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, - 28, 27, 27, 27, 27, 26, 26, 26, 26, 25, 25, 25, 25, 24, 24, 24, - 24, 23, 23, 23, 23, 22, 22, 22, 22, 21, 21, 21, 21, 20, 20, 20, - 20, 19, 19, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, - 16, 15, 15, 15, 15, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, - 12, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, 9, 8, 8, 8, - 8, 7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, - 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0 }; - - -static inline int get_price( const int probability ) - { - return prob_prices[probability >> 2]; - } diff --git a/testsuite/check.sh b/testsuite/check.sh index 371fcec..a5d5649 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lzlib - A compression library for lzip files -# Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. +# Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -28,31 +28,32 @@ fail=0 printf "testing lzlib-%s..." "$2" -"${LZIP}" -t "${testdir}"/test_v0.lz || fail=1 -printf . -"${LZIP}" -cd "${testdir}"/test_v0.lz > copy || fail=1 -cmp in copy || fail=1 -printf . - -"${LZIP}" -t "${testdir}"/test_v1.lz || fail=1 -printf . -"${LZIP}" -cd "${testdir}"/test_v1.lz > copy || fail=1 +"${LZIP}" -t "${testdir}"/test.txt.lz || fail=1 +"${LZIP}" -cd "${testdir}"/test.txt.lz > copy || fail=1 cmp in copy || fail=1 printf . "${LZIP}" -t "${testdir}"/test_sync.lz || fail=1 -printf . "${LZIP}" -cd "${testdir}"/test_sync.lz > copy || fail=1 cmp in copy || fail=1 printf . -"${LZIP}" -cfq "${testdir}"/test_v1.lz > out +"${LZIP}" -cfq "${testdir}"/test.txt.lz > out if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIP}" -cF "${testdir}"/test_v1.lz > out || fail=1 +"${LZIP}" -cF "${testdir}"/test.txt.lz > out || fail=1 "${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1 cmp in copy || fail=1 printf . +"${LZIP}" -cqs-1 in > out +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqs0 in > out +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqs4095 in > out +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqm274 in > out +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi + for i in s4Ki 0 1 2 3 4 5 6 7 8s16 9s16 ; do "${LZIP}" -k -$i in || fail=1 mv -f in.lz copy.lz || fail=1 @@ -89,6 +90,12 @@ done cmp in anyothername.out || fail=1 printf . +cat in in > in2 || framework_failure +"${LZIP}" < in2 > out2 || fail=1 +"${LZIP}" -d < out2 > copy2 || fail=1 +cmp in2 copy2 || fail=1 +printf . + "${BBEXAMPLE}" in || fail=1 printf . "${BBEXAMPLE}" out || fail=1 diff --git a/testsuite/test.txt.lz b/testsuite/test.txt.lz new file mode 100644 index 0000000..4db881a Binary files /dev/null and b/testsuite/test.txt.lz differ diff --git a/testsuite/test_v0.lz b/testsuite/test_v0.lz deleted file mode 100644 index a09b1e8..0000000 Binary files a/testsuite/test_v0.lz and /dev/null differ diff --git a/testsuite/test_v1.lz b/testsuite/test_v1.lz deleted file mode 100644 index f1c79eb..0000000 Binary files a/testsuite/test_v1.lz and /dev/null differ -- cgit v1.2.3