From 4da07136ac4461ad1ba6113f5772e2c0a6468b49 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 7 Nov 2015 14:48:27 +0100 Subject: Merging upstream version 1.4. Signed-off-by: Daniel Baumann --- ChangeLog | 14 +-- INSTALL | 7 +- Makefile.in | 2 +- carg_parser.c | 10 +- carg_parser.h | 2 +- clzip.h | 280 ------------------------------------------------- configure | 40 +++++-- decoder.c | 73 +++++++------ decoder.h | 71 +++++-------- doc/lzlib.info | 47 +++++---- doc/lzlib.texinfo | 22 ++-- doc/minilzip.1 | 9 +- encoder.c | 26 ++--- encoder.h | 38 +++---- lzip.h | 298 +++++++++++++++++++++++++++++++++++++++++++++++++++++ lzlib.c | 2 +- lzlib.h | 2 +- main.c | 20 ++-- testsuite/check.sh | 18 ++-- 19 files changed, 504 insertions(+), 477 deletions(-) delete mode 100644 clzip.h create mode 100644 lzip.h diff --git a/ChangeLog b/ChangeLog index a3d5a17..df0b97e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,22 +1,18 @@ -2013-02-07 Antonio Diaz Diaz +2013-05-28 Antonio Diaz Diaz - * Version 1.4-rc2 released. - * lzlib.c (LZ_decompress_read): Tell LZ_header_error from - LZ_unexpected_eof the same way as lzip does. - -2013-01-18 Antonio Diaz Diaz - - * Version 1.4-rc1 released. + * Version 1.4 released. * Multi-step trials have been implemented. * Compression ratio has been slightly increased. * Compression time has been reduced by 8%. * Decompression time has been reduced by 7%. * lzlib.h: Changed 'long long' values to 'unsigned long long'. * encoder.c (Mf_init): Reduce minimum buffer size to 64KiB. + * lzlib.c (LZ_decompress_read): Tell LZ_header_error from + LZ_unexpected_eof the same way as lzip does. * Makefile.in: Added new target 'install-as-lzip'. * Makefile.in: Added new target 'install-bin'. - * main.c: Define 'strtoull' to 'strtoul' on Windows. * main.c: Use 'setmode' instead of '_setmode' on Windows and OS/2. + * main.c: Define 'strtoull' to 'strtoul' on Windows. 2012-02-29 Antonio Diaz Diaz diff --git a/INSTALL b/INSTALL index 7281bad..5e450e7 100644 --- a/INSTALL +++ b/INSTALL @@ -1,7 +1,7 @@ Requirements ------------ You will need a C compiler. -I use gcc 4.7.2 and 3.3.6, but the code should compile with any +I use gcc 4.8.0 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -36,8 +36,9 @@ the main archive. typing 'make install-bin', 'make install-info' or 'make install-man' respectively. -5a. Type 'make install-as-lzip' to install the library and any data - files and documentation, and link minilzip to the name 'lzip'. + Instead of 'make install', you can type 'make install-as-lzip' to + install the library and any data files and documentation, and link + minilzip to the name 'lzip'. Another way diff --git a/Makefile.in b/Makefile.in index 02e3a46..7be45f7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -47,7 +47,7 @@ main.o : main.c lzlib_sh.o : lzlib.c $(CC) -fpic -fPIC $(CPPFLAGS) $(CFLAGS) -c -o $@ $< -lzdeps = lzlib.h clzip.h cbuffer.c decoder.h decoder.c encoder.h encoder.c +lzdeps = lzlib.h lzip.h cbuffer.c decoder.h decoder.c encoder.h encoder.c $(objs) : Makefile carg_parser.o : carg_parser.h diff --git a/carg_parser.c b/carg_parser.c index 973bb7e..a86f76f 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify @@ -89,15 +89,14 @@ static char parse_long_option( struct Arg_parser * const ap, int * const argindp ) { unsigned len; - int index = -1; - int i; + int index = -1, i; char exact = 0, ambig = 0; for( len = 0; opt[len+2] && opt[len+2] != '='; ++len ) ; /* Test all long options for either exact match or abbreviated matches. */ for( i = 0; options[i].code != 0; ++i ) - if( options[i].name && !strncmp( options[i].name, &opt[2], len ) ) + if( options[i].name && strncmp( options[i].name, &opt[2], len ) == 0 ) { if( strlen( options[i].name ) == len ) /* Exact match found */ { index = i; exact = 1; break; } @@ -165,8 +164,7 @@ static char parse_short_option( struct Arg_parser * const ap, while( cind > 0 ) { - int index = -1; - int i; + int index = -1, i; const unsigned char code = opt[cind]; char code_str[2]; code_str[0] = code; code_str[1] = 0; diff --git a/carg_parser.h b/carg_parser.h index 3575dd7..41aa7b3 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify diff --git a/clzip.h b/clzip.h deleted file mode 100644 index f45d541..0000000 --- a/clzip.h +++ /dev/null @@ -1,280 +0,0 @@ -/* Lzlib - A compression library for lzip files - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. - - This library is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this library. If not, see . - - As a special exception, you may use this file as part of a free - software library without restriction. Specifically, if other files - instantiate templates or use macros or inline functions from this - file, or you compile this file and link it with other files to - produce an executable, this file does not by itself cause the - resulting executable to be covered by the GNU General Public - License. This exception does not however invalidate any other - reasons why the executable file might be covered by the GNU General - Public License. -*/ - -#ifndef max - #define max(x,y) ((x) >= (y) ? (x) : (y)) -#endif -#ifndef min - #define min(x,y) ((x) <= (y) ? (x) : (y)) -#endif - -typedef int State; - -enum { states = 12 }; - -static inline bool St_is_char( const State st ) { return st < 7; } - -static inline State St_set_char( const State st ) - { - static const State next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; - return next[st]; - } - -static inline State St_set_match( const State st ) - { return ( ( st < 7 ) ? 7 : 10 ); } - -static inline State St_set_rep( const State st ) - { return ( ( st < 7 ) ? 8 : 11 ); } - -static inline State St_set_short_rep( const State st ) - { return ( ( st < 7 ) ? 9 : 11 ); } - - -enum { - min_dictionary_bits = 12, - min_dictionary_size = 1 << min_dictionary_bits, - max_dictionary_bits = 29, - max_dictionary_size = 1 << max_dictionary_bits, - literal_context_bits = 3, - pos_state_bits = 2, - pos_states = 1 << pos_state_bits, - pos_state_mask = pos_states - 1, - - dis_slot_bits = 6, - start_dis_model = 4, - end_dis_model = 14, - modeled_distances = 1 << (end_dis_model / 2), /* 128 */ - dis_align_bits = 4, - dis_align_size = 1 << dis_align_bits, - - len_low_bits = 3, - len_mid_bits = 3, - len_high_bits = 8, - len_low_symbols = 1 << len_low_bits, - len_mid_symbols = 1 << len_mid_bits, - len_high_symbols = 1 << len_high_bits, - max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols, - - min_match_len = 2, /* must be 2 */ - max_match_len = min_match_len + max_len_symbols - 1, /* 273 */ - min_match_len_limit = 5, - - max_dis_states = 4 }; - -static inline int get_dis_state( const int len ) - { return min( len - min_match_len, max_dis_states - 1 ); } - -static inline int get_lit_state( const uint8_t prev_byte ) - { return ( prev_byte >> ( 8 - literal_context_bits ) ); } - - -enum { bit_model_move_bits = 5, - bit_model_total_bits = 11, - bit_model_total = 1 << bit_model_total_bits }; - -typedef int Bit_model; - -static inline void Bm_init( Bit_model * const probability ) - { *probability = bit_model_total / 2; } - -static inline void Bm_array_init( Bit_model * const p, const int size ) - { int i = 0; while( i < size ) p[i++] = bit_model_total / 2; } - - -/* Table of CRCs of all 8-bit messages. */ -static const uint32_t crc32[256] = - { - 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, - 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, - 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, - 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, - 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, - 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, - 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, - 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, - 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, - 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, - 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106, - 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, - 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, - 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, - 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, - 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, - 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, - 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, - 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, - 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, - 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, - 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, - 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, - 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, - 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, - 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, - 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E, - 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, - 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, - 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, - 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, - 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, - 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, - 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, - 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, - 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, - 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, - 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, - 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, - 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, - 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, - 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, - 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D }; - - -static inline void CRC32_update_byte( uint32_t * const crc, const uint8_t byte ) - { *crc = crc32[(*crc^byte)&0xFF] ^ ( *crc >> 8 ); } - -static inline void CRC32_update_buf( uint32_t * const crc, - const uint8_t * const buffer, const int size ) - { - int i; - for( i = 0; i < size; ++i ) - *crc = crc32[(*crc^buffer[i])&0xFF] ^ ( *crc >> 8 ); - } - - -static inline int real_bits( unsigned value ) - { - int bits = 0; - while( value > 0 ) { value >>= 1; ++bits; } - return bits; - } - - -static const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; /* "LZIP" */ - -typedef uint8_t File_header[6]; /* 0-3 magic bytes */ - /* 4 version */ - /* 5 coded_dict_size */ -enum { Fh_size = 6 }; - -static inline void Fh_set_magic( File_header data ) - { memcpy( data, magic_string, 4 ); data[4] = 1; } - -static inline bool Fh_verify_magic( const File_header data ) - { return ( memcmp( data, magic_string, 4 ) == 0 ); } - -static inline uint8_t Fh_version( const File_header data ) - { return data[4]; } - -static inline bool Fh_verify_version( const File_header data ) - { return ( data[4] <= 1 ); } - -static inline unsigned Fh_get_dictionary_size( const File_header data ) - { - unsigned sz = ( 1 << ( data[5] & 0x1F ) ); - if( sz > min_dictionary_size ) - sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 ); - return sz; - } - -static inline bool Fh_set_dictionary_size( File_header data, const int sz ) - { - if( sz >= min_dictionary_size && sz <= max_dictionary_size ) - { - data[5] = real_bits( sz - 1 ); - if( sz > min_dictionary_size ) - { - const int base_size = 1 << data[5]; - const int wedge = base_size / 16; - int i; - for( i = 7; i >= 1; --i ) - if( base_size - ( i * wedge ) >= sz ) - { data[5] |= ( i << 5 ); break; } - } - return true; - } - return false; - } - -static inline bool Fh_verify( const File_header data ) - { - return ( Fh_verify_magic( data ) && Fh_verify_version( data ) && - Fh_get_dictionary_size( data ) >= min_dictionary_size && - Fh_get_dictionary_size( data ) <= max_dictionary_size ); - } - - -typedef uint8_t File_trailer[20]; - /* 0-3 CRC32 of the uncompressed data */ - /* 4-11 size of the uncompressed data */ - /* 12-19 member size including header and trailer */ - -enum { Ft_size = 20 }; - -static inline int Ft_versioned_size( const int version ) - { return ( ( version >= 1 ) ? 20 : 12 ); } - -static inline unsigned Ft_get_data_crc( const File_trailer data ) - { - unsigned tmp = 0; - int i; - for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } - return tmp; - } - -static inline void Ft_set_data_crc( File_trailer data, unsigned crc ) - { - int i; - for( i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } - } - -static inline unsigned long long Ft_get_data_size( const File_trailer data ) - { - unsigned long long tmp = 0; - int i; - for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } - return tmp; - } - -static inline void Ft_set_data_size( File_trailer data, unsigned long long sz ) - { - int i; - for( i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } - } - -static inline unsigned long long Ft_get_member_size( const File_trailer data ) - { - unsigned long long tmp = 0; - int i; - for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } - return tmp; - } - -static inline void Ft_set_member_size( File_trailer data, unsigned long long sz ) - { - int i; - for( i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } - } diff --git a/configure b/configure index cfdce2a..149f91b 100755 --- a/configure +++ b/configure @@ -5,10 +5,8 @@ # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. -args= -no_create= pkgname=lzlib -pkgversion=1.4-rc2 +pkgversion=1.4 soversion=1 progname=minilzip progname_shared= @@ -33,18 +31,19 @@ CFLAGS='-Wall -W -O2' LDFLAGS= # checking whether we are using GNU C. -if [ ! -x /bin/gcc ] && - [ ! -x /usr/bin/gcc ] && - [ ! -x /usr/local/bin/gcc ] ; then +${CC} --version > /dev/null 2>&1 +if [ $? != 0 ] ; then CC=cc CFLAGS='-W -O2' fi # Loop over all args -while [ -n "$1" ] ; do +args= +no_create= +while [ $# != 0 ] ; do # Get the first arg, and shuffle - option=$1 + option=$1 ; arg2=no shift # Add the argument quoted to args @@ -82,6 +81,16 @@ while [ -n "$1" ] ; do --version | -V) echo "Configure script for ${pkgname} version ${pkgversion}" exit 0 ;; + --srcdir) srcdir=$1 ; arg2=yes ;; + --prefix) prefix=$1 ; arg2=yes ;; + --exec-prefix) exec_prefix=$1 ; arg2=yes ;; + --bindir) bindir=$1 ; arg2=yes ;; + --datarootdir) datarootdir=$1 ; arg2=yes ;; + --includedir) includedir=$1 ; arg2=yes ;; + --infodir) infodir=$1 ; arg2=yes ;; + --libdir) libdir=$1 ; arg2=yes ;; + --mandir) mandir=$1 ; arg2=yes ;; + --srcdir=*) srcdir=${optarg} ;; --prefix=*) prefix=${optarg} ;; --exec-prefix=*) exec_prefix=${optarg} ;; @@ -99,11 +108,22 @@ while [ -n "$1" ] ; do CFLAGS=*) CFLAGS=${optarg} ;; LDFLAGS=*) LDFLAGS=${optarg} ;; - --* | *=* | *-*-*) ;; + --*) + echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;; + *=* | *-*-*) ;; *) - echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2 + echo "configure: unrecognized option: '${option}'" 1>&2 + echo "Try 'configure --help' for more information." exit 1 ;; esac + + # Check if the option took a separate argument + if [ "${arg2}" = yes ] ; then + if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift + else echo "configure: Missing argument to \"${option}\"" 1>&2 + exit 1 + fi + fi done # Find the source files, if location was not specified. diff --git a/decoder.c b/decoder.c index 1f4cbf1..7f46009 100644 --- a/decoder.c +++ b/decoder.c @@ -30,15 +30,15 @@ static bool LZd_verify_trailer( struct LZ_decoder * const decoder ) File_trailer trailer; const int trailer_size = Ft_versioned_size( decoder->member_version ); const unsigned long long member_size = - decoder->range_decoder->member_position + trailer_size; + decoder->rdec->member_position + trailer_size; - int size = Rd_read_data( decoder->range_decoder, trailer, trailer_size ); + int size = Rd_read_data( decoder->rdec, trailer, trailer_size ); if( size < trailer_size ) return false; if( decoder->member_version == 0 ) Ft_set_member_size( trailer, member_size ); - return ( decoder->range_decoder->code == 0 && + return ( decoder->rdec->code == 0 && Ft_get_data_crc( trailer ) == LZd_crc( decoder ) && Ft_get_data_size( trailer ) == LZd_data_position( decoder ) && Ft_get_member_size( trailer ) == member_size ); @@ -51,105 +51,104 @@ static int LZd_decode_member( struct LZ_decoder * const decoder ) { State * const state = &decoder->state; if( decoder->member_finished ) return 0; - if( !Rd_try_reload( decoder->range_decoder, false ) ) return 0; + if( !Rd_try_reload( decoder->rdec, false ) ) return 0; if( decoder->verify_trailer_pending ) { - if( Rd_available_bytes( decoder->range_decoder ) < Ft_versioned_size( decoder->member_version ) && - !decoder->range_decoder->at_stream_end ) + if( Rd_available_bytes( decoder->rdec ) < Ft_versioned_size( decoder->member_version ) && + !decoder->rdec->at_stream_end ) return 0; decoder->verify_trailer_pending = false; decoder->member_finished = true; if( LZd_verify_trailer( decoder ) ) return 0; else return 3; } - while( !Rd_finished( decoder->range_decoder ) ) + while( !Rd_finished( decoder->rdec ) ) { const int pos_state = LZd_data_position( decoder ) & pos_state_mask; - if( !Rd_enough_available_bytes( decoder->range_decoder ) || + if( !Rd_enough_available_bytes( decoder->rdec ) || !LZd_enough_free_bytes( decoder ) ) return 0; - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_match[*state][pos_state] ) == 0 ) + if( Rd_decode_bit( decoder->rdec, &decoder->bm_match[*state][pos_state] ) == 0 ) /* 1st bit */ { const uint8_t prev_byte = LZd_get_prev_byte( decoder ); if( St_is_char( *state ) ) { *state -= ( *state < 4 ) ? *state : 3; - LZd_put_byte( decoder, Rd_decode_tree( decoder->range_decoder, + LZd_put_byte( decoder, Rd_decode_tree( decoder->rdec, decoder->bm_literal[get_lit_state(prev_byte)], 8 ) ); } else { *state -= ( *state < 10 ) ? 3 : 6; - LZd_put_byte( decoder, Rd_decode_matched( decoder->range_decoder, - decoder->bm_literal[get_lit_state(prev_byte)], LZd_get_byte( decoder, decoder->rep0 ) ) ); + LZd_put_byte( decoder, Rd_decode_matched( decoder->rdec, + decoder->bm_literal[get_lit_state(prev_byte)], + LZd_get_byte( decoder, decoder->rep0 ) ) ); } } else { int len; - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep[*state] ) == 1 ) + if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep[*state] ) == 1 ) /* 2nd bit */ { - len = 0; - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep0[*state] ) == 1 ) + if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep0[*state] ) == 0 ) /* 3rd bit */ + { + if( Rd_decode_bit( decoder->rdec, &decoder->bm_len[*state][pos_state] ) == 0 ) /* 4th bit */ + { *state = St_set_short_rep( *state ); + LZd_put_byte( decoder, LZd_get_byte( decoder, decoder->rep0 ) ); continue; } + } + else { unsigned distance; - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep1[*state] ) == 0 ) + if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep1[*state] ) == 0 ) /* 4th bit */ distance = decoder->rep1; else { - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep2[*state] ) == 0 ) + if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep2[*state] ) == 0 ) /* 5th bit */ distance = decoder->rep2; - else { distance = decoder->rep3; decoder->rep3 = decoder->rep2; } + else + { distance = decoder->rep3; decoder->rep3 = decoder->rep2; } decoder->rep2 = decoder->rep1; } decoder->rep1 = decoder->rep0; decoder->rep0 = distance; } - else - { - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_len[*state][pos_state] ) == 0 ) - { *state = St_set_short_rep( *state ); len = 1; } - } - if( len == 0 ) - { - *state = St_set_rep( *state ); - len = min_match_len + Led_decode( &decoder->rep_match_len_decoder, decoder->range_decoder, pos_state ); - } + *state = St_set_rep( *state ); + len = min_match_len + Rd_decode_len( decoder->rdec, &decoder->rep_len_model, pos_state ); } else { int dis_slot; const unsigned rep0_saved = decoder->rep0; - len = min_match_len + Led_decode( &decoder->len_decoder, decoder->range_decoder, pos_state ); - dis_slot = Rd_decode_tree6( decoder->range_decoder, decoder->bm_dis_slot[get_dis_state(len)] ); + len = min_match_len + Rd_decode_len( decoder->rdec, &decoder->match_len_model, pos_state ); + dis_slot = Rd_decode_tree6( decoder->rdec, decoder->bm_dis_slot[get_dis_state(len)] ); if( dis_slot < start_dis_model ) decoder->rep0 = dis_slot; else { const int direct_bits = ( dis_slot >> 1 ) - 1; decoder->rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; if( dis_slot < end_dis_model ) - decoder->rep0 += Rd_decode_tree_reversed( decoder->range_decoder, + decoder->rep0 += Rd_decode_tree_reversed( decoder->rdec, decoder->bm_dis + decoder->rep0 - dis_slot - 1, direct_bits ); else { - decoder->rep0 += Rd_decode( decoder->range_decoder, direct_bits - dis_align_bits ) << dis_align_bits; - decoder->rep0 += Rd_decode_tree_reversed4( decoder->range_decoder, decoder->bm_align ); + decoder->rep0 += Rd_decode( decoder->rdec, direct_bits - dis_align_bits ) << dis_align_bits; + decoder->rep0 += Rd_decode_tree_reversed4( decoder->rdec, decoder->bm_align ); if( decoder->rep0 == 0xFFFFFFFFU ) /* Marker found */ { decoder->rep0 = rep0_saved; - Rd_normalize( decoder->range_decoder ); + Rd_normalize( decoder->rdec ); if( len == min_match_len ) /* End Of Stream marker */ { - if( Rd_available_bytes( decoder->range_decoder ) < Ft_versioned_size( decoder->member_version ) && - !decoder->range_decoder->at_stream_end ) + if( Rd_available_bytes( decoder->rdec ) < Ft_versioned_size( decoder->member_version ) && + !decoder->rdec->at_stream_end ) { decoder->verify_trailer_pending = true; return 0; } decoder->member_finished = true; if( LZd_verify_trailer( decoder ) ) return 0; else return 3; } if( len == min_match_len + 1 ) /* Sync Flush marker */ { - if( Rd_try_reload( decoder->range_decoder, true ) ) continue; + if( Rd_try_reload( decoder->rdec, true ) ) continue; else return 0; } return 4; diff --git a/decoder.h b/decoder.h index 49e2bef..c8d3786 100644 --- a/decoder.h +++ b/decoder.h @@ -216,24 +216,24 @@ static inline int Rd_decode_bit( struct Range_decoder * const rdec, static inline int Rd_decode_tree( struct Range_decoder * const rdec, Bit_model bm[], const int num_bits ) { - int model = 1; + int symbol = 1; int i; for( i = num_bits; i > 0; --i ) - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - return model - (1 << num_bits); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + return symbol - (1 << num_bits); } static inline int Rd_decode_tree6( struct Range_decoder * const rdec, Bit_model bm[] ) { - int model = 1; - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - return model - (1 << 6); + int symbol = 1; + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + return symbol - (1 << 6); } static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec, @@ -289,36 +289,17 @@ static inline int Rd_decode_matched( struct Range_decoder * const rdec, return symbol - 0x100; } - -struct Len_decoder - { - Bit_model choice1; - Bit_model choice2; - Bit_model bm_low[pos_states][len_low_symbols]; - Bit_model bm_mid[pos_states][len_mid_symbols]; - Bit_model bm_high[len_high_symbols]; - }; - -static inline void Led_init( struct Len_decoder * const len_decoder ) - { - Bm_init( &len_decoder->choice1 ); - Bm_init( &len_decoder->choice2 ); - Bm_array_init( len_decoder->bm_low[0], pos_states * len_low_symbols ); - Bm_array_init( len_decoder->bm_mid[0], pos_states * len_mid_symbols ); - Bm_array_init( len_decoder->bm_high, len_high_symbols ); - } - -static inline int Led_decode( struct Len_decoder * const len_decoder, - struct Range_decoder * const rdec, - const int pos_state ) +static inline int Rd_decode_len( struct Range_decoder * const rdec, + struct Len_model * const lm, + const int pos_state ) { - if( Rd_decode_bit( rdec, &len_decoder->choice1 ) == 0 ) - return Rd_decode_tree( rdec, len_decoder->bm_low[pos_state], len_low_bits ); - if( Rd_decode_bit( rdec, &len_decoder->choice2 ) == 0 ) + if( Rd_decode_bit( rdec, &lm->choice1 ) == 0 ) + return Rd_decode_tree( rdec, lm->bm_low[pos_state], len_low_bits ); + if( Rd_decode_bit( rdec, &lm->choice2 ) == 0 ) return len_low_symbols + - Rd_decode_tree( rdec, len_decoder->bm_mid[pos_state], len_mid_bits ); + Rd_decode_tree( rdec, lm->bm_mid[pos_state], len_mid_bits ); return len_low_symbols + len_mid_symbols + - Rd_decode_tree( rdec, len_decoder->bm_high, len_high_bits ); + Rd_decode_tree( rdec, lm->bm_high, len_high_bits ); } @@ -350,9 +331,9 @@ struct LZ_decoder Bit_model bm_dis[modeled_distances-end_dis_model]; Bit_model bm_align[dis_align_size]; - struct Range_decoder * range_decoder; - struct Len_decoder len_decoder; - struct Len_decoder rep_match_len_decoder; + struct Range_decoder * rdec; + struct Len_model match_len_model; + struct Len_model rep_len_model; }; static inline bool LZd_enough_free_bytes( const struct LZ_decoder * const decoder ) @@ -403,7 +384,7 @@ static inline void LZd_copy_block( struct LZ_decoder * const decoder, static inline bool LZd_init( struct LZ_decoder * const decoder, const File_header header, - struct Range_decoder * const rdec ) + struct Range_decoder * const rde ) { decoder->dictionary_size = Fh_get_dictionary_size( header ); if( !Cb_init( &decoder->cb, max( 65536, decoder->dictionary_size ) + lzd_min_free_bytes ) ) @@ -430,9 +411,9 @@ static inline bool LZd_init( struct LZ_decoder * const decoder, Bm_array_init( decoder->bm_dis, modeled_distances - end_dis_model ); Bm_array_init( decoder->bm_align, dis_align_size ); - decoder->range_decoder = rdec; - Led_init( &decoder->len_decoder ); - Led_init( &decoder->rep_match_len_decoder ); + decoder->rdec = rde; + Lm_init( &decoder->match_len_model ); + Lm_init( &decoder->rep_len_model ); decoder->cb.buffer[decoder->cb.buffer_size-1] = 0; /* prev_byte of first_byte */ return true; } diff --git a/doc/lzlib.info b/doc/lzlib.info index 511fda5..84e0501 100644 --- a/doc/lzlib.info +++ b/doc/lzlib.info @@ -12,7 +12,7 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir) Lzlib Manual ************ -This manual is for Lzlib (version 1.4-rc2, 7 February 2013). +This manual is for Lzlib (version 1.4, 28 May 2013). * Menu: @@ -530,15 +530,21 @@ with no additional information before, between, or after them. now. `DS (coded dictionary size, 1 byte)' - Bits 4-0 contain the base 2 logarithm of the base dictionary size. - Bits 7-5 contain the number of "wedges" to substract from the base - dictionary size to obtain the dictionary size. The size of a wedge - is (base dictionary size / 16). + Lzip divides the distance between any two powers of 2 into 8 + equally spaced intervals, named "wedges". The dictionary size is + calculated by taking a power of 2 (the base size) and substracting + from it a number of wedges between 0 and 7. The size of a wedge is + (base_size / 16). + Bits 4-0 contain the base 2 logarithm of the base size (12 to 29). + Bits 7-5 contain the number of wedges (0 to 7) to substract from + the base size to obtain the dictionary size. + Example: 0xD3 = (2^19 - 6 * 2^15) = (512KiB - 6 * 32KiB) = 320KiB Valid values for dictionary size range from 4KiB to 512MiB. `Lzma stream' The lzma stream, finished by an end of stream marker. Uses default - values for encoder properties. + values for encoder properties. See the lzip manual for a full + description. `CRC32 (4 bytes)' CRC of the uncompressed original data. @@ -548,8 +554,9 @@ with no additional information before, between, or after them. `Member size (8 bytes)' Total size of the member, including header and trailer. This field - acts as a distributed index, and facilitates safe recovery of - undamaged members from multi-member files. + acts as a distributed index, allows the verification of stream + integrity, and facilitates safe recovery of undamaged members from + multi-member files.  @@ -715,18 +722,18 @@ Concept Index  Tag Table: Node: Top219 -Node: Introduction1327 -Node: Library Version3173 -Node: Buffering3818 -Node: Parameter Limits4937 -Node: Compression Functions5894 -Node: Decompression Functions12104 -Node: Error Codes18265 -Node: Error Messages20204 -Node: Data Format20783 -Node: Examples22864 -Node: Problems26947 -Node: Concept Index27519 +Node: Introduction1319 +Node: Library Version3165 +Node: Buffering3810 +Node: Parameter Limits4929 +Node: Compression Functions5886 +Node: Decompression Functions12096 +Node: Error Codes18257 +Node: Error Messages20196 +Node: Data Format20775 +Node: Examples23268 +Node: Problems27351 +Node: Concept Index27923  End Tag Table diff --git a/doc/lzlib.texinfo b/doc/lzlib.texinfo index eeae174..c08303e 100644 --- a/doc/lzlib.texinfo +++ b/doc/lzlib.texinfo @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 7 February 2013 -@set VERSION 1.4-rc2 +@set UPDATED 28 May 2013 +@set VERSION 1.4 @dircategory Data Compression @direntry @@ -600,15 +600,19 @@ A four byte string, identifying the lzip format, with the value "LZIP". Just in case something needs to be modified in the future. 1 for now. @item DS (coded dictionary size, 1 byte) -Bits 4-0 contain the base 2 logarithm of the base dictionary size.@* -Bits 7-5 contain the number of "wedges" to substract from the base -dictionary size to obtain the dictionary size. The size of a wedge is -(base dictionary size / 16).@* +Lzip divides the distance between any two powers of 2 into 8 equally +spaced intervals, named "wedges". The dictionary size is calculated by +taking a power of 2 (the base size) and substracting from it a number of +wedges between 0 and 7. The size of a wedge is (base_size / 16).@* +Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@* +Bits 7-5 contain the number of wedges (0 to 7) to substract from the +base size to obtain the dictionary size.@* +Example: 0xD3 = (2^19 - 6 * 2^15) = (512KiB - 6 * 32KiB) = 320KiB@* Valid values for dictionary size range from 4KiB to 512MiB. @item Lzma stream The lzma stream, finished by an end of stream marker. Uses default values -for encoder properties. +for encoder properties. See the lzip manual for a full description. @item CRC32 (4 bytes) CRC of the uncompressed original data. @@ -618,8 +622,8 @@ Size of the uncompressed original data. @item Member size (8 bytes) Total size of the member, including header and trailer. This field acts -as a distributed index, and facilitates safe recovery of undamaged -members from multi-member files. +as a distributed index, allows the verification of stream integrity, and +facilitates safe recovery of undamaged members from multi-member files. @end table diff --git a/doc/minilzip.1 b/doc/minilzip.1 index a7f6d21..eba916d 100644 --- a/doc/minilzip.1 +++ b/doc/minilzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. -.TH MINILZIP "1" "February 2013" "Minilzip 1.4-rc2" "User Commands" +.TH MINILZIP "1" "May 2013" "Minilzip 1.4" "User Commands" .SH NAME Minilzip \- reduces the size of files .SH SYNOPSIS @@ -71,13 +71,18 @@ The bidimensional parameter space of LZMA can't be mapped to a linear scale optimal for all files. If your files are large, very repetitive, etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR options directly to achieve optimal performance. +.PP +Exit status: 0 for a normal exit, 1 for environmental problems (file +not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or +invalid input file, 3 for an internal consistency error (eg, bug) which +caused minilzip to panic. .SH "REPORTING BUGS" Report bugs to lzip\-bug@nongnu.org .br Lzlib home page: http://www.nongnu.org/lzip/lzlib.html .SH COPYRIGHT Copyright \(co 2013 Antonio Diaz Diaz. -Using Lzlib 1.4\-rc2 +Using Lzlib 1.4 License GPLv3+: GNU GPL version 3 or later .br This is free software: you are free to change and redistribute it. diff --git a/encoder.c b/encoder.c index 7b417ba..e7c0f5c 100644 --- a/encoder.c +++ b/encoder.c @@ -233,22 +233,22 @@ static void Lee_encode( struct Len_encoder * const len_encoder, symbol -= min_match_len; if( symbol < len_low_symbols ) { - Re_encode_bit( renc, &len_encoder->choice1, 0 ); - Re_encode_tree( renc, len_encoder->bm_low[pos_state], symbol, len_low_bits ); + Re_encode_bit( renc, &len_encoder->lm.choice1, 0 ); + Re_encode_tree( renc, len_encoder->lm.bm_low[pos_state], symbol, len_low_bits ); } else { - Re_encode_bit( renc, &len_encoder->choice1, 1 ); + Re_encode_bit( renc, &len_encoder->lm.choice1, 1 ); if( symbol < len_low_symbols + len_mid_symbols ) { - Re_encode_bit( renc, &len_encoder->choice2, 0 ); - Re_encode_tree( renc, len_encoder->bm_mid[pos_state], + Re_encode_bit( renc, &len_encoder->lm.choice2, 0 ); + Re_encode_tree( renc, len_encoder->lm.bm_mid[pos_state], symbol - len_low_symbols, len_mid_bits ); } else { - Re_encode_bit( renc, &len_encoder->choice2, 1 ); - Re_encode_tree( renc, len_encoder->bm_high, + Re_encode_bit( renc, &len_encoder->lm.choice2, 1 ); + Re_encode_tree( renc, len_encoder->lm.bm_high, symbol - len_low_symbols - len_mid_symbols, len_high_bits ); } } @@ -364,8 +364,8 @@ static bool LZe_init( struct LZ_encoder * const encoder, encoder->matchfinder = mf; if( !Re_init( &encoder->range_encoder ) ) return false; - Lee_init( &encoder->len_encoder, encoder->matchfinder->match_len_limit ); - Lee_init( &encoder->rep_match_len_encoder, encoder->matchfinder->match_len_limit ); + Lee_init( &encoder->match_len_encoder, encoder->matchfinder->match_len_limit ); + Lee_init( &encoder->rep_len_encoder, encoder->matchfinder->match_len_limit ); encoder->num_dis_slots = 2 * real_bits( encoder->matchfinder->dictionary_size - 1 ); @@ -472,7 +472,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, for( len = min_match_len; len <= replens[rep]; ++len ) Tr_update( &encoder->trials[len], price + - Lee_price( &encoder->rep_match_len_encoder, len, pos_state ), + Lee_price( &encoder->rep_len_encoder, len, pos_state ), rep, 0 ); } @@ -653,7 +653,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, LZe_price_rep( encoder, rep, cur_state, pos_state ); for( i = min_match_len; i <= len; ++i ) Tr_update( &encoder->trials[cur+i], price + - Lee_price( &encoder->rep_match_len_encoder, i, pos_state ), + Lee_price( &encoder->rep_len_encoder, i, pos_state ), rep, cur ); if( rep == 0 ) start_len = len + 1; /* discard shorter matches */ @@ -670,7 +670,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, pos_state2 = ( pos_state + len ) & pos_state_mask; state2 = St_set_rep( cur_state ); - price += Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) + + price += Lee_price( &encoder->rep_len_encoder, len, pos_state ) + price0( encoder->bm_match[state2][pos_state2] ) + LZe_price_matched( encoder, data[len-1], data[len], data[len-dis] ); pos_state2 = ( pos_state2 + 1 ) & pos_state_mask; @@ -832,7 +832,7 @@ static bool LZe_encode_member( struct LZ_encoder * const encoder, if( len == 1 ) *state = St_set_short_rep( *state ); else { - Lee_encode( &encoder->rep_match_len_encoder, &encoder->range_encoder, len, pos_state ); + Lee_encode( &encoder->rep_len_encoder, &encoder->range_encoder, len, pos_state ); *state = St_set_rep( *state ); } } diff --git a/encoder.h b/encoder.h index 90fa91c..122d7fd 100644 --- a/encoder.h +++ b/encoder.h @@ -178,9 +178,9 @@ static inline int price_symbol_reversed( const Bit_model bm[], int symbol, for( i = num_bits; i > 0; --i ) { const int bit = symbol & 1; - symbol >>= 1; price += price_bit( bm[model], bit ); model = ( model << 1 ) | bit; + symbol >>= 1; } return price; } @@ -472,11 +472,7 @@ static inline void Re_encode_matched( struct Range_encoder * const renc, struct Len_encoder { - Bit_model choice1; - Bit_model choice2; - Bit_model bm_low[pos_states][len_low_symbols]; - Bit_model bm_mid[pos_states][len_mid_symbols]; - Bit_model bm_high[len_high_symbols]; + struct Len_model lm; int prices[pos_states][max_len_symbols]; int len_symbols; int counters[pos_states]; @@ -486,21 +482,21 @@ static void Lee_update_prices( struct Len_encoder * const len_encoder, const int pos_state ) { int * const pps = len_encoder->prices[pos_state]; - int tmp = price0( len_encoder->choice1 ); + int tmp = price0( len_encoder->lm.choice1 ); int len = 0; for( ; len < len_low_symbols && len < len_encoder->len_symbols; ++len ) pps[len] = tmp + - price_symbol( len_encoder->bm_low[pos_state], len, len_low_bits ); - tmp = price1( len_encoder->choice1 ); + price_symbol( len_encoder->lm.bm_low[pos_state], len, len_low_bits ); + tmp = price1( len_encoder->lm.choice1 ); for( ; len < len_low_symbols + len_mid_symbols && len < len_encoder->len_symbols; ++len ) - pps[len] = tmp + price0( len_encoder->choice2 ) + - price_symbol( len_encoder->bm_mid[pos_state], len - len_low_symbols, len_mid_bits ); + pps[len] = tmp + price0( len_encoder->lm.choice2 ) + + price_symbol( len_encoder->lm.bm_mid[pos_state], len - len_low_symbols, len_mid_bits ); for( ; len < len_encoder->len_symbols; ++len ) /* using 4 slots per value makes "Lee_price" faster */ len_encoder->prices[3][len] = len_encoder->prices[2][len] = len_encoder->prices[1][len] = len_encoder->prices[0][len] = - tmp + price1( len_encoder->choice2 ) + - price_symbol( len_encoder->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); + tmp + price1( len_encoder->lm.choice2 ) + + price_symbol( len_encoder->lm.bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); len_encoder->counters[pos_state] = len_encoder->len_symbols; } @@ -508,11 +504,7 @@ static void Lee_init( struct Len_encoder * const len_encoder, const int match_len_limit ) { int i; - Bm_init( &len_encoder->choice1 ); - Bm_init( &len_encoder->choice2 ); - Bm_array_init( len_encoder->bm_low[0], pos_states * len_low_symbols ); - Bm_array_init( len_encoder->bm_mid[0], pos_states * len_mid_symbols ); - Bm_array_init( len_encoder->bm_high, len_high_symbols ); + Lm_init( &len_encoder->lm ); len_encoder->len_symbols = match_len_limit + 1 - min_match_len; for( i = 0; i < pos_states; ++i ) Lee_update_prices( len_encoder, i ); } @@ -599,8 +591,8 @@ struct LZ_encoder struct Matchfinder * matchfinder; struct Range_encoder range_encoder; - struct Len_encoder len_encoder; - struct Len_encoder rep_match_len_encoder; + struct Len_encoder match_len_encoder; + struct Len_encoder rep_len_encoder; int num_dis_slots; int rep_distances[num_rep_distances]; @@ -677,7 +669,7 @@ static inline int LZe_price_rep0_len( const struct LZ_encoder * const encoder, const State state, const int pos_state ) { return LZe_price_rep( encoder, 0, state, pos_state ) + - Lee_price( &encoder->rep_match_len_encoder, len, pos_state ); + Lee_price( &encoder->rep_len_encoder, len, pos_state ); } static inline int LZe_price_dis( const struct LZ_encoder * const encoder, @@ -694,7 +686,7 @@ static inline int LZe_price_pair( const struct LZ_encoder * const encoder, const int dis, const int len, const int pos_state ) { - return Lee_price( &encoder->len_encoder, len, pos_state ) + + return Lee_price( &encoder->match_len_encoder, len, pos_state ) + LZe_price_dis( encoder, dis, get_dis_state( len ) ); } @@ -725,7 +717,7 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder, const int pos_state ) { const int dis_slot = get_slot( dis ); - Lee_encode( &encoder->len_encoder, &encoder->range_encoder, len, pos_state ); + Lee_encode( &encoder->match_len_encoder, &encoder->range_encoder, len, pos_state ); Re_encode_tree( &encoder->range_encoder, encoder->bm_dis_slot[get_dis_state(len)], dis_slot, dis_slot_bits ); diff --git a/lzip.h b/lzip.h new file mode 100644 index 0000000..d3b1254 --- /dev/null +++ b/lzip.h @@ -0,0 +1,298 @@ +/* Lzlib - A compression library for lzip files + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + + This library is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this library. If not, see . + + As a special exception, you may use this file as part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this + file, or you compile this file and link it with other files to + produce an executable, this file does not by itself cause the + resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. +*/ + +#ifndef max + #define max(x,y) ((x) >= (y) ? (x) : (y)) +#endif +#ifndef min + #define min(x,y) ((x) <= (y) ? (x) : (y)) +#endif + +typedef int State; + +enum { states = 12 }; + +static inline bool St_is_char( const State st ) { return st < 7; } + +static inline State St_set_char( const State st ) + { + static const State next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; + return next[st]; + } + +static inline State St_set_match( const State st ) + { return ( ( st < 7 ) ? 7 : 10 ); } + +static inline State St_set_rep( const State st ) + { return ( ( st < 7 ) ? 8 : 11 ); } + +static inline State St_set_short_rep( const State st ) + { return ( ( st < 7 ) ? 9 : 11 ); } + + +enum { + min_dictionary_bits = 12, + min_dictionary_size = 1 << min_dictionary_bits, + max_dictionary_bits = 29, + max_dictionary_size = 1 << max_dictionary_bits, + literal_context_bits = 3, + pos_state_bits = 2, + pos_states = 1 << pos_state_bits, + pos_state_mask = pos_states - 1, + + dis_slot_bits = 6, + start_dis_model = 4, + end_dis_model = 14, + modeled_distances = 1 << (end_dis_model / 2), /* 128 */ + dis_align_bits = 4, + dis_align_size = 1 << dis_align_bits, + + len_low_bits = 3, + len_mid_bits = 3, + len_high_bits = 8, + len_low_symbols = 1 << len_low_bits, + len_mid_symbols = 1 << len_mid_bits, + len_high_symbols = 1 << len_high_bits, + max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols, + + min_match_len = 2, /* must be 2 */ + max_match_len = min_match_len + max_len_symbols - 1, /* 273 */ + min_match_len_limit = 5, + + max_dis_states = 4 }; + +static inline int get_dis_state( const int len ) + { return min( len - min_match_len, max_dis_states - 1 ); } + +static inline int get_lit_state( const uint8_t prev_byte ) + { return ( prev_byte >> ( 8 - literal_context_bits ) ); } + + +enum { bit_model_move_bits = 5, + bit_model_total_bits = 11, + bit_model_total = 1 << bit_model_total_bits }; + +typedef int Bit_model; + +static inline void Bm_init( Bit_model * const probability ) + { *probability = bit_model_total / 2; } + +static inline void Bm_array_init( Bit_model * const p, const int size ) + { int i = 0; while( i < size ) p[i++] = bit_model_total / 2; } + +struct Len_model + { + Bit_model choice1; + Bit_model choice2; + Bit_model bm_low[pos_states][len_low_symbols]; + Bit_model bm_mid[pos_states][len_mid_symbols]; + Bit_model bm_high[len_high_symbols]; + }; + +static inline void Lm_init( struct Len_model * const lm ) + { + Bm_init( &lm->choice1 ); + Bm_init( &lm->choice2 ); + Bm_array_init( lm->bm_low[0], pos_states * len_low_symbols ); + Bm_array_init( lm->bm_mid[0], pos_states * len_mid_symbols ); + Bm_array_init( lm->bm_high, len_high_symbols ); + } + + +/* Table of CRCs of all 8-bit messages. */ +static const uint32_t crc32[256] = + { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, + 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, + 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, + 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, + 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, + 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, + 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, + 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, + 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106, + 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, + 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, + 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, + 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, + 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, + 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, + 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, + 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, + 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, + 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, + 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E, + 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, + 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, + 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, + 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, + 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, + 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, + 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, + 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, + 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, + 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, + 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D }; + + +static inline void CRC32_update_byte( uint32_t * const crc, const uint8_t byte ) + { *crc = crc32[(*crc^byte)&0xFF] ^ ( *crc >> 8 ); } + +static inline void CRC32_update_buf( uint32_t * const crc, + const uint8_t * const buffer, const int size ) + { + int i; + for( i = 0; i < size; ++i ) + *crc = crc32[(*crc^buffer[i])&0xFF] ^ ( *crc >> 8 ); + } + + +static inline int real_bits( unsigned value ) + { + int bits = 0; + while( value > 0 ) { value >>= 1; ++bits; } + return bits; + } + + +static const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; /* "LZIP" */ + +typedef uint8_t File_header[6]; /* 0-3 magic bytes */ + /* 4 version */ + /* 5 coded_dict_size */ +enum { Fh_size = 6 }; + +static inline void Fh_set_magic( File_header data ) + { memcpy( data, magic_string, 4 ); data[4] = 1; } + +static inline bool Fh_verify_magic( const File_header data ) + { return ( memcmp( data, magic_string, 4 ) == 0 ); } + +static inline uint8_t Fh_version( const File_header data ) + { return data[4]; } + +static inline bool Fh_verify_version( const File_header data ) + { return ( data[4] <= 1 ); } + +static inline unsigned Fh_get_dictionary_size( const File_header data ) + { + unsigned sz = ( 1 << ( data[5] & 0x1F ) ); + if( sz > min_dictionary_size ) + sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 ); + return sz; + } + +static inline bool Fh_set_dictionary_size( File_header data, const int sz ) + { + if( sz >= min_dictionary_size && sz <= max_dictionary_size ) + { + data[5] = real_bits( sz - 1 ); + if( sz > min_dictionary_size ) + { + const int base_size = 1 << data[5]; + const int wedge = base_size / 16; + int i; + for( i = 7; i >= 1; --i ) + if( base_size - ( i * wedge ) >= sz ) + { data[5] |= ( i << 5 ); break; } + } + return true; + } + return false; + } + +static inline bool Fh_verify( const File_header data ) + { + return ( Fh_verify_magic( data ) && Fh_verify_version( data ) && + Fh_get_dictionary_size( data ) >= min_dictionary_size && + Fh_get_dictionary_size( data ) <= max_dictionary_size ); + } + + +typedef uint8_t File_trailer[20]; + /* 0-3 CRC32 of the uncompressed data */ + /* 4-11 size of the uncompressed data */ + /* 12-19 member size including header and trailer */ + +enum { Ft_size = 20 }; + +static inline int Ft_versioned_size( const int version ) + { return ( ( version >= 1 ) ? 20 : 12 ); } + +static inline unsigned Ft_get_data_crc( const File_trailer data ) + { + unsigned tmp = 0; + int i; + for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + +static inline void Ft_set_data_crc( File_trailer data, unsigned crc ) + { + int i; + for( i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } + } + +static inline unsigned long long Ft_get_data_size( const File_trailer data ) + { + unsigned long long tmp = 0; + int i; + for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + +static inline void Ft_set_data_size( File_trailer data, unsigned long long sz ) + { + int i; + for( i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } + } + +static inline unsigned long long Ft_get_member_size( const File_trailer data ) + { + unsigned long long tmp = 0; + int i; + for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + +static inline void Ft_set_member_size( File_trailer data, unsigned long long sz ) + { + int i; + for( i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } + } diff --git a/lzlib.c b/lzlib.c index 1046bf7..d8e494f 100644 --- a/lzlib.c +++ b/lzlib.c @@ -31,7 +31,7 @@ #include #include "lzlib.h" -#include "clzip.h" +#include "lzip.h" #include "cbuffer.c" #include "decoder.h" #include "decoder.c" diff --git a/lzlib.h b/lzlib.h index 1da08fa..f50baf4 100644 --- a/lzlib.h +++ b/lzlib.h @@ -29,7 +29,7 @@ extern "C" { #endif -static const char * const LZ_version_string = "1.4-rc2"; +static const char * const LZ_version_string = "1.4"; enum LZ_Errno { LZ_ok = 0, LZ_bad_argument, LZ_mem_error, LZ_sequence_error, LZ_header_error, LZ_unexpected_eof, diff --git a/main.c b/main.c index 9e69828..6732b13 100644 --- a/main.c +++ b/main.c @@ -15,7 +15,7 @@ along with this program. If not, see . */ /* - Return values: 0 for a normal exit, 1 for environmental problems + Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid input file, 3 for an internal consistency error (eg, bug) which caused minilzip to panic. @@ -176,6 +176,10 @@ static void show_help( void ) "scale optimal for all files. If your files are large, very repetitive,\n" "etc, you may need to use the --match-length and --dictionary-size\n" "options directly to achieve optimal performance.\n" + "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" + "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" + "invalid input file, 3 for an internal consistency error (eg, bug) which\n" + "caused minilzip to panic.\n" "\nReport bugs to lzip-bug@nongnu.org\n" "Lzlib home page: http://www.nongnu.org/lzip/lzlib.html\n" ); } @@ -205,8 +209,9 @@ void show_header( struct LZ_Decoder * const decoder ) for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; np = ""; } - fprintf( stderr, "version %d, dictionary size %s%4u %sB. ", - LZ_decompress_member_version( decoder ), np, num, p ); + if( verbosity >= 4 ) + fprintf( stderr, "version %d, ", LZ_decompress_member_version( decoder ) ); + fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p ); } @@ -687,8 +692,8 @@ int do_decompress( struct LZ_Decoder * const decoder, const int infd, const unsigned long long data_position = LZ_decompress_data_position( decoder ); const unsigned long long member_size = LZ_decompress_member_position( decoder ); Pp_show_msg( pp, 0 ); - if( verbosity >= 2 ) show_header( decoder ); - if( verbosity >= 3 && data_position > 0 && member_size > 0 ) + if( verbosity >= 3 ) show_header( decoder ); + if( verbosity >= 2 && data_position > 0 && member_size > 0 ) fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", (double)data_position / member_size, ( 8.0 * member_size ) / data_position, @@ -697,8 +702,7 @@ int do_decompress( struct LZ_Decoder * const decoder, const int infd, fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ", LZ_decompress_data_crc( decoder ), data_position, member_size ); - if( testing ) fprintf( stderr, "ok\n" ); - else fprintf( stderr, "done\n" ); + fprintf( stderr, testing ? "ok\n" : "done\n" ); } first_member = false; Pp_reset( pp ); } @@ -874,6 +878,7 @@ int main( const int argc, const char * const argv[] ) { 'h', "help", ap_no }, { 'k', "keep", ap_no }, { 'm', "match-length", ap_yes }, + { 'n', "threads", ap_yes }, { 'o', "output", ap_yes }, { 'q', "quiet", ap_no }, { 's', "dictionary-size", ap_yes }, @@ -918,6 +923,7 @@ int main( const int argc, const char * const argv[] ) case 'm': encoder_options.match_len_limit = getnum( arg, LZ_min_match_len_limit(), LZ_max_match_len_limit() ); break; + case 'n': break; case 'o': default_output_filename = arg; break; case 'q': verbosity = -1; break; case 's': encoder_options.dictionary_size = get_dict_size( arg ); diff --git a/testsuite/check.sh b/testsuite/check.sh index a5d5649..a548def 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -28,6 +28,15 @@ fail=0 printf "testing lzlib-%s..." "$2" +"${LZIP}" -cqs-1 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqs0 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqs4095 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqm274 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi + "${LZIP}" -t "${testdir}"/test.txt.lz || fail=1 "${LZIP}" -cd "${testdir}"/test.txt.lz > copy || fail=1 cmp in copy || fail=1 @@ -45,15 +54,6 @@ if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi cmp in copy || fail=1 printf . -"${LZIP}" -cqs-1 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIP}" -cqs0 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIP}" -cqs4095 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIP}" -cqm274 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi - for i in s4Ki 0 1 2 3 4 5 6 7 8s16 9s16 ; do "${LZIP}" -k -$i in || fail=1 mv -f in.lz copy.lz || fail=1 -- cgit v1.2.3