From 38911c04adbe853acb3d534e1853a8a0ffce88bd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 7 Nov 2015 15:06:49 +0100 Subject: Merging upstream version 1.7~rc1. Signed-off-by: Daniel Baumann --- ChangeLog | 6 ++++++ INSTALL | 4 ++-- Makefile.in | 16 ++++++++-------- README | 21 +++++++++++---------- carg_parser.c | 30 +++++++++++------------------- carg_parser.h | 30 +++++++++++------------------- configure | 2 +- decoder.c | 6 +++--- decoder.h | 6 +++--- doc/lzlib.info | 51 +++++++++++++++++++++++++-------------------------- doc/lzlib.texi | 30 +++++++++++++++--------------- doc/minilzip.1 | 4 ++-- encoder.c | 2 +- fast_encoder.c | 9 ++++----- lzip.h | 14 +++++++++----- lzlib.h | 2 +- main.c | 10 +++++++--- 17 files changed, 120 insertions(+), 123 deletions(-) diff --git a/ChangeLog b/ChangeLog index 006255e..5f5f41f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2015-05-23 Antonio Diaz Diaz + + * Version 1.7-rc1 released. + * main.c: Enable fast encoder only with option '-0'. + * Minor improvements. + 2015-02-24 Antonio Diaz Diaz * Version 1.7-pre1 released. diff --git a/INSTALL b/INSTALL index 9a393ec..6a4ac9c 100644 --- a/INSTALL +++ b/INSTALL @@ -1,7 +1,7 @@ Requirements ------------ You will need a C compiler. -I use gcc 4.9.1 and 3.3.6, but the code should compile with any +I use gcc 4.9.1 and 4.1.2, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -30,7 +30,7 @@ the main archive. 4. Optionally, type 'make check' to run the tests that come with lzlib. 5. Type 'make install' to install the library and any data files and - documentation. (You might have to run ldconfig also). + documentation. (You may need to run ldconfig also). Or type 'make install-compress', which additionally compresses the info manual and the man page after installation. 
(Installing diff --git a/Makefile.in b/Makefile.in index 81e029b..65dff2c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -23,28 +23,28 @@ lib$(libname).a : lzlib.o $(AR) -rcs $@ $< lib$(libname).so.$(pkgversion) : lzlib_sh.o - $(CC) $(CFLAGS) $(LDFLAGS) -fpic -fPIC -shared -Wl,--soname=lib$(libname).so.$(soversion) -o $@ $< + $(CC) $(LDFLAGS) $(CFLAGS) -fpic -fPIC -shared -Wl,--soname=lib$(libname).so.$(soversion) -o $@ $< $(progname) : $(objs) lib$(libname).a - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs) lib$(libname).a + $(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(objs) lib$(libname).a $(progname)_shared : $(objs) lib$(libname).so.$(pkgversion) - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs) lib$(libname).so.$(pkgversion) + $(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(objs) lib$(libname).so.$(pkgversion) bbexample : bbexample.o lib$(libname).a - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ bbexample.o lib$(libname).a + $(CC) $(LDFLAGS) $(CFLAGS) -o $@ bbexample.o lib$(libname).a lzcheck : lzcheck.o lib$(libname).a - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ lzcheck.o lib$(libname).a + $(CC) $(LDFLAGS) $(CFLAGS) -o $@ lzcheck.o lib$(libname).a main.o : main.c - $(CC) $(CFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< + $(CC) $(CPPFLAGS) $(CFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< lzlib_sh.o : lzlib.c - $(CC) $(CFLAGS) $(CPPFLAGS) -fpic -fPIC -c -o $@ $< + $(CC) $(CPPFLAGS) $(CFLAGS) -fpic -fPIC -c -o $@ $< %.o : %.c - $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $< + $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $< lzdeps = lzlib.h lzip.h cbuffer.c decoder.h decoder.c encoder_base.h \ encoder_base.c encoder.h encoder.c fast_encoder.h fast_encoder.c diff --git a/README b/README index 542d8ef..8906a13 100644 --- a/README +++ b/README @@ -36,14 +36,14 @@ library are given in the files 'main.c' and 'bbexample.c' from the source distribution. 
Compression/decompression is done by repeatedly calling a couple of -read/write functions until all the data has been processed by the +read/write functions until all the data have been processed by the library. This interface is safer and less error prone than the traditional zlib interface. Compression/decompression is done when the read function is called. This means the value returned by the position functions will not be updated -until some data is read, even if you write a lot of data. If you want -the data to be compressed in advance, just call the read function with a +until a read call, even if a lot of data is written. If you want the +data to be compressed in advance, just call the read function with a size equal to 0. If all the data to be compressed are written in advance, lzlib will @@ -61,13 +61,14 @@ All the library functions are thread safe. The library does not install any signal handler. The decoder checks the consistency of the compressed data, so the library should never crash even in case of corrupted input. -There is no such thing as a "LZMA algorithm"; it is more like a "LZMA -coding scheme". For example, the option '-0' of lzip uses the scheme in -almost the simplest way possible; issuing the longest match it can find, -or a literal byte if it can't find a match. Inversely, a much more -elaborated way of finding coding sequences of minimum price than the one -currently used by lzip could be developed, and the resulting sequence -could also be coded using the LZMA coding scheme. +In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a +concrete algorithm; it is more like "any algorithm using the LZMA coding +scheme". For example, the option '-0' of lzip uses the scheme in almost +the simplest way possible; issuing the longest match it can find, or a +literal byte if it can't find a match. 
Inversely, a much more elaborated +way of finding coding sequences of minimum size than the one currently +used by lzip could be developed, and the resulting sequence could also +be coded using the LZMA coding scheme. Lzlib currently implements two variants of the LZMA algorithm; fast (used by option -0 of minilzip) and normal (used by all other diff --git a/carg_parser.c b/carg_parser.c index a453e36..8d74ea6 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,28 +1,20 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) Copyright (C) 2006-2015 Antonio Diaz Diaz. - This library is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. + This library is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this library. If not, see <http://www.gnu.org/licenses/>. - - As a special exception, you may use this file as part of a free - software library without restriction. 
Specifically, if other files - instantiate templates or use macros or inline functions from this - file, or you compile this file and link it with other files to - produce an executable, this file does not by itself cause the - resulting executable to be covered by the GNU General Public - License. This exception does not however invalidate any other - reasons why the executable file might be covered by the GNU General - Public License. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #include <stdlib.h> diff --git a/carg_parser.h b/carg_parser.h index 34b1263..ed4d9c5 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,28 +1,20 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) Copyright (C) 2006-2015 Antonio Diaz Diaz. - This library is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. + This library is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this library. If not, see <http://www.gnu.org/licenses/>. - - As a special exception, you may use this file as part of a free - software library without restriction. 
Specifically, if other files - instantiate templates or use macros or inline functions from this - file, or you compile this file and link it with other files to - produce an executable, this file does not by itself cause the - resulting executable to be covered by the GNU General Public - License. This exception does not however invalidate any other - reasons why the executable file might be covered by the GNU General - Public License. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ /* Arg_parser reads the arguments in 'argv' and creates a number of diff --git a/configure b/configure index 0ddeeef..4feb7f6 100755 --- a/configure +++ b/configure @@ -6,7 +6,7 @@ # to copy, distribute and modify it. pkgname=lzlib -pkgversion=1.7-pre1 +pkgversion=1.7-rc1 soversion=1 progname=minilzip progname_static=${progname} diff --git a/decoder.c b/decoder.c index 07f5baf..9acae37 100644 --- a/decoder.c +++ b/decoder.c @@ -66,7 +66,7 @@ static int LZd_decode_member( struct LZ_decoder * const d ) return 0; if( Rd_decode_bit( rdec, &d->bm_match[*state][pos_state] ) == 0 ) /* 1st bit */ { - const uint8_t prev_byte = LZd_get_prev_byte( d ); + const uint8_t prev_byte = LZd_peek_prev( d ); if( St_is_char( *state ) ) { *state -= ( *state < 4 ) ? *state : 3; @@ -78,7 +78,7 @@ static int LZd_decode_member( struct LZ_decoder * const d ) *state -= ( *state < 10 ) ? 
3 : 6; LZd_put_byte( d, Rd_decode_matched( rdec, d->bm_literal[get_lit_state(prev_byte)], - LZd_get_byte( d, d->rep0 ) ) ); + LZd_peek( d, d->rep0 ) ) ); } } else @@ -106,7 +106,7 @@ static int LZd_decode_member( struct LZ_decoder * const d ) { if( Rd_decode_bit( rdec, &d->bm_len[*state][pos_state] ) == 0 ) /* 4th bit */ { *state = St_set_short_rep( *state ); - LZd_put_byte( d, LZd_get_byte( d, d->rep0 ) ); continue; } + LZd_put_byte( d, LZd_peek( d, d->rep0 ) ); continue; } } *state = St_set_rep( *state ); len = min_match_len + Rd_decode_len( rdec, &d->rep_len_model, pos_state ); diff --git a/decoder.h b/decoder.h index f029212..13e06d8 100644 --- a/decoder.h +++ b/decoder.h @@ -338,14 +338,14 @@ struct LZ_decoder static inline bool LZd_enough_free_bytes( const struct LZ_decoder * const d ) { return Cb_free_bytes( &d->cb ) >= lzd_min_free_bytes; } -static inline uint8_t LZd_get_prev_byte( const struct LZ_decoder * const d ) +static inline uint8_t LZd_peek_prev( const struct LZ_decoder * const d ) { const int i = ( ( d->cb.put > 0 ) ? d->cb.put : d->cb.buffer_size ) - 1; return d->cb.buffer[i]; } -static inline uint8_t LZd_get_byte( const struct LZ_decoder * const d, - const int distance ) +static inline uint8_t LZd_peek( const struct LZ_decoder * const d, + const int distance ) { int i = d->cb.put - distance - 1; if( i < 0 ) i += d->cb.buffer_size; diff --git a/doc/lzlib.info b/doc/lzlib.info index 5a32927..6ef6f46 100644 --- a/doc/lzlib.info +++ b/doc/lzlib.info @@ -11,7 +11,7 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir) Lzlib Manual ************ -This manual is for Lzlib (version 1.7-pre1, 24 February 2015). +This manual is for Lzlib (version 1.7-rc1, 23 May 2015). * Menu: @@ -102,13 +102,14 @@ install any signal handler. The decoder checks the consistency of the compressed data, so the library should never crash even in case of corrupted input. 
- There is no such thing as a "LZMA algorithm"; it is more like a "LZMA + In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is +not a concrete algorithm; it is more like "any algorithm using the LZMA coding scheme". For example, the option '-0' of lzip uses the scheme in -almost the simplest way possible; issuing the longest match it can find, -or a literal byte if it can't find a match. Inversely, a much more -elaborated way of finding coding sequences of minimum price than the one -currently used by lzip could be developed, and the resulting sequence -could also be coded using the LZMA coding scheme. +almost the simplest way possible; issuing the longest match it can +find, or a literal byte if it can't find a match. Inversely, a much +more elaborated way of finding coding sequences of minimum size than +the one currently used by lzip could be developed, and the resulting +sequence could also be coded using the LZMA coding scheme. Lzlib currently implements two variants of the LZMA algorithm; fast (used by option -0 of minilzip) and normal (used by all other @@ -591,14 +592,12 @@ with no additional information before, between, or after them. now. 'DS (coded dictionary size, 1 byte)' - Lzip divides the distance between any two powers of 2 into 8 - equally spaced intervals, named "wedges". The dictionary size is - calculated by taking a power of 2 (the base size) and substracting - from it a number of wedges between 0 and 7. The size of a wedge is - (base_size / 16). + The dictionary size is calculated by taking a power of 2 (the base + size) and substracting from it a fraction between 0/16 and 7/16 of + the base size. Bits 4-0 contain the base 2 logarithm of the base size (12 to 29). - Bits 7-5 contain the number of wedges (0 to 7) to substract from - the base size to obtain the dictionary size. + Bits 7-5 contain the numerator of the fraction (0 to 7) to + substract from the base size to obtain the dictionary size. 
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB Valid values for dictionary size range from 4 KiB to 512 MiB. @@ -785,18 +784,18 @@ Concept index  Tag Table: Node: Top220 -Node: Introduction1311 -Node: Library version5808 -Node: Buffering6453 -Node: Parameter limits7673 -Node: Compression functions8632 -Node: Decompression functions15176 -Node: Error codes21344 -Node: Error messages23283 -Node: Data format23862 -Node: Examples26538 -Node: Problems30624 -Node: Concept index31196 +Node: Introduction1305 +Node: Library version5869 +Node: Buffering6514 +Node: Parameter limits7734 +Node: Compression functions8693 +Node: Decompression functions15237 +Node: Error codes21405 +Node: Error messages23344 +Node: Data format23923 +Node: Examples26469 +Node: Problems30555 +Node: Concept index31127  End Tag Table diff --git a/doc/lzlib.texi b/doc/lzlib.texi index 417cc7b..228d747 100644 --- a/doc/lzlib.texi +++ b/doc/lzlib.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 24 February 2015 -@set VERSION 1.7-pre1 +@set UPDATED 23 May 2015 +@set VERSION 1.7-rc1 @dircategory Data Compression @direntry @@ -126,13 +126,14 @@ All the library functions are thread safe. The library does not install any signal handler. The decoder checks the consistency of the compressed data, so the library should never crash even in case of corrupted input. -There is no such thing as a "LZMA algorithm"; it is more like a "LZMA -coding scheme". For example, the option '-0' of lzip uses the scheme in -almost the simplest way possible; issuing the longest match it can find, -or a literal byte if it can't find a match. Inversely, a much more -elaborated way of finding coding sequences of minimum price than the one -currently used by lzip could be developed, and the resulting sequence -could also be coded using the LZMA coding scheme. 
+In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a +concrete algorithm; it is more like "any algorithm using the LZMA coding +scheme". For example, the option '-0' of lzip uses the scheme in almost +the simplest way possible; issuing the longest match it can find, or a +literal byte if it can't find a match. Inversely, a much more elaborated +way of finding coding sequences of minimum size than the one currently +used by lzip could be developed, and the resulting sequence could also +be coded using the LZMA coding scheme. Lzlib currently implements two variants of the LZMA algorithm; fast (used by option -0 of minilzip) and normal (used by all other @@ -667,13 +668,12 @@ A four byte string, identifying the lzip format, with the value "LZIP" Just in case something needs to be modified in the future. 1 for now. @item DS (coded dictionary size, 1 byte) -Lzip divides the distance between any two powers of 2 into 8 equally -spaced intervals, named "wedges". The dictionary size is calculated by -taking a power of 2 (the base size) and substracting from it a number of -wedges between 0 and 7. The size of a wedge is (base_size / 16).@* +The dictionary size is calculated by taking a power of 2 (the base size) +and substracting from it a fraction between 0/16 and 7/16 of the base +size.@* Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@* -Bits 7-5 contain the number of wedges (0 to 7) to substract from the -base size to obtain the dictionary size.@* +Bits 7-5 contain the numerator of the fraction (0 to 7) to substract +from the base size to obtain the dictionary size.@* Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@* Valid values for dictionary size range from 4 KiB to 512 MiB. diff --git a/doc/minilzip.1 b/doc/minilzip.1 index ba63b8c..3c6b282 100644 --- a/doc/minilzip.1 +++ b/doc/minilzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. 
-.TH MINILZIP "1" "February 2015" "minilzip 1.7-pre1" "User Commands" +.TH MINILZIP "1" "May 2015" "minilzip 1.7-rc1" "User Commands" .SH NAME minilzip \- reduces the size of files .SH SYNOPSIS @@ -82,7 +82,7 @@ Report bugs to lzip\-bug@nongnu.org Lzlib home page: http://www.nongnu.org/lzip/lzlib.html .SH COPYRIGHT Copyright \(co 2015 Antonio Diaz Diaz. -Using lzlib 1.7\-pre1 +Using lzlib 1.7\-rc1 License GPLv2+: GNU GPL version 2 or later .br This is free software: you are free to change and redistribute it. diff --git a/encoder.c b/encoder.c index 4300801..6ac1f82 100644 --- a/encoder.c +++ b/encoder.c @@ -455,7 +455,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, e->trials[++num_trials].price = infinite_price; i = 0; - while( start_len > e->pairs[i].len ) ++i; + while( e->pairs[i].len < start_len ) ++i; dis = e->pairs[i].dis; for( len = start_len; ; ++len ) { diff --git a/fast_encoder.c b/fast_encoder.c index 6172b1a..9afc70e 100644 --- a/fast_encoder.c +++ b/fast_encoder.c @@ -167,16 +167,15 @@ bool FLZe_encode_member( struct FLZ_encoder * const fe ) if( match_byte == cur_byte ) { + const int short_rep_price = price1( fe->eb.bm_match[*state][pos_state] ) + + price1( fe->eb.bm_rep[*state] ) + + price0( fe->eb.bm_rep0[*state] ) + + price0( fe->eb.bm_len[*state][pos_state] ); int price = price0( fe->eb.bm_match[*state][pos_state] ); - int short_rep_price; if( St_is_char( *state ) ) price += LZeb_price_literal( &fe->eb, prev_byte, cur_byte ); else price += LZeb_price_matched( &fe->eb, prev_byte, cur_byte, match_byte ); - short_rep_price = price1( fe->eb.bm_match[*state][pos_state] ) + - price1( fe->eb.bm_rep[*state] ) + - price0( fe->eb.bm_rep0[*state] ) + - price0( fe->eb.bm_len[*state][pos_state] ); if( short_rep_price < price ) { Re_encode_bit( &fe->eb.renc, &fe->eb.bm_match[*state][pos_state], 1 ); diff --git a/lzip.h b/lzip.h index 1edcaa9..9ef5913 100644 --- a/lzip.h +++ b/lzip.h @@ -226,10 +226,10 @@ static inline bool 
Fh_set_dictionary_size( File_header data, const unsigned sz ) if( sz > min_dictionary_size ) { const unsigned base_size = 1 << data[5]; - const unsigned wedge = base_size / 16; + const unsigned fraction = base_size / 16; int i; for( i = 7; i >= 1; --i ) - if( base_size - ( i * wedge ) >= sz ) + if( base_size - ( i * fraction ) >= sz ) { data[5] |= ( i << 5 ); break; } } return true; @@ -239,9 +239,13 @@ static inline bool Fh_set_dictionary_size( File_header data, const unsigned sz ) static inline bool Fh_verify( const File_header data ) { - return ( Fh_verify_magic( data ) && Fh_verify_version( data ) && - Fh_get_dictionary_size( data ) >= min_dictionary_size && - Fh_get_dictionary_size( data ) <= max_dictionary_size ); + if( Fh_verify_magic( data ) && Fh_verify_version( data ) ) + { + const unsigned dictionary_size = Fh_get_dictionary_size( data ); + return ( dictionary_size >= min_dictionary_size && + dictionary_size <= max_dictionary_size ); + } + return false; } diff --git a/lzlib.h b/lzlib.h index 766054c..5b180a5 100644 --- a/lzlib.h +++ b/lzlib.h @@ -29,7 +29,7 @@ extern "C" { #endif -static const char * const LZ_version_string = "1.7-pre1"; +static const char * const LZ_version_string = "1.7-rc1"; enum LZ_Errno { LZ_ok = 0, LZ_bad_argument, LZ_mem_error, LZ_sequence_error, LZ_header_error, LZ_unexpected_eof, diff --git a/main.c b/main.c index 1c9d29c..cab15b5 100644 --- a/main.c +++ b/main.c @@ -289,11 +289,15 @@ static unsigned long long getnum( const char * const ptr, static int get_dict_size( const char * const arg ) { char * tail; - int bits = strtol( arg, &tail, 0 ); + int dictionary_size; + const int bits = strtol( arg, &tail, 0 ); if( bits >= LZ_min_dictionary_bits() && bits <= LZ_max_dictionary_bits() && *tail == 0 ) return ( 1 << bits ); - return getnum( arg, LZ_min_dictionary_size(), LZ_max_dictionary_size() ); + dictionary_size = getnum( arg, LZ_min_dictionary_size(), + LZ_max_dictionary_size() ); + if( dictionary_size == 65535 ) 
++dictionary_size; + return dictionary_size; } @@ -836,7 +840,7 @@ int main( const int argc, const char * const argv[] ) { 3 << 23, 132 }, /* -8 */ { 1 << 25, 273 } }; /* -9 */ struct Lzma_options encoder_options = option_mapping[6]; /* default = "-6" */ - const unsigned long long max_member_size = 0x0100000000000000ULL; + const unsigned long long max_member_size = 0x0008000000000000ULL; const unsigned long long max_volume_size = 0x4000000000000000ULL; unsigned long long member_size = max_member_size; unsigned long long volume_size = 0; -- cgit v1.2.3