From 73f5ce5a1a7ef15a0e889bf2416e401db59f8c28 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 6 Nov 2015 13:52:43 +0100 Subject: Merging upstream version 1.7~rc1. Signed-off-by: Daniel Baumann --- ChangeLog | 5 +++++ INSTALL | 2 +- Makefile.in | 6 +++--- README | 21 +++++++++++---------- carg_parser.c | 30 +++++++++++------------------- carg_parser.h | 30 +++++++++++------------------- configure | 2 +- decoder.c | 6 +++--- decoder.h | 6 +++--- doc/clzip.1 | 2 +- doc/clzip.info | 50 ++++++++++++++++++++++++-------------------------- doc/clzip.texi | 43 +++++++++++++++++++++---------------------- encoder.c | 2 +- lzip.h | 4 ++-- main.c | 14 +++++++------- testsuite/check.sh | 2 +- 16 files changed, 106 insertions(+), 119 deletions(-) diff --git a/ChangeLog b/ChangeLog index a90aa47..b907530 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2015-05-23 Antonio Diaz Diaz + + * Version 1.7-rc1 released. + * main.c (compress): Fixed spurious warning about uninitialized var. + 2015-02-26 Antonio Diaz Diaz * Version 1.7-pre1 released. diff --git a/INSTALL b/INSTALL index 921cb29..e510baf 100644 --- a/INSTALL +++ b/INSTALL @@ -1,7 +1,7 @@ Requirements ------------ You will need a C compiler. -I use gcc 4.9.1 and 3.3.6, but the code should compile with any +I use gcc 4.9.1 and 4.1.2, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. 
diff --git a/Makefile.in b/Makefile.in index 1e309e0..f40352e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -18,13 +18,13 @@ objs = carg_parser.o encoder_base.o encoder.o fast_encoder.o decoder.o main.o all : $(progname) $(progname) : $(objs) - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs) + $(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(objs) main.o : main.c - $(CC) $(CFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< + $(CC) $(CPPFLAGS) $(CFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< %.o : %.c - $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $< + $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $< $(objs) : Makefile carg_parser.o : carg_parser.h diff --git a/README b/README index 15f854c..b358f08 100644 --- a/README +++ b/README @@ -3,7 +3,7 @@ Description Clzip is a lossless data compressor with a user interface similar to the one of gzip or bzip2. Clzip is about as fast as gzip, compresses most files more than bzip2, and is better than both from a data recovery -perspective. Clzip is a clean implementation of the LZMA "algorithm". +perspective. Clzip uses the lzip file format; the files produced by clzip are fully compatible with lzip-1.4 or newer, and can be rescued with lziprecover. @@ -81,15 +81,16 @@ multivolume compressed tar archives. Clzip is able to compress and decompress streams of unlimited size by automatically creating multi-member output. The members so created are -large, about 64 PiB each. - -There is no such thing as a "LZMA algorithm"; it is more like a "LZMA -coding scheme". For example, the option '-0' of lzip uses the scheme in -almost the simplest way possible; issuing the longest match it can find, -or a literal byte if it can't find a match. Inversely, a much more -elaborated way of finding coding sequences of minimum price than the one -currently used by lzip could be developed, and the resulting sequence -could also be coded using the LZMA coding scheme. +large, about 2 PiB each. 
+ +In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a +concrete algorithm; it is more like "any algorithm using the LZMA coding +scheme". For example, the option '-0' of lzip uses the scheme in almost +the simplest way possible; issuing the longest match it can find, or a +literal byte if it can't find a match. Inversely, a much more elaborated +way of finding coding sequences of minimum size than the one currently +used by lzip could be developed, and the resulting sequence could also +be coded using the LZMA coding scheme. Clzip currently implements two variants of the LZMA algorithm; fast (used by option -0) and normal (used by all other compression levels). diff --git a/carg_parser.c b/carg_parser.c index a453e36..8d74ea6 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,28 +1,20 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) Copyright (C) 2006-2015 Antonio Diaz Diaz. - This library is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. + This library is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this library. If not, see <http://www.gnu.org/licenses/>. - - As a special exception, you may use this file as part of a free - software library without restriction. Specifically, if other files - instantiate templates or use macros or inline functions from this - file, or you compile this file and link it with other files to - produce an executable, this file does not by itself cause the - resulting executable to be covered by the GNU General Public - License. This exception does not however invalidate any other - reasons why the executable file might be covered by the GNU General - Public License. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #include <stdlib.h> diff --git a/carg_parser.h b/carg_parser.h index 34b1263..ed4d9c5 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,28 +1,20 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) Copyright (C) 2006-2015 Antonio Diaz Diaz. - This library is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. + This library is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this library. If not, see <http://www.gnu.org/licenses/>. - - As a special exception, you may use this file as part of a free - software library without restriction. Specifically, if other files - instantiate templates or use macros or inline functions from this - file, or you compile this file and link it with other files to - produce an executable, this file does not by itself cause the - resulting executable to be covered by the GNU General Public - License. This exception does not however invalidate any other - reasons why the executable file might be covered by the GNU General - Public License. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ /* Arg_parser reads the arguments in 'argv' and creates a number of diff --git a/configure b/configure index 21692b1..de1bcb0 100755 --- a/configure +++ b/configure @@ -6,7 +6,7 @@ # to copy, distribute and modify it. pkgname=clzip -pkgversion=1.7-pre1 +pkgversion=1.7-rc1 progname=clzip srctrigger=doc/${pkgname}.texi diff --git a/decoder.c b/decoder.c index a1fae99..cd3e774 100644 --- a/decoder.c +++ b/decoder.c @@ -209,7 +209,7 @@ int LZd_decode_member( struct LZ_decoder * const d, const int pos_state = LZd_data_position( d ) & pos_state_mask; if( Rd_decode_bit( rdec, &d->bm_match[state][pos_state] ) == 0 ) /* 1st bit */ { - const uint8_t prev_byte = LZd_get_prev_byte( d ); + const uint8_t prev_byte = LZd_peek_prev( d ); if( St_is_char( state ) ) { state -= ( state < 4 ) ? state : 3; @@ -221,7 +221,7 @@ int LZd_decode_member( struct LZ_decoder * const d, state -= ( state < 10 ) ? 
3 : 6; LZd_put_byte( d, Rd_decode_matched( rdec, d->bm_literal[get_lit_state(prev_byte)], - LZd_get_byte( d, rep0 ) ) ); + LZd_peek( d, rep0 ) ) ); } } else @@ -249,7 +249,7 @@ int LZd_decode_member( struct LZ_decoder * const d, { if( Rd_decode_bit( rdec, &d->bm_len[state][pos_state] ) == 0 ) /* 4th bit */ { state = St_set_short_rep( state ); - LZd_put_byte( d, LZd_get_byte( d, rep0 ) ); continue; } + LZd_put_byte( d, LZd_peek( d, rep0 ) ); continue; } } state = St_set_rep( state ); len = min_match_len + Rd_decode_len( rdec, &d->rep_len_model, pos_state ); diff --git a/decoder.h b/decoder.h index 19e4c6a..833701b 100644 --- a/decoder.h +++ b/decoder.h @@ -256,14 +256,14 @@ struct LZ_decoder void LZd_flush_data( struct LZ_decoder * const d ); -static inline uint8_t LZd_get_prev_byte( const struct LZ_decoder * const d ) +static inline uint8_t LZd_peek_prev( const struct LZ_decoder * const d ) { const int i = ( ( d->pos > 0 ) ? d->pos : d->buffer_size ) - 1; return d->buffer[i]; } -static inline uint8_t LZd_get_byte( const struct LZ_decoder * const d, - const int distance ) +static inline uint8_t LZd_peek( const struct LZ_decoder * const d, + const int distance ) { int i = d->pos - distance - 1; if( i < 0 ) i += d->buffer_size; diff --git a/doc/clzip.1 b/doc/clzip.1 index d351c01..cfc9050 100644 --- a/doc/clzip.1 +++ b/doc/clzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH CLZIP "1" "February 2015" "clzip 1.7-pre1" "User Commands" +.TH CLZIP "1" "May 2015" "clzip 1.7-rc1" "User Commands" .SH NAME clzip \- reduces the size of files .SH SYNOPSIS diff --git a/doc/clzip.info b/doc/clzip.info index 848adc2..b66195e 100644 --- a/doc/clzip.info +++ b/doc/clzip.info @@ -11,7 +11,7 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir) Clzip Manual ************ -This manual is for Clzip (version 1.7-pre1, 26 February 2015). +This manual is for Clzip (version 1.7-rc1, 23 May 2015). 
* Menu: @@ -38,8 +38,7 @@ File: clzip.info, Node: Introduction, Next: Algorithm, Prev: Top, Up: Top Clzip is a lossless data compressor with a user interface similar to the one of gzip or bzip2. Clzip is about as fast as gzip, compresses most files more than bzip2, and is better than both from a data recovery -perspective. Clzip is a clean implementation of the LZMA -(Lempel-Ziv-Markov chain-Algorithm) "algorithm". +perspective. Clzip uses the lzip file format; the files produced by clzip are fully compatible with lzip-1.4 or newer, and can be rescued with @@ -136,7 +135,7 @@ multivolume compressed tar archives. Clzip is able to compress and decompress streams of unlimited size by automatically creating multi-member output. The members so created are -large, about 64 PiB each. +large, about 2 PiB each.  File: clzip.info, Node: Algorithm, Next: Invoking clzip, Prev: Introduction, Up: Top @@ -144,13 +143,14 @@ File: clzip.info, Node: Algorithm, Next: Invoking clzip, Prev: Introduction, 2 Algorithm *********** -There is no such thing as a "LZMA algorithm"; it is more like a "LZMA -coding scheme". For example, the option '-0' of lzip uses the scheme in -almost the simplest way possible; issuing the longest match it can find, -or a literal byte if it can't find a match. Inversely, a much more -elaborated way of finding coding sequences of minimum price than the one -currently used by lzip could be developed, and the resulting sequence -could also be coded using the LZMA coding scheme. +In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a +concrete algorithm; it is more like "any algorithm using the LZMA coding +scheme". For example, the option '-0' of lzip uses the scheme in almost +the simplest way possible; issuing the longest match it can find, or a +literal byte if it can't find a match. 
Inversely, a much more elaborated +way of finding coding sequences of minimum size than the one currently +used by lzip could be developed, and the resulting sequence could also +be coded using the LZMA coding scheme. Clzip currently implements two variants of the LZMA algorithm; fast (used by option -0) and normal (used by all other compression levels). @@ -227,7 +227,7 @@ The format for running clzip is: '--member-size=BYTES' Set the member size limit to BYTES. A small member size may degrade compression ratio, so use it only when needed. Valid values - range from 100 kB to 64 PiB. Defaults to 64 PiB. + range from 100 kB to 2 PiB. Defaults to 2 PiB. '-c' '--stdout' @@ -406,14 +406,12 @@ additional information before, between, or after them. now. 'DS (coded dictionary size, 1 byte)' - Lzip divides the distance between any two powers of 2 into 8 - equally spaced intervals, named "wedges". The dictionary size is - calculated by taking a power of 2 (the base size) and substracting - from it a number of wedges between 0 and 7. The size of a wedge is - (base_size / 16). + The dictionary size is calculated by taking a power of 2 (the base + size) and substracting from it a fraction between 0/16 and 7/16 of + the base size. Bits 4-0 contain the base 2 logarithm of the base size (12 to 29). - Bits 7-5 contain the number of wedges (0 to 7) to substract from - the base size to obtain the dictionary size. + Bits 7-5 contain the numerator of the fraction (0 to 7) to + substract from the base size to obtain the dictionary size. Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB Valid values for dictionary size range from 4 KiB to 512 MiB. 
@@ -547,13 +545,13 @@ Concept index  Tag Table: Node: Top210 -Node: Introduction903 -Node: Algorithm6200 -Node: Invoking clzip8963 -Node: File format14514 -Node: Examples17046 -Node: Problems19015 -Node: Concept index19541 +Node: Introduction897 +Node: Algorithm6100 +Node: Invoking clzip8930 +Node: File format14479 +Node: Examples16881 +Node: Problems18850 +Node: Concept index19376  End Tag Table diff --git a/doc/clzip.texi b/doc/clzip.texi index 01f5f39..a74ec6f 100644 --- a/doc/clzip.texi +++ b/doc/clzip.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 26 February 2015 -@set VERSION 1.7-pre1 +@set UPDATED 23 May 2015 +@set VERSION 1.7-rc1 @dircategory Data Compression @direntry @@ -58,8 +58,7 @@ to copy, distribute and modify it. Clzip is a lossless data compressor with a user interface similar to the one of gzip or bzip2. Clzip is about as fast as gzip, compresses most files more than bzip2, and is better than both from a data recovery -perspective. Clzip is a clean implementation of the LZMA -(Lempel-Ziv-Markov chain-Algorithm) "algorithm". +perspective. Clzip uses the lzip file format; the files produced by clzip are fully compatible with lzip-1.4 or newer, and can be rescued with lziprecover. @@ -162,23 +161,24 @@ multivolume compressed tar archives. Clzip is able to compress and decompress streams of unlimited size by automatically creating multi-member output. The members so created are -large, about 64 PiB each. +large, about 2 PiB each. @node Algorithm @chapter Algorithm @cindex algorithm -There is no such thing as a "LZMA algorithm"; it is more like a "LZMA -coding scheme". For example, the option '-0' of lzip uses the scheme in -almost the simplest way possible; issuing the longest match it can find, -or a literal byte if it can't find a match. 
Inversely, a much more -elaborated way of finding coding sequences of minimum price than the one -currently used by lzip could be developed, and the resulting sequence -could also be coded using the LZMA coding scheme. +In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a +concrete algorithm; it is more like "any algorithm using the LZMA coding +scheme". For example, the option '-0' of lzip uses the scheme in almost +the simplest way possible; issuing the longest match it can find, or a +literal byte if it can't find a match. Inversely, a much more elaborated +way of finding coding sequences of minimum size than the one currently +used by lzip could be developed, and the resulting sequence could also +be coded using the LZMA coding scheme. -Clzip currently implements two variants of the LZMA algorithm; fast (used -by option -0) and normal (used by all other compression levels). +Clzip currently implements two variants of the LZMA algorithm; fast +(used by option -0) and normal (used by all other compression levels). The high compression of LZMA comes from combining two basic, well-proven compression ideas: sliding dictionaries (LZ77/78) and markov models (the @@ -245,7 +245,7 @@ clzip [@var{options}] [@var{files}] Clzip supports the following options: -@table @samp +@table @code @item -h @itemx --help Print an informative help message describing the options and exit. @@ -258,7 +258,7 @@ Print the version number of clzip on the standard output and exit. @itemx --member-size=@var{bytes} Set the member size limit to @var{bytes}. A small member size may degrade compression ratio, so use it only when needed. Valid values -range from 100 kB to 64 PiB. Defaults to 64 PiB. +range from 100 kB to 2 PiB. Defaults to 2 PiB. @item -c @itemx --stdout @@ -441,13 +441,12 @@ A four byte string, identifying the lzip format, with the value "LZIP" Just in case something needs to be modified in the future. 1 for now. 
@item DS (coded dictionary size, 1 byte) -Lzip divides the distance between any two powers of 2 into 8 equally -spaced intervals, named "wedges". The dictionary size is calculated by -taking a power of 2 (the base size) and substracting from it a number of -wedges between 0 and 7. The size of a wedge is (base_size / 16).@* +The dictionary size is calculated by taking a power of 2 (the base size) +and substracting from it a fraction between 0/16 and 7/16 of the base +size.@* Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@* -Bits 7-5 contain the number of wedges (0 to 7) to substract from the -base size to obtain the dictionary size.@* +Bits 7-5 contain the numerator of the fraction (0 to 7) to substract +from the base size to obtain the dictionary size.@* Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@* Valid values for dictionary size range from 4 KiB to 512 MiB. diff --git a/encoder.c b/encoder.c index 9d5fd5c..1a027f5 100644 --- a/encoder.c +++ b/encoder.c @@ -456,7 +456,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, e->trials[++num_trials].price = infinite_price; i = 0; - while( start_len > e->pairs[i].len ) ++i; + while( e->pairs[i].len < start_len ) ++i; dis = e->pairs[i].dis; for( len = start_len; ; ++len ) { diff --git a/lzip.h b/lzip.h index a478508..40e69d7 100644 --- a/lzip.h +++ b/lzip.h @@ -227,10 +227,10 @@ static inline bool Fh_set_dictionary_size( File_header data, const unsigned sz ) if( sz > min_dictionary_size ) { const unsigned base_size = 1 << data[5]; - const unsigned wedge = base_size / 16; + const unsigned fraction = base_size / 16; int i; for( i = 7; i >= 1; --i ) - if( base_size - ( i * wedge ) >= sz ) + if( base_size - ( i * fraction ) >= sz ) { data[5] |= ( i << 5 ); break; } } return true; diff --git a/main.c b/main.c index bd8e8fb..183b9bd 100644 --- a/main.c +++ b/main.c @@ -223,7 +223,7 @@ static unsigned long long getnum( const char * const ptr, static int get_dict_size( 
const char * const arg ) { char * tail; - int bits = strtol( arg, &tail, 0 ); + const int bits = strtol( arg, &tail, 0 ); if( bits >= min_dictionary_bits && bits <= max_dictionary_bits && *tail == 0 ) return ( 1 << bits ); @@ -469,11 +469,11 @@ static int compress( const unsigned long long member_size, else { File_header header; - if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) || - encoder_options->match_len_limit < min_match_len_limit || - encoder_options->match_len_limit > max_match_len ) - internal_error( "invalid argument to encoder." ); - encoder.e = (struct LZ_encoder *)malloc( sizeof (struct LZ_encoder) ); + if( Fh_set_dictionary_size( header, encoder_options->dictionary_size ) && + encoder_options->match_len_limit >= min_match_len_limit && + encoder_options->match_len_limit <= max_match_len ) + encoder.e = (struct LZ_encoder *)malloc( sizeof (struct LZ_encoder) ); + else internal_error( "invalid argument to encoder." ); if( !encoder.e || !LZe_init( encoder.e, Fh_get_dictionary_size( header ), encoder_options->match_len_limit, infd, outfd ) ) error = true; @@ -700,7 +700,7 @@ int main( const int argc, const char * const argv[] ) { 3 << 23, 132 }, /* -8 */ { 1 << 25, 273 } }; /* -9 */ struct Lzma_options encoder_options = option_mapping[6]; /* default = "-6" */ - const unsigned long long max_member_size = 0x0100000000000000ULL; + const unsigned long long max_member_size = 0x0008000000000000ULL; const unsigned long long max_volume_size = 0x4000000000000000ULL; unsigned long long member_size = max_member_size; unsigned long long volume_size = 0; diff --git a/testsuite/check.sh b/testsuite/check.sh index 574022c..fdfb5b8 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Clzip - LZMA lossless data compressor -# Copyright (C) 2010-2014 Antonio Diaz Diaz. +# Copyright (C) 2010-2015 Antonio Diaz Diaz. 
# # This script is free software: you have unlimited permission # to copy, distribute and modify it. -- cgit v1.2.3