summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 5
-rw-r--r-- INSTALL 2
-rw-r--r-- Makefile.in 6
-rw-r--r-- README 21
-rw-r--r-- carg_parser.c 30
-rw-r--r-- carg_parser.h 30
-rwxr-xr-x configure 2
-rw-r--r-- decoder.c 6
-rw-r--r-- decoder.h 6
-rw-r--r-- doc/clzip.1 2
-rw-r--r-- doc/clzip.info 50
-rw-r--r-- doc/clzip.texi 43
-rw-r--r-- encoder.c 2
-rw-r--r-- lzip.h 4
-rw-r--r-- main.c 14
-rwxr-xr-x testsuite/check.sh 2
16 files changed, 106 insertions, 119 deletions
diff --git a/ChangeLog b/ChangeLog
index a90aa47..b907530 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2015-05-23 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.7-rc1 released.
+ * main.c (compress): Fixed spurious warning about uninitialized var.
+
2015-02-26 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.7-pre1 released.
diff --git a/INSTALL b/INSTALL
index 921cb29..e510baf 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,7 +1,7 @@
Requirements
------------
You will need a C compiler.
-I use gcc 4.9.1 and 3.3.6, but the code should compile with any
+I use gcc 4.9.1 and 4.1.2, but the code should compile with any
standards compliant compiler.
Gcc is available at http://gcc.gnu.org.
diff --git a/Makefile.in b/Makefile.in
index 1e309e0..f40352e 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -18,13 +18,13 @@ objs = carg_parser.o encoder_base.o encoder.o fast_encoder.o decoder.o main.o
all : $(progname)
$(progname) : $(objs)
- $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs)
+ $(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(objs)
main.o : main.c
- $(CC) $(CFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
+ $(CC) $(CPPFLAGS) $(CFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
%.o : %.c
- $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
+ $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
$(objs) : Makefile
carg_parser.o : carg_parser.h
diff --git a/README b/README
index 15f854c..b358f08 100644
--- a/README
+++ b/README
@@ -3,7 +3,7 @@ Description
Clzip is a lossless data compressor with a user interface similar to the
one of gzip or bzip2. Clzip is about as fast as gzip, compresses most
files more than bzip2, and is better than both from a data recovery
-perspective. Clzip is a clean implementation of the LZMA "algorithm".
+perspective.
Clzip uses the lzip file format; the files produced by clzip are fully
compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
@@ -81,15 +81,16 @@ multivolume compressed tar archives.
Clzip is able to compress and decompress streams of unlimited size by
automatically creating multi-member output. The members so created are
-large, about 64 PiB each.
-
-There is no such thing as a "LZMA algorithm"; it is more like a "LZMA
-coding scheme". For example, the option '-0' of lzip uses the scheme in
-almost the simplest way possible; issuing the longest match it can find,
-or a literal byte if it can't find a match. Inversely, a much more
-elaborated way of finding coding sequences of minimum price than the one
-currently used by lzip could be developed, and the resulting sequence
-could also be coded using the LZMA coding scheme.
+large, about 2 PiB each.
+
+In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
+concrete algorithm; it is more like "any algorithm using the LZMA coding
+scheme". For example, the option '-0' of lzip uses the scheme in almost
+the simplest way possible; issuing the longest match it can find, or a
+literal byte if it can't find a match. Inversely, a much more elaborated
+way of finding coding sequences of minimum size than the one currently
+used by lzip could be developed, and the resulting sequence could also
+be coded using the LZMA coding scheme.
Clzip currently implements two variants of the LZMA algorithm; fast
(used by option -0) and normal (used by all other compression levels).
diff --git a/carg_parser.c b/carg_parser.c
index a453e36..8d74ea6 100644
--- a/carg_parser.c
+++ b/carg_parser.c
@@ -1,28 +1,20 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
Copyright (C) 2006-2015 Antonio Diaz Diaz.
- This library is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 2 of the License, or
- (at your option) any later version.
+ This library is free software. Redistribution and use in source and
+ binary forms, with or without modification, are permitted provided
+ that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this library. If not, see <http://www.gnu.org/licenses/>.
-
- As a special exception, you may use this file as part of a free
- software library without restriction. Specifically, if other files
- instantiate templates or use macros or inline functions from this
- file, or you compile this file and link it with other files to
- produce an executable, this file does not by itself cause the
- resulting executable to be covered by the GNU General Public
- License. This exception does not however invalidate any other
- reasons why the executable file might be covered by the GNU General
- Public License.
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <stdlib.h>
diff --git a/carg_parser.h b/carg_parser.h
index 34b1263..ed4d9c5 100644
--- a/carg_parser.h
+++ b/carg_parser.h
@@ -1,28 +1,20 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
Copyright (C) 2006-2015 Antonio Diaz Diaz.
- This library is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 2 of the License, or
- (at your option) any later version.
+ This library is free software. Redistribution and use in source and
+ binary forms, with or without modification, are permitted provided
+ that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this library. If not, see <http://www.gnu.org/licenses/>.
-
- As a special exception, you may use this file as part of a free
- software library without restriction. Specifically, if other files
- instantiate templates or use macros or inline functions from this
- file, or you compile this file and link it with other files to
- produce an executable, this file does not by itself cause the
- resulting executable to be covered by the GNU General Public
- License. This exception does not however invalidate any other
- reasons why the executable file might be covered by the GNU General
- Public License.
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
/* Arg_parser reads the arguments in 'argv' and creates a number of
diff --git a/configure b/configure
index 21692b1..de1bcb0 100755
--- a/configure
+++ b/configure
@@ -6,7 +6,7 @@
# to copy, distribute and modify it.
pkgname=clzip
-pkgversion=1.7-pre1
+pkgversion=1.7-rc1
progname=clzip
srctrigger=doc/${pkgname}.texi
diff --git a/decoder.c b/decoder.c
index a1fae99..cd3e774 100644
--- a/decoder.c
+++ b/decoder.c
@@ -209,7 +209,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
const int pos_state = LZd_data_position( d ) & pos_state_mask;
if( Rd_decode_bit( rdec, &d->bm_match[state][pos_state] ) == 0 ) /* 1st bit */
{
- const uint8_t prev_byte = LZd_get_prev_byte( d );
+ const uint8_t prev_byte = LZd_peek_prev( d );
if( St_is_char( state ) )
{
state -= ( state < 4 ) ? state : 3;
@@ -221,7 +221,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
state -= ( state < 10 ) ? 3 : 6;
LZd_put_byte( d, Rd_decode_matched( rdec,
d->bm_literal[get_lit_state(prev_byte)],
- LZd_get_byte( d, rep0 ) ) );
+ LZd_peek( d, rep0 ) ) );
}
}
else
@@ -249,7 +249,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
{
if( Rd_decode_bit( rdec, &d->bm_len[state][pos_state] ) == 0 ) /* 4th bit */
{ state = St_set_short_rep( state );
- LZd_put_byte( d, LZd_get_byte( d, rep0 ) ); continue; }
+ LZd_put_byte( d, LZd_peek( d, rep0 ) ); continue; }
}
state = St_set_rep( state );
len = min_match_len + Rd_decode_len( rdec, &d->rep_len_model, pos_state );
diff --git a/decoder.h b/decoder.h
index 19e4c6a..833701b 100644
--- a/decoder.h
+++ b/decoder.h
@@ -256,14 +256,14 @@ struct LZ_decoder
void LZd_flush_data( struct LZ_decoder * const d );
-static inline uint8_t LZd_get_prev_byte( const struct LZ_decoder * const d )
+static inline uint8_t LZd_peek_prev( const struct LZ_decoder * const d )
{
const int i = ( ( d->pos > 0 ) ? d->pos : d->buffer_size ) - 1;
return d->buffer[i];
}
-static inline uint8_t LZd_get_byte( const struct LZ_decoder * const d,
- const int distance )
+static inline uint8_t LZd_peek( const struct LZ_decoder * const d,
+ const int distance )
{
int i = d->pos - distance - 1;
if( i < 0 ) i += d->buffer_size;
diff --git a/doc/clzip.1 b/doc/clzip.1
index d351c01..cfc9050 100644
--- a/doc/clzip.1
+++ b/doc/clzip.1
@@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
-.TH CLZIP "1" "February 2015" "clzip 1.7-pre1" "User Commands"
+.TH CLZIP "1" "May 2015" "clzip 1.7-rc1" "User Commands"
.SH NAME
clzip \- reduces the size of files
.SH SYNOPSIS
diff --git a/doc/clzip.info b/doc/clzip.info
index 848adc2..b66195e 100644
--- a/doc/clzip.info
+++ b/doc/clzip.info
@@ -11,7 +11,7 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir)
Clzip Manual
************
-This manual is for Clzip (version 1.7-pre1, 26 February 2015).
+This manual is for Clzip (version 1.7-rc1, 23 May 2015).
* Menu:
@@ -38,8 +38,7 @@ File: clzip.info, Node: Introduction, Next: Algorithm, Prev: Top, Up: Top
Clzip is a lossless data compressor with a user interface similar to the
one of gzip or bzip2. Clzip is about as fast as gzip, compresses most
files more than bzip2, and is better than both from a data recovery
-perspective. Clzip is a clean implementation of the LZMA
-(Lempel-Ziv-Markov chain-Algorithm) "algorithm".
+perspective.
Clzip uses the lzip file format; the files produced by clzip are
fully compatible with lzip-1.4 or newer, and can be rescued with
@@ -136,7 +135,7 @@ multivolume compressed tar archives.
Clzip is able to compress and decompress streams of unlimited size by
automatically creating multi-member output. The members so created are
-large, about 64 PiB each.
+large, about 2 PiB each.

File: clzip.info, Node: Algorithm, Next: Invoking clzip, Prev: Introduction, Up: Top
@@ -144,13 +143,14 @@ File: clzip.info, Node: Algorithm, Next: Invoking clzip, Prev: Introduction,
2 Algorithm
***********
-There is no such thing as a "LZMA algorithm"; it is more like a "LZMA
-coding scheme". For example, the option '-0' of lzip uses the scheme in
-almost the simplest way possible; issuing the longest match it can find,
-or a literal byte if it can't find a match. Inversely, a much more
-elaborated way of finding coding sequences of minimum price than the one
-currently used by lzip could be developed, and the resulting sequence
-could also be coded using the LZMA coding scheme.
+In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
+concrete algorithm; it is more like "any algorithm using the LZMA coding
+scheme". For example, the option '-0' of lzip uses the scheme in almost
+the simplest way possible; issuing the longest match it can find, or a
+literal byte if it can't find a match. Inversely, a much more elaborated
+way of finding coding sequences of minimum size than the one currently
+used by lzip could be developed, and the resulting sequence could also
+be coded using the LZMA coding scheme.
Clzip currently implements two variants of the LZMA algorithm; fast
(used by option -0) and normal (used by all other compression levels).
@@ -227,7 +227,7 @@ The format for running clzip is:
'--member-size=BYTES'
Set the member size limit to BYTES. A small member size may
degrade compression ratio, so use it only when needed. Valid values
- range from 100 kB to 64 PiB. Defaults to 64 PiB.
+ range from 100 kB to 2 PiB. Defaults to 2 PiB.
'-c'
'--stdout'
@@ -406,14 +406,12 @@ additional information before, between, or after them.
now.
'DS (coded dictionary size, 1 byte)'
- Lzip divides the distance between any two powers of 2 into 8
- equally spaced intervals, named "wedges". The dictionary size is
- calculated by taking a power of 2 (the base size) and substracting
- from it a number of wedges between 0 and 7. The size of a wedge is
- (base_size / 16).
+ The dictionary size is calculated by taking a power of 2 (the base
+ size) and substracting from it a fraction between 0/16 and 7/16 of
+ the base size.
Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).
- Bits 7-5 contain the number of wedges (0 to 7) to substract from
- the base size to obtain the dictionary size.
+ Bits 7-5 contain the numerator of the fraction (0 to 7) to
+ substract from the base size to obtain the dictionary size.
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB
Valid values for dictionary size range from 4 KiB to 512 MiB.
@@ -547,13 +545,13 @@ Concept index

Tag Table:
Node: Top210
-Node: Introduction903
-Node: Algorithm6200
-Node: Invoking clzip8963
-Node: File format14514
-Node: Examples17046
-Node: Problems19015
-Node: Concept index19541
+Node: Introduction897
+Node: Algorithm6100
+Node: Invoking clzip8930
+Node: File format14479
+Node: Examples16881
+Node: Problems18850
+Node: Concept index19376

End Tag Table
diff --git a/doc/clzip.texi b/doc/clzip.texi
index 01f5f39..a74ec6f 100644
--- a/doc/clzip.texi
+++ b/doc/clzip.texi
@@ -6,8 +6,8 @@
@finalout
@c %**end of header
-@set UPDATED 26 February 2015
-@set VERSION 1.7-pre1
+@set UPDATED 23 May 2015
+@set VERSION 1.7-rc1
@dircategory Data Compression
@direntry
@@ -58,8 +58,7 @@ to copy, distribute and modify it.
Clzip is a lossless data compressor with a user interface similar to the
one of gzip or bzip2. Clzip is about as fast as gzip, compresses most
files more than bzip2, and is better than both from a data recovery
-perspective. Clzip is a clean implementation of the LZMA
-(Lempel-Ziv-Markov chain-Algorithm) "algorithm".
+perspective.
Clzip uses the lzip file format; the files produced by clzip are fully
compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
@@ -162,23 +161,24 @@ multivolume compressed tar archives.
Clzip is able to compress and decompress streams of unlimited size by
automatically creating multi-member output. The members so created are
-large, about 64 PiB each.
+large, about 2 PiB each.
@node Algorithm
@chapter Algorithm
@cindex algorithm
-There is no such thing as a "LZMA algorithm"; it is more like a "LZMA
-coding scheme". For example, the option '-0' of lzip uses the scheme in
-almost the simplest way possible; issuing the longest match it can find,
-or a literal byte if it can't find a match. Inversely, a much more
-elaborated way of finding coding sequences of minimum price than the one
-currently used by lzip could be developed, and the resulting sequence
-could also be coded using the LZMA coding scheme.
+In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
+concrete algorithm; it is more like "any algorithm using the LZMA coding
+scheme". For example, the option '-0' of lzip uses the scheme in almost
+the simplest way possible; issuing the longest match it can find, or a
+literal byte if it can't find a match. Inversely, a much more elaborated
+way of finding coding sequences of minimum size than the one currently
+used by lzip could be developed, and the resulting sequence could also
+be coded using the LZMA coding scheme.
-Clzip currently implements two variants of the LZMA algorithm; fast (used
-by option -0) and normal (used by all other compression levels).
+Clzip currently implements two variants of the LZMA algorithm; fast
+(used by option -0) and normal (used by all other compression levels).
The high compression of LZMA comes from combining two basic, well-proven
compression ideas: sliding dictionaries (LZ77/78) and markov models (the
@@ -245,7 +245,7 @@ clzip [@var{options}] [@var{files}]
Clzip supports the following options:
-@table @samp
+@table @code
@item -h
@itemx --help
Print an informative help message describing the options and exit.
@@ -258,7 +258,7 @@ Print the version number of clzip on the standard output and exit.
@itemx --member-size=@var{bytes}
Set the member size limit to @var{bytes}. A small member size may
degrade compression ratio, so use it only when needed. Valid values
-range from 100 kB to 64 PiB. Defaults to 64 PiB.
+range from 100 kB to 2 PiB. Defaults to 2 PiB.
@item -c
@itemx --stdout
@@ -441,13 +441,12 @@ A four byte string, identifying the lzip format, with the value "LZIP"
Just in case something needs to be modified in the future. 1 for now.
@item DS (coded dictionary size, 1 byte)
-Lzip divides the distance between any two powers of 2 into 8 equally
-spaced intervals, named "wedges". The dictionary size is calculated by
-taking a power of 2 (the base size) and substracting from it a number of
-wedges between 0 and 7. The size of a wedge is (base_size / 16).@*
+The dictionary size is calculated by taking a power of 2 (the base size)
+and substracting from it a fraction between 0/16 and 7/16 of the base
+size.@*
Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@*
-Bits 7-5 contain the number of wedges (0 to 7) to substract from the
-base size to obtain the dictionary size.@*
+Bits 7-5 contain the numerator of the fraction (0 to 7) to substract
+from the base size to obtain the dictionary size.@*
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@*
Valid values for dictionary size range from 4 KiB to 512 MiB.
diff --git a/encoder.c b/encoder.c
index 9d5fd5c..1a027f5 100644
--- a/encoder.c
+++ b/encoder.c
@@ -456,7 +456,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
e->trials[++num_trials].price = infinite_price;
i = 0;
- while( start_len > e->pairs[i].len ) ++i;
+ while( e->pairs[i].len < start_len ) ++i;
dis = e->pairs[i].dis;
for( len = start_len; ; ++len )
{
diff --git a/lzip.h b/lzip.h
index a478508..40e69d7 100644
--- a/lzip.h
+++ b/lzip.h
@@ -227,10 +227,10 @@ static inline bool Fh_set_dictionary_size( File_header data, const unsigned sz )
if( sz > min_dictionary_size )
{
const unsigned base_size = 1 << data[5];
- const unsigned wedge = base_size / 16;
+ const unsigned fraction = base_size / 16;
int i;
for( i = 7; i >= 1; --i )
- if( base_size - ( i * wedge ) >= sz )
+ if( base_size - ( i * fraction ) >= sz )
{ data[5] |= ( i << 5 ); break; }
}
return true;
diff --git a/main.c b/main.c
index bd8e8fb..183b9bd 100644
--- a/main.c
+++ b/main.c
@@ -223,7 +223,7 @@ static unsigned long long getnum( const char * const ptr,
static int get_dict_size( const char * const arg )
{
char * tail;
- int bits = strtol( arg, &tail, 0 );
+ const int bits = strtol( arg, &tail, 0 );
if( bits >= min_dictionary_bits &&
bits <= max_dictionary_bits && *tail == 0 )
return ( 1 << bits );
@@ -469,11 +469,11 @@ static int compress( const unsigned long long member_size,
else
{
File_header header;
- if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) ||
- encoder_options->match_len_limit < min_match_len_limit ||
- encoder_options->match_len_limit > max_match_len )
- internal_error( "invalid argument to encoder." );
- encoder.e = (struct LZ_encoder *)malloc( sizeof (struct LZ_encoder) );
+ if( Fh_set_dictionary_size( header, encoder_options->dictionary_size ) &&
+ encoder_options->match_len_limit >= min_match_len_limit &&
+ encoder_options->match_len_limit <= max_match_len )
+ encoder.e = (struct LZ_encoder *)malloc( sizeof (struct LZ_encoder) );
+ else internal_error( "invalid argument to encoder." );
if( !encoder.e || !LZe_init( encoder.e, Fh_get_dictionary_size( header ),
encoder_options->match_len_limit, infd, outfd ) )
error = true;
@@ -700,7 +700,7 @@ int main( const int argc, const char * const argv[] )
{ 3 << 23, 132 }, /* -8 */
{ 1 << 25, 273 } }; /* -9 */
struct Lzma_options encoder_options = option_mapping[6]; /* default = "-6" */
- const unsigned long long max_member_size = 0x0100000000000000ULL;
+ const unsigned long long max_member_size = 0x0008000000000000ULL;
const unsigned long long max_volume_size = 0x4000000000000000ULL;
unsigned long long member_size = max_member_size;
unsigned long long volume_size = 0;
diff --git a/testsuite/check.sh b/testsuite/check.sh
index 574022c..fdfb5b8 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -1,6 +1,6 @@
#! /bin/sh
# check script for Clzip - LZMA lossless data compressor
-# Copyright (C) 2010-2014 Antonio Diaz Diaz.
+# Copyright (C) 2010-2015 Antonio Diaz Diaz.
#
# This script is free software: you have unlimited permission
# to copy, distribute and modify it.