summaryrefslogtreecommitdiffstats
path: root/doc/lzip.texi
diff options
context:
space:
mode:
Diffstat (limited to 'doc/lzip.texi')
-rw-r--r--doc/lzip.texi61
1 files changed, 27 insertions, 34 deletions
diff --git a/doc/lzip.texi b/doc/lzip.texi
index 037dd6e..58d6f9a 100644
--- a/doc/lzip.texi
+++ b/doc/lzip.texi
@@ -6,8 +6,8 @@
@finalout
@c %**end of header
-@set UPDATED 26 August 2014
-@set VERSION 1.16
+@set UPDATED 26 March 2015
+@set VERSION 1.17-pre1
@dircategory Data Compression
@direntry
@@ -47,7 +47,7 @@ This manual is for Lzip (version @value{VERSION}, @value{UPDATED}).
@end menu
@sp 1
-Copyright @copyright{} 2008-2014 Antonio Diaz Diaz.
+Copyright @copyright{} 2008-2015 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission
to copy, distribute and modify it.
@@ -63,8 +63,9 @@ files more than bzip2, and is better than both from a data recovery
perspective. Lzip is a clean implementation of the LZMA
(Lempel-Ziv-Markov chain-Algorithm) "algorithm".
-The lzip file format is designed for long-term data archiving, taking
-into account both data integrity and decoder availability:
+The lzip file format is designed for data sharing and long-term
+archiving, taking into account both data integrity and decoder
+availability:
@itemize @bullet
@item
@@ -83,8 +84,8 @@ data from a lzip file long after quantum computers eventually render
LZMA obsolete.
@item
-Additionally lzip is copylefted, which guarantees that it will remain
-free forever.
+Additionally, the lzip reference implementation is copylefted, which
+guarantees that it will remain free forever.
@end itemize
A nice feature of the lzip format is that a corrupt byte is easier to
@@ -169,7 +170,7 @@ There is no such thing as a "LZMA algorithm"; it is more like a "LZMA
coding scheme". For example, the option '-0' of lzip uses the scheme in
almost the simplest way possible; issuing the longest match it can find,
or a literal byte if it can't find a match. Inversely, a much more
-elaborated way of finding coding sequences of minimum price than the one
+elaborate way of finding coding sequences of minimum size than the one
currently used by lzip could be developed, and the resulting sequence
could also be coded using the LZMA coding scheme.
@@ -344,8 +345,7 @@ The bidimensional parameter space of LZMA can't be mapped to a linear
scale optimal for all files. If your files are large, very repetitive,
etc, you may need to use the @samp{--match-length} and
@samp{--dictionary-size} options directly to achieve optimal
-performance. For example, @samp{-9m64} usually compresses executables
-more (and faster) than @samp{-9}.
+performance.
@multitable {Level} {Dictionary size} {Match length limit}
@item Level @tab Dictionary size @tab Match length limit
@@ -439,20 +439,19 @@ A four byte string, identifying the lzip format, with the value "LZIP"
Just in case something needs to be modified in the future. 1 for now.
@item DS (coded dictionary size, 1 byte)
-Lzip divides the distance between any two powers of 2 into 8 equally
-spaced intervals, named "wedges". The dictionary size is calculated by
-taking a power of 2 (the base size) and substracting from it a number of
-wedges between 0 and 7. The size of a wedge is (base_size / 16).@*
+The dictionary size is calculated by taking a power of 2 (the base size)
+and subtracting from it a fraction between 0/16 and 7/16 of the base
+size.@*
Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@*
-Bits 7-5 contain the number of wedges (0 to 7) to substract from the
-base size to obtain the dictionary size.@*
+Bits 7-5 contain the numerator of the fraction (0 to 7) to subtract
+from the base size to obtain the dictionary size.@*
Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@*
Valid values for dictionary size range from 4 KiB to 512 MiB.
@item Lzma stream
The lzma stream, finished by an end of stream marker. Uses default
-values for encoder properties. See the chapter @samp{Stream format}
-(@pxref{Stream format}) for a complete description.
+values for encoder properties. @xref{Stream format}, for a complete
+description.
@item CRC32 (4 bytes)
CRC of the uncompressed original data.
@@ -805,7 +804,7 @@ for all eternity, if not longer.
If you find a bug in lzip, please send electronic mail to
@email{lzip-bug@@nongnu.org}. Include the version number, which you can
-find by running @w{@samp{lzip --version}}.
+find by running @w{@code{lzip --version}}.
@node Reference source code
@@ -814,7 +813,7 @@ find by running @w{@samp{lzip --version}}.
@verbatim
/* Lzd - Educational decompressor for lzip files
- Copyright (C) 2013, 2014 Antonio Diaz Diaz.
+ Copyright (C) 2013-2015 Antonio Diaz Diaz.
This program is free software: you have unlimited permission
to copy, distribute and modify it.
@@ -1052,7 +1051,7 @@ class LZ_decoder
void flush_data();
- uint8_t get_byte( const unsigned distance ) const
+ uint8_t peek( const unsigned distance ) const
{
unsigned i = pos - distance - 1;
if( pos <= distance ) i += dictionary_size;
@@ -1126,13 +1125,13 @@ bool LZ_decoder::decode_member() // Returns false if error
const int pos_state = data_position() & pos_state_mask;
if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
{
- const uint8_t prev_byte = get_byte( 0 );
+ const uint8_t prev_byte = peek( 0 );
const int literal_state = prev_byte >> ( 8 - literal_context_bits );
Bit_model * const bm = bm_literal[literal_state];
if( state.is_char() )
put_byte( rdec.decode_tree( bm, 8 ) );
else
- put_byte( rdec.decode_matched( bm, get_byte( rep0 ) ) );
+ put_byte( rdec.decode_matched( bm, peek( rep0 ) ) );
state.set_char();
}
else
@@ -1159,7 +1158,7 @@ bool LZ_decoder::decode_member() // Returns false if error
else
{
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
- { state.set_short_rep(); put_byte( get_byte( rep0 ) ); continue; }
+ { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
}
state.set_rep();
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
@@ -1183,7 +1182,7 @@ bool LZ_decoder::decode_member() // Returns false if error
{
rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
rep0 += rdec.decode_tree_reversed( bm_align, dis_align_bits );
- if( rep0 == 0xFFFFFFFFU ) // Marker found
+ if( rep0 == 0xFFFFFFFFU ) // marker found
{
flush_data();
return ( len == min_match_len ); // End Of Stream marker
@@ -1194,7 +1193,7 @@ bool LZ_decoder::decode_member() // Returns false if error
if( rep0 >= dictionary_size || rep0 >= data_position() )
{ flush_data(); return false; }
}
- for( int i = 0; i < len; ++i ) put_byte( get_byte( rep0 ) );
+ for( int i = 0; i < len; ++i ) put_byte( peek( rep0 ) );
}
}
flush_data();
@@ -1213,7 +1212,7 @@ int main( const int argc, const char * const argv[] )
"It is not safe to use lzd for any real work.\n"
"\nUsage: %s < file.lz > file\n", argv[0] );
std::printf( "Lzd decompresses from standard input to standard output.\n"
- "\nCopyright (C) 2014 Antonio Diaz Diaz.\n"
+ "\nCopyright (C) 2015 Antonio Diaz Diaz.\n"
"This is free software: you are free to change and redistribute it.\n"
"There is NO WARRANTY, to the extent permitted by law.\n"
"Report bugs to lzip-bug@nongnu.org\n"
@@ -1225,19 +1224,13 @@ int main( const int argc, const char * const argv[] )
{
File_header header;
for( int i = 0; i < 6; ++i ) header[i] = std::getc( stdin );
- if( std::feof( stdin ) || std::memcmp( header, "LZIP", 4 ) != 0 )
+ if( std::feof( stdin ) || std::memcmp( header, "LZIP\x01", 5 ) != 0 )
{
if( first_member )
{ std::fprintf( stderr, "Bad magic number (file not in lzip format)\n" );
return 2; }
break;
}
- if( header[4] != 1 )
- {
- std::fprintf( stderr, "Version %d member format not supported.\n",
- header[4] );
- return 2;
- }
unsigned dict_size = 1 << ( header[5] & 0x1F );
dict_size -= ( dict_size / 16 ) * ( ( header[5] >> 5 ) & 7 );
if( dict_size < min_dictionary_size || dict_size > max_dictionary_size )