summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--  ChangeLog            12
-rw-r--r--  NEWS                 15
-rwxr-xr-x  configure             6
-rw-r--r--  decoder.cc          162
-rw-r--r--  decoder.h            88
-rw-r--r--  doc/lzlib.info       50
-rw-r--r--  doc/lzlib.texinfo    32
-rw-r--r--  encoder.cc          106
-rw-r--r--  encoder.h            59
-rw-r--r--  lzip.h               79
-rw-r--r--  lzlib.cc             79
-rw-r--r--  lzlib.h               6
-rw-r--r--  main.cc             141
-rwxr-xr-x  testsuite/check.sh   29
14 files changed, 503 insertions, 361 deletions
diff --git a/ChangeLog b/ChangeLog
index fc7ede1..d1281f9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2010-05-08 Antonio Diaz Diaz <ant_diaz@teleline.es>
+
+ * Version 1.0 released.
+ * Added new function LZ_decompress_member_finished.
+ * Added new function LZ_decompress_member_version.
+ * Added new function LZ_decompress_dictionary_size.
+ * Added new function LZ_decompress_data_crc.
+ * Variables declared "extern" have been encapsulated in a
+ namespace.
+ * main.cc: Fixed warning about fchown's return value being ignored.
+ * decoder.h: Input_buffer integrated in Range_decoder.
+
2010-02-10 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.9 released.
diff --git a/NEWS b/NEWS
index 5e6542d..502457a 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,14 @@
-Changes in version 0.9:
+Changes in version 1.0:
-Compression time has been reduced by 8%.
+New functions:
+ LZ_decompress_member_finished.
+ LZ_decompress_member_version.
+ LZ_decompress_dictionary_size.
+ LZ_decompress_data_crc.
+
+Variables declared "extern" have been encapsulated in a namespace.
+
+A warning about fchown's return value being ignored has been fixed.
+
+Input_buffer has been integrated in Range_decoder, simplifying the code
+and making decompression slightly faster.
diff --git a/configure b/configure
index fe01eac..73e800b 100755
--- a/configure
+++ b/configure
@@ -5,13 +5,13 @@
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
#
-# Date of this version: 2010-02-10
+# Date of this version: 2010-05-08
args=
no_create=
pkgname=lzlib
-pkgversion=0.9
-soversion=0
+pkgversion=1.0
+soversion=1
progname=minilzip
progname_shared=
libname=lz
diff --git a/decoder.cc b/decoder.cc
index 429d33e..a260571 100644
--- a/decoder.cc
+++ b/decoder.cc
@@ -38,71 +38,11 @@
#include "decoder.h"
-const CRC32 crc32;
-
-// Copies up to `out_size' bytes to `out_buffer' and updates `get'.
-// Returns the number of bytes copied.
-int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size ) throw()
- {
- if( out_size < 0 ) return 0;
- int size = 0;
- if( get > put )
- {
- size = std::min( buffer_size - get, out_size );
- if( size > 0 )
- {
- std::memcpy( out_buffer, buffer + get, size );
- get += size;
- if( get >= buffer_size ) get = 0;
- }
- }
- if( get < put )
- {
- const int size2 = std::min( put - get, out_size - size );
- if( size2 > 0 )
- {
- std::memcpy( out_buffer + size, buffer + get, size2 );
- get += size2;
- size += size2;
- }
- }
- return size;
- }
-
-
-// Copies up to `in_size' bytes from `in_buffer' and updates `put'.
-// Returns the number of bytes copied.
-int Circular_buffer::write_data( const uint8_t * const in_buffer, const int in_size ) throw()
- {
- if( in_size < 0 ) return 0;
- int size = 0;
- if( put >= get )
- {
- size = std::min( buffer_size - put - (get == 0), in_size );
- if( size > 0 )
- {
- std::memcpy( buffer + put, in_buffer, size );
- put += size;
- if( put >= buffer_size ) put = 0;
- }
- }
- if( put < get )
- {
- const int size2 = std::min( get - put - 1, in_size - size );
- if( size2 > 0 )
- {
- std::memcpy( buffer + put, in_buffer + size, size2 );
- put += size2;
- size += size2;
- }
- }
- return size;
- }
-
+const CRC32 Lzlib_namespace::crc32;
// Seeks a member header and updates `get'.
// Returns true if it finds a valid header.
-bool Input_buffer::find_header() throw()
+bool Range_decoder::find_header() throw()
{
while( get != put )
{
@@ -110,10 +50,10 @@ bool Input_buffer::find_header() throw()
{
int g = get;
File_header header;
- for( unsigned int i = 0; i < sizeof header; ++i )
+ for( int i = 0; i < File_header::size; ++i )
{
if( g == put ) return false; // not enough data
- ((uint8_t *)&header)[i] = buffer[g];
+ header.data[i] = buffer[g];
if( ++g >= buffer_size ) g = 0;
}
if( header.verify() ) return true;
@@ -127,36 +67,44 @@ bool Input_buffer::find_header() throw()
// Returns true, fills `header', and updates `get' if `get' points to a
// valid header.
// Else returns false and leaves `get' unmodified.
-bool Input_buffer::read_header( File_header & header ) throw()
+bool Range_decoder::read_header( File_header & header ) throw()
{
int g = get;
- for( unsigned int i = 0; i < sizeof header; ++i )
+ for( int i = 0; i < File_header::size; ++i )
{
if( g == put ) return false; // not enough data
- ((uint8_t *)&header)[i] = buffer[g];
+ header.data[i] = buffer[g];
if( ++g >= buffer_size ) g = 0;
}
- if( header.verify() ) { get = g; return true; }
+ if( header.verify() )
+ {
+ get = g;
+ member_pos = File_header::size;
+ reload_pending = true;
+ return true;
+ }
return false;
}
bool LZ_decoder::verify_trailer()
{
- bool error = false;
File_trailer trailer;
- const int trailer_size = trailer.size( format_version );
+ const int trailer_size = File_trailer::size( member_version );
+ const long long member_size = range_decoder.member_position() + trailer_size;
+ bool error = false;
+
for( int i = 0; i < trailer_size && !error; ++i )
{
if( !range_decoder.finished() )
- ((uint8_t *)&trailer)[i] = range_decoder.get_byte();
- else error = true;
+ trailer.data[i] = range_decoder.get_byte();
+ else { error = true; for( ; i < trailer_size; ++i ) trailer.data[i] = 0; }
}
- if( format_version == 0 ) trailer.member_size( member_position() );
+ if( member_version == 0 ) trailer.member_size( member_size );
if( !range_decoder.code_is_zero() ) error = true;
if( trailer.data_crc() != crc() ) error = true;
if( trailer.data_size() != data_position() ) error = true;
- if( trailer.member_size() != member_position() ) error = true;
+ if( trailer.member_size() != member_size ) error = true;
return !error;
}
@@ -169,7 +117,7 @@ int LZ_decoder::decode_member()
if( !range_decoder.try_reload() ) return 0;
if( verify_trailer_pending )
{
- if( range_decoder.available_bytes() < File_trailer::size( format_version ) &&
+ if( range_decoder.available_bytes() < File_trailer::size( member_version ) &&
!range_decoder.at_stream_end() )
return 0;
verify_trailer_pending = false;
@@ -240,13 +188,13 @@ int LZ_decoder::decode_member()
{
rep0 += range_decoder.decode( direct_bits - dis_align_bits ) << dis_align_bits;
rep0 += range_decoder.decode_tree_reversed( bm_align, dis_align_bits );
- if( rep0 == 0xFFFFFFFF ) // Marker found
+ if( rep0 == 0xFFFFFFFFU ) // Marker found
{
rep0 = rep0_saved;
range_decoder.normalize();
if( len == min_match_len ) // End Of Stream marker
{
- if( range_decoder.available_bytes() < File_trailer::size( format_version ) &&
+ if( range_decoder.available_bytes() < File_trailer::size( member_version ) &&
!range_decoder.at_stream_end() )
{ verify_trailer_pending = true; return 0; }
member_finished_ = true;
@@ -269,3 +217,63 @@ int LZ_decoder::decode_member()
}
}
}
+
+
+// Copies up to `out_size' bytes to `out_buffer' and updates `get'.
+// Returns the number of bytes copied.
+int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size ) throw()
+ {
+ if( out_size < 0 ) return 0;
+ int size = 0;
+ if( get > put )
+ {
+ size = std::min( buffer_size - get, out_size );
+ if( size > 0 )
+ {
+ std::memcpy( out_buffer, buffer + get, size );
+ get += size;
+ if( get >= buffer_size ) get = 0;
+ }
+ }
+ if( get < put )
+ {
+ const int size2 = std::min( put - get, out_size - size );
+ if( size2 > 0 )
+ {
+ std::memcpy( out_buffer + size, buffer + get, size2 );
+ get += size2;
+ size += size2;
+ }
+ }
+ return size;
+ }
+
+
+// Copies up to `in_size' bytes from `in_buffer' and updates `put'.
+// Returns the number of bytes copied.
+int Circular_buffer::write_data( const uint8_t * const in_buffer, const int in_size ) throw()
+ {
+ if( in_size < 0 ) return 0;
+ int size = 0;
+ if( put >= get )
+ {
+ size = std::min( buffer_size - put - (get == 0), in_size );
+ if( size > 0 )
+ {
+ std::memcpy( buffer + put, in_buffer, size );
+ put += size;
+ if( put >= buffer_size ) put = 0;
+ }
+ }
+ if( put < get )
+ {
+ const int size2 = std::min( get - put - 1, in_size - size );
+ if( size2 > 0 )
+ {
+ std::memcpy( buffer + put, in_buffer + size, size2 );
+ put += size2;
+ size += size2;
+ }
+ }
+ return size;
+ }
diff --git a/decoder.h b/decoder.h
index e7775b0..3842ed5 100644
--- a/decoder.h
+++ b/decoder.h
@@ -25,22 +25,33 @@
Public License.
*/
-class Input_buffer : public Circular_buffer
+class Range_decoder : public Circular_buffer
{
enum { min_available_bytes = 8 };
+ long long member_pos;
+ uint32_t code;
+ uint32_t range;
+ bool reload_pending;
bool at_stream_end_;
public:
- Input_buffer()
+ Range_decoder()
:
Circular_buffer( 65536 + min_available_bytes ),
+ member_pos( 0 ),
+ code( 0 ),
+ range( 0xFFFFFFFFU ),
+ reload_pending( false ),
at_stream_end_( false ) {}
bool at_stream_end() const throw() { return at_stream_end_; }
+ int available_bytes() const throw() { return used_bytes(); }
+ bool code_is_zero() const throw() { return ( code == 0 ); }
void finish() throw() { at_stream_end_ = true; }
bool finished() const throw() { return at_stream_end_ && !used_bytes(); }
int free_bytes() const throw()
{ if( at_stream_end_ ) return 0; return Circular_buffer::free_bytes(); }
+ long long member_position() const throw() { return member_pos; }
void purge() throw() { at_stream_end_ = true; Circular_buffer::reset(); }
void reset() throw() { at_stream_end_ = false; Circular_buffer::reset(); }
@@ -58,39 +69,11 @@ public:
if( at_stream_end_ || in_size <= 0 ) return 0;
return Circular_buffer::write_data( in_buffer, in_size );
}
- };
-
-
-class Range_decoder
- {
- mutable long long member_pos;
- uint32_t code;
- uint32_t range;
- bool reload_pending;
- Input_buffer & ibuf;
-
-public:
- Range_decoder( const int header_size, Input_buffer & buf )
- :
- member_pos( header_size ),
- code( 0 ),
- range( 0xFFFFFFFF ),
- reload_pending( false ),
- ibuf( buf )
- { for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); }
-
- bool at_stream_end() const throw() { return ibuf.at_stream_end(); }
- int available_bytes() const throw() { return ibuf.used_bytes(); }
- bool code_is_zero() const throw() { return ( code == 0 ); }
- bool enough_available_bytes() const throw()
- { return ibuf.enough_available_bytes(); }
- bool finished() const throw() { return ibuf.finished(); }
- long long member_position() const throw() { return member_pos; }
- uint8_t get_byte() const
+ uint8_t get_byte()
{
++member_pos;
- return ibuf.get_byte();
+ return Circular_buffer::get_byte();
}
bool try_reload( const bool force = false ) throw()
@@ -100,7 +83,7 @@ public:
{
reload_pending = false;
code = 0;
- range = 0xFFFFFFFF;
+ range = 0xFFFFFFFFU;
for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte();
}
return !reload_pending;
@@ -108,7 +91,7 @@ public:
void normalize()
{
- if( range <= 0x00FFFFFF )
+ if( range <= 0x00FFFFFFU )
{ range <<= 8; code = (code << 8) | get_byte(); }
}
@@ -118,7 +101,7 @@ public:
for( int i = num_bits; i > 0; --i )
{
symbol <<= 1;
- if( range <= 0x00FFFFFF )
+ if( range <= 0x00FFFFFFU )
{
range <<= 7; code = (code << 8) | get_byte();
if( code >= range ) { code -= range; symbol |= 1; }
@@ -174,16 +157,16 @@ public:
int decode_matched( Bit_model bm[], const int match_byte )
{
- Bit_model *bm1 = bm + 0x100;
+ Bit_model * const bm1 = bm + 0x100;
int symbol = 1;
- for( int i = 1; i <= 8; ++i )
+ for( int i = 7; i >= 0; --i )
{
- const int match_bit = ( match_byte << i ) & 0x100;
- const int bit = decode_bit( bm1[match_bit+symbol] );
+ const int match_bit = ( match_byte >> i ) & 1;
+ const int bit = decode_bit( bm1[(match_bit<<8)+symbol] );
symbol = ( symbol << 1 ) | bit;
- if( ( match_bit && !bit ) || ( !match_bit && bit ) )
+ if( match_bit != bit )
{
- while( ++i <= 8 )
+ while( --i >= 0 )
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
break;
}
@@ -219,16 +202,16 @@ class Literal_decoder
{
Bit_model bm_literal[1<<literal_context_bits][0x300];
- int state( const int prev_byte ) const throw()
+ int lstate( const int prev_byte ) const throw()
{ return ( prev_byte >> ( 8 - literal_context_bits ) ); }
public:
uint8_t decode( Range_decoder & range_decoder, const uint8_t prev_byte )
- { return range_decoder.decode_tree( bm_literal[state(prev_byte)], 8 ); }
+ { return range_decoder.decode_tree( bm_literal[lstate(prev_byte)], 8 ); }
uint8_t decode_matched( Range_decoder & range_decoder,
const uint8_t prev_byte, const uint8_t match_byte )
- { return range_decoder.decode_matched( bm_literal[state(prev_byte)], match_byte ); }
+ { return range_decoder.decode_matched( bm_literal[lstate(prev_byte)], match_byte ); }
};
@@ -236,7 +219,7 @@ class LZ_decoder : public Circular_buffer
{
enum { min_free_bytes = max_match_len };
long long partial_data_pos;
- const int format_version;
+ const int member_version;
const int dictionary_size;
uint32_t crc_;
bool member_finished_;
@@ -257,7 +240,7 @@ class LZ_decoder : public Circular_buffer
Bit_model bm_dis[modeled_distances-end_dis_model];
Bit_model bm_align[dis_align_size];
- Range_decoder range_decoder;
+ Range_decoder & range_decoder;
Len_decoder len_decoder;
Len_decoder rep_match_len_decoder;
Literal_decoder literal_decoder;
@@ -286,7 +269,7 @@ class LZ_decoder : public Circular_buffer
std::memcpy( buffer + put, buffer + i, len );
put += len;
}
- else for( ; len > 0 ; --len )
+ else for( ; len > 0; --len )
{
crc32.update( crc_, buffer[i] );
buffer[put] = buffer[i];
@@ -298,27 +281,26 @@ class LZ_decoder : public Circular_buffer
bool verify_trailer();
public:
- LZ_decoder( const File_header & header, Input_buffer & ibuf )
+ LZ_decoder( const File_header & header, Range_decoder & rdec )
:
Circular_buffer( std::max( 65536, header.dictionary_size() ) + min_free_bytes ),
partial_data_pos( 0 ),
- format_version( header.version ),
+ member_version( header.version() ),
dictionary_size( header.dictionary_size() ),
- crc_( 0xFFFFFFFF ),
+ crc_( 0xFFFFFFFFU ),
member_finished_( false ),
verify_trailer_pending( false ),
rep0( 0 ),
rep1( 0 ),
rep2( 0 ),
rep3( 0 ),
- range_decoder( sizeof header, ibuf ),
- literal_decoder()
+ range_decoder( rdec )
{ buffer[buffer_size-1] = 0; } // prev_byte of first_byte
bool enough_free_bytes() const throw()
{ return free_bytes() >= min_free_bytes; }
- uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFF; }
+ uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFFU; }
int decode_member();
bool member_finished() const throw()
{ return ( member_finished_ && !used_bytes() ); }
diff --git a/doc/lzlib.info b/doc/lzlib.info
index 9a516a9..af9f67b 100644
--- a/doc/lzlib.info
+++ b/doc/lzlib.info
@@ -12,7 +12,7 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir)
Lzlib Manual
************
-This manual is for Lzlib (version 0.9, 10 February 2010).
+This manual is for Lzlib (version 1.0, 8 May 2010).
* Menu:
@@ -373,6 +373,28 @@ be verified by calling `LZ_decompress_errno' before using it.
Returns 1 if all the data has been read and `LZ_decompress_close'
can be safely called. Otherwise it returns 0.
+ -- Function: int LZ_decompress_member_finished ( struct LZ_Decoder *
+ const DECODER )
+ Returns 1 if the previous call to `LZ_decompress_read' finished
+ reading the current member, indicating that final values for
+ member are available through `LZ_decompress_data_crc',
+ `LZ_decompress_data_position', and
+ `LZ_decompress_member_position'. Otherwise it returns 0.
+
+ -- Function: int LZ_decompress_member_version ( struct LZ_Decoder *
+ const DECODER )
+ Returns the version of current member from member header.
+
+ -- Function: int LZ_decompress_dictionary_size ( struct LZ_Decoder *
+ const DECODER )
+ Returns the dictionary size of current member from member header.
+
+ -- Function: unsigned int LZ_decompress_data_crc ( struct LZ_Decoder *
+ const DECODER )
+ Returns the 32 bit Cyclic Redundancy Check of the data
+ decompressed from the current member. The returned value is valid
+ only when `LZ_decompress_member_finished' returns 1.
+
-- Function: long long LZ_decompress_data_position ( struct LZ_Decoder
* const DECODER )
Returns the number of decompressed bytes already produced, but
@@ -575,6 +597,8 @@ Example 4: Decompression using LZ_decompress_write_size.
3) LZ_decompress_write
4) if no more data to write, call LZ_decompress_finish
5) LZ_decompress_read
+ 5a) optionally, if LZ_decompress_member_finished returns 1, read
+ final values for member with LZ_decompress_data_crc, etc.
6) go back to step 2 until LZ_decompress_finished returns 1
7) LZ_decompress_close
@@ -676,17 +700,17 @@ Concept Index

Tag Table:
Node: Top219
-Node: Introduction1158
-Node: Library Version2933
-Node: Buffering3578
-Node: Parameter Limits4698
-Node: Compression Functions5655
-Node: Decompression Functions11701
-Node: Error Codes16763
-Node: Error Messages18702
-Node: Data Format19281
-Node: Examples21251
-Node: Problems24827
-Node: Concept Index25399
+Node: Introduction1152
+Node: Library Version2927
+Node: Buffering3572
+Node: Parameter Limits4692
+Node: Compression Functions5649
+Node: Decompression Functions11695
+Node: Error Codes17766
+Node: Error Messages19705
+Node: Data Format20284
+Node: Examples22254
+Node: Problems25967
+Node: Concept Index26539

End Tag Table
diff --git a/doc/lzlib.texinfo b/doc/lzlib.texinfo
index 8163502..ef46af9 100644
--- a/doc/lzlib.texinfo
+++ b/doc/lzlib.texinfo
@@ -5,8 +5,8 @@
@finalout
@c %**end of header
-@set UPDATED 10 February 2010
-@set VERSION 0.9
+@set UPDATED 8 May 2010
+@set VERSION 1.0
@dircategory Data Compression
@direntry
@@ -424,6 +424,32 @@ can be safely called. Otherwise it returns 0.
@end deftypefun
+@deftypefun int LZ_decompress_member_finished ( struct LZ_Decoder * const @var{decoder} )
+Returns 1 if the previous call to @samp{LZ_decompress_read} finished
+reading the current member, indicating that final values for member are
+available through @samp{LZ_decompress_data_crc},
+@samp{LZ_decompress_data_position}, and
+@samp{LZ_decompress_member_position}. Otherwise it returns 0.
+@end deftypefun
+
+
+@deftypefun int LZ_decompress_member_version ( struct LZ_Decoder * const @var{decoder} )
+Returns the version of current member from member header.
+@end deftypefun
+
+
+@deftypefun int LZ_decompress_dictionary_size ( struct LZ_Decoder * const @var{decoder} )
+Returns the dictionary size of current member from member header.
+@end deftypefun
+
+
+@deftypefun {unsigned int} LZ_decompress_data_crc ( struct LZ_Decoder * const @var{decoder} )
+Returns the 32 bit Cyclic Redundancy Check of the data decompressed from
+the current member. The returned value is valid only when
+@samp{LZ_decompress_member_finished} returns 1.
+@end deftypefun
+
+
@deftypefun {long long} LZ_decompress_data_position ( struct LZ_Decoder * const @var{decoder} )
Returns the number of decompressed bytes already produced, but perhaps
not yet read, in the current member.
@@ -652,6 +678,8 @@ Example 4: Decompression using LZ_decompress_write_size.
3) LZ_decompress_write
4) if no more data to write, call LZ_decompress_finish
5) LZ_decompress_read
+5a) optionally, if LZ_decompress_member_finished returns 1, read
+ final values for member with LZ_decompress_data_crc, etc.
6) go back to step 2 until LZ_decompress_finished returns 1
7) LZ_decompress_close
@end example
diff --git a/encoder.cc b/encoder.cc
index 1b979ed..032b07c 100644
--- a/encoder.cc
+++ b/encoder.cc
@@ -38,8 +38,8 @@
#include "encoder.h"
-const Dis_slots dis_slots;
-const Prob_prices prob_prices;
+const Dis_slots Lzlib_namespace::dis_slots;
+const Prob_prices Lzlib_namespace::prob_prices;
int Matchfinder::write_data( const uint8_t * const in_buffer, const int in_size ) throw()
@@ -140,10 +140,11 @@ int Matchfinder::longest_match_len( int * const distances ) throw()
const uint8_t * const data = buffer + pos;
const int key2 = num_prev_positions4 + num_prev_positions3 +
( ( (int)data[0] << 8 ) | data[1] );
- const int tmp = crc32[data[0]] ^ data[1] ^ ( (int)data[2] << 8 );
- const int key3 = num_prev_positions4 + ( tmp & ( num_prev_positions3 - 1 ) );
- const int key4 = ( tmp ^ ( crc32[data[3]] << 5 ) ) &
- ( num_prev_positions4 - 1 );
+ const uint32_t tmp = crc32[data[0]] ^ data[1] ^ ( (uint32_t)data[2] << 8 );
+ const int key3 = num_prev_positions4 +
+ (int)( tmp & ( num_prev_positions3 - 1 ) );
+ const int key4 = (int)( ( tmp ^ ( crc32[data[3]] << 5 ) ) &
+ ( num_prev_positions4 - 1 ) );
if( distances )
{
@@ -251,8 +252,8 @@ void LZ_encoder::fill_distance_prices() throw()
{
for( int dis_state = 0; dis_state < max_dis_states; ++dis_state )
{
- int * dsp = dis_slot_prices[dis_state];
- const Bit_model * bmds = bm_dis_slot[dis_state];
+ int * const dsp = dis_slot_prices[dis_state];
+ const Bit_model * const bmds = bm_dis_slot[dis_state];
int slot = 0;
for( ; slot < end_dis_model && slot < num_dis_slots; ++slot )
dsp[slot] = price_symbol( bmds, slot, dis_slot_bits );
@@ -260,7 +261,7 @@ void LZ_encoder::fill_distance_prices() throw()
dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ) +
(((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift );
- int * dp = dis_prices[dis_state];
+ int * const dp = dis_prices[dis_state];
int dis = 0;
for( ; dis < start_dis_model; ++dis )
dp[dis] = dsp[dis];
@@ -276,8 +277,10 @@ void LZ_encoder::fill_distance_prices() throw()
}
-// Return value: ( dis == -1 ) && ( len == 1 ) means literal
-int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
+// Return value == number of bytes advanced (ahead).
+// trials[0]..trials[retval-1] contain the steps to encode.
+// ( trials[0].dis == -1 && trials[0].price == 1 ) means literal.
+int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
const State & state )
{
int main_len;
@@ -312,15 +315,14 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
return main_len;
}
- trials[0].state = state;
- for( int i = 0; i < num_rep_distances; ++i ) trials[0].reps[i] = reps[i];
-
+ {
+ const int pos_state = matchfinder.data_position() & pos_state_mask;
const uint8_t prev_byte = matchfinder[-1];
const uint8_t cur_byte = matchfinder[0];
const uint8_t match_byte = matchfinder[-reps[0]-1];
- unsigned int position = matchfinder.data_position();
- const int pos_state = position & pos_state_mask;
+ trials[0].state = state;
+ for( int i = 0; i < num_rep_distances; ++i ) trials[0].reps[i] = reps[i];
trials[1].dis = -1;
trials[1].prev_index = 0;
trials[1].price = price0( bm_match[state()][pos_state] );
@@ -368,6 +370,7 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
trials[len].update( rep, 0, price +
rep_match_len_encoder.price( len, pos_state ) );
}
+ }
int cur = 0;
int num_trials = main_len;
@@ -375,7 +378,7 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
while( true )
{
- if( ++cur >= num_trials )
+ if( ++cur >= num_trials ) // no more initialized trials
{
backward( cur );
return cur;
@@ -407,10 +410,11 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
mtf_reps( cur_trial.dis, cur_trial.reps );
}
+ const int pos_state = matchfinder.data_position() & pos_state_mask;
const uint8_t prev_byte = matchfinder[-1];
const uint8_t cur_byte = matchfinder[0];
const uint8_t match_byte = matchfinder[-cur_trial.reps[0]-1];
- const int pos_state = ++position & pos_state_mask;
+
int next_price = cur_trial.price + price0( bm_match[cur_trial.state()][pos_state] );
if( cur_trial.state.is_char() )
next_price += literal_encoder.price_symbol( prev_byte, cur_byte );
@@ -454,7 +458,7 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
if( newlen <= len_limit &&
( newlen > min_match_len ||
( newlen == min_match_len &&
- match_distances[newlen] < modeled_distances ) ) )
+ match_distances[min_match_len] < modeled_distances ) ) )
{
const int normal_match_price = match_price +
price0( bm_rep[cur_trial.state()] );
@@ -470,37 +474,38 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
}
- // Sync Flush mark => (dis == 0xFFFFFFFF, len == min_match_len + 1)
-bool LZ_encoder::sync_flush()
+ // End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len)
+bool LZ_encoder::full_flush( const State & state )
{
- if( member_finished_ || range_encoder.free_bytes() < max_marker_size )
+ if( member_finished_ ||
+ range_encoder.free_bytes() < File_trailer::size() + max_marker_size )
return false;
- const int pos_state = ( matchfinder.data_position() ) & pos_state_mask;
+ const int pos_state = matchfinder.data_position() & pos_state_mask;
range_encoder.encode_bit( bm_match[state()][pos_state], 1 );
range_encoder.encode_bit( bm_rep[state()], 0 );
- encode_pair( 0xFFFFFFFF, min_match_len + 1, pos_state );
+ encode_pair( 0xFFFFFFFFU, min_match_len, pos_state );
range_encoder.flush();
+ File_trailer trailer;
+ trailer.data_crc( crc() );
+ trailer.data_size( matchfinder.data_position() );
+ trailer.member_size( range_encoder.member_position() + File_trailer::size() );
+ for( int i = 0; i < File_trailer::size(); ++i )
+ range_encoder.put_byte( trailer.data[i] );
return true;
}
- // End Of Stream mark => (dis == 0xFFFFFFFF, len == min_match_len)
-bool LZ_encoder::full_flush()
+ // Sync Flush mark => (dis == 0xFFFFFFFFU, len == min_match_len + 1)
+bool LZ_encoder::sync_flush()
{
- if( member_finished_ ||
- range_encoder.free_bytes() < (int)sizeof (File_trailer) + max_marker_size )
+ if( member_finished_ || range_encoder.free_bytes() < max_marker_size )
return false;
- const int pos_state = ( matchfinder.data_position() ) & pos_state_mask;
+ const State & state = main_state;
+ const int pos_state = matchfinder.data_position() & pos_state_mask;
range_encoder.encode_bit( bm_match[state()][pos_state], 1 );
range_encoder.encode_bit( bm_rep[state()], 0 );
- encode_pair( 0xFFFFFFFF, min_match_len, pos_state );
+ encode_pair( 0xFFFFFFFFU, min_match_len + 1, pos_state );
range_encoder.flush();
- File_trailer trailer;
- trailer.data_crc( crc() );
- trailer.data_size( matchfinder.data_position() );
- trailer.member_size( range_encoder.member_position() + sizeof trailer );
- for( unsigned int i = 0; i < sizeof trailer; ++i )
- range_encoder.put_byte( ((uint8_t *)&trailer)[i] );
return true;
}
@@ -508,14 +513,12 @@ bool LZ_encoder::full_flush()
LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header,
const long long member_size )
:
- member_size_limit( member_size - sizeof (File_trailer) - max_marker_size ),
+ member_size_limit( member_size - File_trailer::size() - max_marker_size ),
longest_match_found( 0 ),
- crc_( 0xFFFFFFFF ),
+ crc_( 0xFFFFFFFFU ),
matchfinder( mf ),
- range_encoder(),
len_encoder( matchfinder.match_len_limit() ),
rep_match_len_encoder( matchfinder.match_len_limit() ),
- literal_encoder(),
num_dis_slots( 2 * File_header::real_bits( matchfinder.dictionary_size() - 1 ) ),
fill_counter( 0 ),
member_finished_( false )
@@ -523,16 +526,17 @@ LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header,
for( int i = 0; i < num_rep_distances; ++i ) rep_distances[i] = 0;
fill_align_prices();
- for( unsigned int i = 0; i < sizeof header; ++i )
- range_encoder.put_byte( ((uint8_t *)&header)[i] );
+ for( int i = 0; i < File_header::size; ++i )
+ range_encoder.put_byte( header.data[i] );
}
bool LZ_encoder::encode_member( const bool finish )
{
+ State & state = main_state;
if( member_finished_ ) return true;
if( range_encoder.member_position() >= member_size_limit )
- { if( full_flush() ) { member_finished_ = true; } return true; }
+ { if( full_flush( state ) ) { member_finished_ = true; } return true; }
// encode first byte
if( matchfinder.data_position() == 0 && !matchfinder.finished() )
@@ -551,29 +555,30 @@ bool LZ_encoder::encode_member( const bool finish )
{
if( matchfinder.finished() )
{
- if( finish && full_flush() ) member_finished_ = true;
+ if( finish && full_flush( state ) ) member_finished_ = true;
return true;
}
if( !matchfinder.enough_available_bytes() ||
!range_encoder.enough_free_bytes() ) return true;
if( fill_counter <= 0 ) { fill_distance_prices(); fill_counter = 512; }
- int ahead = best_pair_sequence( rep_distances, state );
+ int ahead = sequence_optimizer( rep_distances, state );
if( ahead <= 0 ) return false;
fill_counter -= ahead;
for( int i = 0; ; )
{
const int pos_state = ( matchfinder.data_position() - ahead ) & pos_state_mask;
- int dis = trials[i].dis;
+ const int dis = trials[i].dis;
const int len = trials[i].price;
bool bit = ( dis < 0 && len == 1 );
range_encoder.encode_bit( bm_match[state()][pos_state], !bit );
- if( bit )
+ if( bit ) // literal byte
{
const uint8_t prev_byte = matchfinder[-ahead-1];
const uint8_t cur_byte = matchfinder[-ahead];
+ crc32.update( crc_, cur_byte );
if( state.is_char() )
literal_encoder.encode( range_encoder, prev_byte, cur_byte );
else
@@ -583,8 +588,9 @@ bool LZ_encoder::encode_member( const bool finish )
}
state.set_char();
}
- else
+ else // match or repeated match
{
+ crc32.update( crc_, matchfinder.ptr_to_current_pos() - ahead, len );
mtf_reps( dis, rep_distances );
bit = ( dis < num_rep_distances );
range_encoder.encode_bit( bm_rep[state()], bit );
@@ -613,13 +619,11 @@ bool LZ_encoder::encode_member( const bool finish )
state.set_match();
}
}
- for( int j = 0; j < len; ++j )
- crc32.update( crc_, matchfinder[j-ahead] );
ahead -= len; i += len;
if( range_encoder.member_position() >= member_size_limit )
{
if( !matchfinder.dec_pos( ahead ) ) return false;
- if( full_flush() ) member_finished_ = true;
+ if( full_flush( state ) ) member_finished_ = true;
return true;
}
if( ahead <= 0 ) break;
diff --git a/encoder.h b/encoder.h
index 590dea1..5f65743 100644
--- a/encoder.h
+++ b/encoder.h
@@ -53,7 +53,8 @@ public:
}
};
-extern const Dis_slots dis_slots;
+namespace Lzlib_namespace { extern const Dis_slots dis_slots; }
+using Lzlib_namespace::dis_slots;
class Prob_prices
@@ -74,11 +75,12 @@ public:
}
}
- int operator[]( const int symbol ) const throw()
- { return data[symbol >> 2]; }
+ int operator[]( const int probability ) const throw()
+ { return data[probability >> 2]; }
};
-extern const Prob_prices prob_prices;
+namespace Lzlib_namespace { extern const Prob_prices prob_prices; }
+using Lzlib_namespace::prob_prices;
inline int price0( const Bit_model & bm ) throw()
@@ -130,14 +132,14 @@ inline int price_matched( const Bit_model bm[], const int symbol,
for( int i = 7; i >= 0; --i )
{
const int match_bit = ( match_byte >> i ) & 1;
- const int bit = ( symbol >> i ) & 1;
+ int bit = ( symbol >> i ) & 1;
price += price_bit( bm[(match_bit<<8)+model+0x100], bit );
model = ( model << 1 ) | bit;
if( match_bit != bit )
{
while( --i >= 0 )
{
- const int bit = ( symbol >> i ) & 1;
+ bit = ( symbol >> i ) & 1;
price += price_bit( bm[model], bit );
model = ( model << 1 ) | bit;
}
@@ -236,14 +238,14 @@ class Range_encoder : public Circular_buffer
void shift_low()
{
const uint32_t carry = low >> 32;
- if( low < 0xFF000000LL || carry == 1 )
+ if( low < 0xFF000000U || carry == 1 )
{
put_byte( cache + carry );
for( ; ff_count > 0; --ff_count ) put_byte( 0xFF + carry );
cache = low >> 24;
}
else ++ff_count;
- low = ( low & 0x00FFFFFFLL ) << 8;
+ low = ( low & 0x00FFFFFFU ) << 8;
}
public:
@@ -252,7 +254,7 @@ public:
Circular_buffer( 65536 + min_free_bytes ),
low( 0 ),
partial_member_pos( 0 ),
- range( 0xFFFFFFFF ),
+ range( 0xFFFFFFFFU ),
ff_count( 0 ),
cache( 0 ) {}
@@ -270,7 +272,7 @@ public:
{
for( int i = 0; i < 5; ++i ) shift_low();
low = 0;
- range = 0xFFFFFFFF;
+ range = 0xFFFFFFFFU;
ff_count = 0;
cache = 0;
}
@@ -284,7 +286,7 @@ public:
{
range >>= 1;
if( (symbol >> i) & 1 ) low += range;
- if( range <= 0x00FFFFFF ) { range <<= 8; shift_low(); }
+ if( range <= 0x00FFFFFFU ) { range <<= 8; shift_low(); }
}
}
@@ -302,7 +304,7 @@ public:
range -= bound;
bm.probability -= bm.probability >> bit_model_move_bits;
}
- if( range <= 0x00FFFFFF ) { range <<= 8; shift_low(); }
+ if( range <= 0x00FFFFFFU ) { range <<= 8; shift_low(); }
}
void encode_tree( Bit_model bm[], const int symbol, const int num_bits )
@@ -335,15 +337,15 @@ public:
int model = 1;
for( int i = 7; i >= 0; --i )
{
- const int bit = ( symbol >> i ) & 1;
const int match_bit = ( match_byte >> i ) & 1;
+ int bit = ( symbol >> i ) & 1;
encode_bit( bm[(match_bit<<8)+model+0x100], bit );
model = ( model << 1 ) | bit;
if( match_bit != bit )
{
while( --i >= 0 )
{
- const int bit = ( symbol >> i ) & 1;
+ bit = ( symbol >> i ) & 1;
encode_bit( bm[model], bit );
model = ( model << 1 ) | bit;
}
@@ -368,17 +370,17 @@ class Len_encoder
void update_prices( const int pos_state ) throw()
{
int * const pps = prices[pos_state];
- int price = price0( choice1 );
+ int tmp = price0( choice1 );
int len = 0;
for( ; len < len_low_symbols && len < len_symbols; ++len )
- pps[len] = price +
+ pps[len] = tmp +
price_symbol( bm_low[pos_state], len, len_low_bits );
- price = price1( choice1 );
+ tmp = price1( choice1 );
for( ; len < len_low_symbols + len_mid_symbols && len < len_symbols; ++len )
- pps[len] = price + price0( choice2 ) +
+ pps[len] = tmp + price0( choice2 ) +
price_symbol( bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
for( ; len < len_symbols; ++len )
- pps[len] = price + price1( choice2 ) +
+ pps[len] = tmp + price1( choice2 ) +
price_symbol( bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
counters[pos_state] = len_symbols;
}
@@ -402,21 +404,21 @@ class Literal_encoder
{
Bit_model bm_literal[1<<literal_context_bits][0x300];
- int state( const int prev_byte ) const throw()
+ int lstate( const int prev_byte ) const throw()
{ return ( prev_byte >> ( 8 - literal_context_bits ) ); }
public:
void encode( Range_encoder & range_encoder, uint8_t prev_byte, uint8_t symbol )
- { range_encoder.encode_tree( bm_literal[state(prev_byte)], symbol, 8 ); }
+ { range_encoder.encode_tree( bm_literal[lstate(prev_byte)], symbol, 8 ); }
void encode_matched( Range_encoder & range_encoder, uint8_t prev_byte, uint8_t match_byte, uint8_t symbol )
- { range_encoder.encode_matched( bm_literal[state(prev_byte)], symbol, match_byte ); }
+ { range_encoder.encode_matched( bm_literal[lstate(prev_byte)], symbol, match_byte ); }
int price_matched( uint8_t prev_byte, uint8_t symbol, uint8_t match_byte ) const throw()
- { return ::price_matched( bm_literal[state(prev_byte)], symbol, match_byte ); }
+ { return ::price_matched( bm_literal[lstate(prev_byte)], symbol, match_byte ); }
int price_symbol( uint8_t prev_byte, uint8_t symbol ) const throw()
- { return ::price_symbol( bm_literal[state(prev_byte)], symbol, 8 ); }
+ { return ::price_symbol( bm_literal[lstate(prev_byte)], symbol, 8 ); }
};
@@ -468,14 +470,15 @@ class LZ_encoder
int align_prices[dis_align_size];
int align_price_count;
int fill_counter;
- State state;
+ State main_state;
bool member_finished_;
void fill_align_prices() throw();
void fill_distance_prices() throw();
- uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFF; }
+ uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFFU; }
+ // move-to-front: bring dis to the front of reps (moving it within reps, or inserting it)
void mtf_reps( const int dis, int reps[num_rep_distances] ) throw()
{
if( dis >= num_rep_distances )
@@ -582,10 +585,10 @@ class LZ_encoder
}
}
- int best_pair_sequence( const int reps[num_rep_distances],
+ int sequence_optimizer( const int reps[num_rep_distances],
const State & state );
- bool full_flush();
+ bool full_flush( const State & state );
public:
LZ_encoder( Matchfinder & mf, const File_header & header,
diff --git a/lzip.h b/lzip.h
index 7cb9927..2138056 100644
--- a/lzip.h
+++ b/lzip.h
@@ -32,7 +32,7 @@ class State
public:
enum { states = 12 };
State() throw() : st( 0 ) {}
- int operator()() const throw() { return st; }
+ unsigned char operator()() const throw() { return st; }
bool is_char() const throw() { return st < 7; }
void set_char() throw()
@@ -118,7 +118,7 @@ public:
{
unsigned int c = n;
for( int k = 0; k < 8; ++k )
- { if( c & 1 ) c = 0xEDB88320 ^ ( c >> 1 ); else c >>= 1; }
+ { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; }
data[n] = c;
}
}
@@ -133,29 +133,27 @@ public:
}
};
-extern const CRC32 crc32;
+namespace Lzlib_namespace { extern const CRC32 crc32; }
+using Lzlib_namespace::crc32;
const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' };
struct File_header
{
- uint8_t magic[4];
- uint8_t version;
- uint8_t coded_dict_size;
+ uint8_t data[6]; // 0-3 magic bytes
+ // 4 version
+ // 5 coded_dict_size
+ enum { size = 6 };
void set_magic() throw()
- { std::memcpy( magic, magic_string, sizeof magic ); version = 1; }
+ { std::memcpy( data, magic_string, 4 ); data[4] = 1; }
bool verify_magic() const throw()
- {
- return ( std::memcmp( magic, magic_string, sizeof magic ) == 0 );
- }
+ { return ( std::memcmp( data, magic_string, 4 ) == 0 ); }
- bool verify_version() const throw()
- {
- return ( version <= 1 );
- }
+ uint8_t version() const throw() { return data[4]; }
+ bool verify_version() const throw() { return ( data[4] <= 1 ); }
bool verify() const throw()
{
@@ -174,24 +172,24 @@ struct File_header
int dictionary_size() const throw()
{
- int size = ( 1 << ( coded_dict_size & 0x1F ) );
- if( size > min_dictionary_size && size <= max_dictionary_size )
- size -= ( size / 16 ) * ( ( coded_dict_size >> 5 ) & 0x07 );
- return size;
+ int sz = ( 1 << ( data[5] & 0x1F ) );
+ if( sz > min_dictionary_size && sz <= max_dictionary_size )
+ sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 0x07 );
+ return sz;
}
- bool dictionary_size( const int size ) throw()
+ bool dictionary_size( const int sz ) throw()
{
- if( size >= min_dictionary_size && size <= max_dictionary_size )
+ if( sz >= min_dictionary_size && sz <= max_dictionary_size )
{
- coded_dict_size = real_bits( size - 1 );
- if( size > min_dictionary_size )
+ data[5] = real_bits( sz - 1 );
+ if( sz > min_dictionary_size )
{
- const int base_size = 1 << coded_dict_size;
+ const int base_size = 1 << data[5];
const int wedge = base_size / 16;
for( int i = 7; i >= 1; --i )
- if( base_size - ( i * wedge ) >= size )
- { coded_dict_size |= ( i << 5 ); break; }
+ if( base_size - ( i * wedge ) >= sz )
+ { data[5] |= ( i << 5 ); break; }
}
return true;
}
@@ -202,50 +200,45 @@ struct File_header
struct File_trailer
{
- uint8_t data_crc_[4]; // CRC32 of the uncompressed data
- uint8_t data_size_[8]; // size of the uncompressed data
- uint8_t member_size_[8]; // member size including header and trailer
+ uint8_t data[20]; // 0-3 CRC32 of the uncompressed data
+ // 4-11 size of the uncompressed data
+ // 12-19 member size including header and trailer; every field is stored low byte first
- static int size( const int version )
- { return sizeof (File_trailer) - ( ( version >= 1 ) ? 0 : 8 ); }
+ static int size( const int version = 1 )
+ { return ( ( version >= 1 ) ? 20 : 12 ); } // 12 = trailer without the 8 member-size bytes
uint32_t data_crc() const throw()
{
uint32_t tmp = 0;
- for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data_crc_[i]; }
+ for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } // data[3] is the most significant byte
return tmp;
}
void data_crc( uint32_t crc ) throw()
- {
- for( int i = 0; i < 4; ++i )
- { data_crc_[i] = (uint8_t)crc; crc >>= 8; }
- }
+ { for( int i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } } // low byte goes to data[0]
long long data_size() const throw()
{
long long tmp = 0;
- for( int i = 7; i >= 0; --i ) { tmp <<= 8; tmp += data_size_[i]; }
+ for( int i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } // data[11] is the most significant byte
return tmp;
}
- void data_size( long long size ) throw()
+ void data_size( long long sz ) throw()
{
- for( int i = 0; i < 8; ++i )
- { data_size_[i] = (uint8_t)size; size >>= 8; }
+ for( int i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } // low byte goes to data[4]
}
long long member_size() const throw()
{
long long tmp = 0;
- for( int i = 7; i >= 0; --i ) { tmp <<= 8; tmp += member_size_[i]; }
+ for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } // data[19] is the most significant byte
return tmp;
}
- void member_size( long long size ) throw()
+ void member_size( long long sz ) throw()
{
- for( int i = 0; i < 8; ++i )
- { member_size_[i] = (uint8_t)size; size >>= 8; }
+ for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } // low byte goes to data[12]
}
};
diff --git a/lzlib.cc b/lzlib.cc
index de5d0a9..740e067 100644
--- a/lzlib.cc
+++ b/lzlib.cc
@@ -73,9 +73,10 @@ struct LZ_Decoder
{
long long partial_in_size;
long long partial_out_size;
- Input_buffer * ibuf;
+ Range_decoder * rdec;
LZ_decoder * lz_decoder;
LZ_Errno lz_errno;
+ File_header member_header; // header of current member
bool fatal;
bool seeking;
@@ -83,19 +84,21 @@ struct LZ_Decoder
:
partial_in_size( 0 ),
partial_out_size( 0 ),
- ibuf( 0 ),
+ rdec( 0 ),
lz_decoder( 0 ),
lz_errno( LZ_ok ),
fatal( false ),
seeking( false )
- {}
+ {
+ for( int i = 0; i < File_header::size; ++i ) member_header.data[i] = 0;
+ }
};
bool verify_decoder( struct LZ_Decoder * const decoder )
{
if( !decoder ) return false;
- if( !decoder->ibuf )
+ if( !decoder->rdec )
{ decoder->lz_errno = LZ_bad_argument; return false; }
return true;
}
@@ -317,9 +320,9 @@ struct LZ_Decoder * LZ_decompress_open()
if( !decoder ) return 0;
LZ_Decoder & d = *decoder;
- try { d.ibuf = new Input_buffer; }
+ try { d.rdec = new Range_decoder; }
catch( std::bad_alloc )
- { d.ibuf = 0; d.lz_errno = LZ_mem_error; d.fatal = true; }
+ { d.rdec = 0; d.lz_errno = LZ_mem_error; d.fatal = true; }
return decoder;
}
@@ -328,7 +331,7 @@ int LZ_decompress_close( struct LZ_Decoder * const decoder )
{
if( !decoder ) return -1;
if( decoder->lz_decoder ) delete decoder->lz_decoder;
- if( decoder->ibuf ) delete decoder->ibuf;
+ if( decoder->rdec ) delete decoder->rdec;
delete decoder;
return 0;
}
@@ -338,8 +341,8 @@ int LZ_decompress_finish( struct LZ_Decoder * const decoder )
{
if( !verify_decoder( decoder ) || decoder->fatal ) return -1;
LZ_Decoder & d = *decoder;
- if( d.seeking ) { d.seeking = false; d.ibuf->purge(); }
- else d.ibuf->finish();
+ if( d.seeking ) { d.seeking = false; d.rdec->purge(); }
+ else d.rdec->finish();
return 0;
}
@@ -351,7 +354,7 @@ int LZ_decompress_reset( struct LZ_Decoder * const decoder )
if( d.lz_decoder ) { delete d.lz_decoder; d.lz_decoder = 0; }
d.partial_in_size = 0;
d.partial_out_size = 0;
- d.ibuf->reset();
+ d.rdec->reset();
d.lz_errno = LZ_ok;
d.fatal = false;
d.seeking = false;
@@ -364,11 +367,11 @@ int LZ_decompress_sync_to_member( struct LZ_Decoder * const decoder )
if( !verify_decoder( decoder ) ) return -1;
LZ_Decoder & d = *decoder;
if( d.lz_decoder ) { delete d.lz_decoder; d.lz_decoder = 0; }
- if( d.ibuf->find_header() ) d.seeking = false;
+ if( d.rdec->find_header() ) d.seeking = false;
else
{
- if( !d.ibuf->at_stream_end() ) d.seeking = true;
- else { d.seeking = false; d.ibuf->purge(); }
+ if( !d.rdec->at_stream_end() ) d.seeking = true;
+ else { d.seeking = false; d.rdec->purge(); }
}
d.lz_errno = LZ_ok;
d.fatal = false;
@@ -391,22 +394,21 @@ int LZ_decompress_read( struct LZ_Decoder * const decoder,
}
if( !d.lz_decoder )
{
- if( d.ibuf->used_bytes() < 5 + (int)sizeof (File_header) )
+ if( d.rdec->used_bytes() < 5 + File_header::size )
{
- if( !d.ibuf->at_stream_end() || d.ibuf->finished() ) return 0;
- d.ibuf->purge(); // remove trailing garbage
+ if( !d.rdec->at_stream_end() || d.rdec->finished() ) return 0;
+ d.rdec->purge(); // remove trailing garbage
d.lz_errno = LZ_header_error;
d.fatal = true;
return -1;
}
- File_header header;
- if( !d.ibuf->read_header( header ) )
+ if( !d.rdec->read_header( d.member_header ) )
{
d.lz_errno = LZ_header_error;
d.fatal = true;
return -1;
}
- try { d.lz_decoder = new LZ_decoder( header, *d.ibuf ); }
+ try { d.lz_decoder = new LZ_decoder( d.member_header, *d.rdec ); }
catch( std::bad_alloc ) // not enough free memory
{
d.lz_decoder = 0;
@@ -432,12 +434,12 @@ int LZ_decompress_write( struct LZ_Decoder * const decoder,
{
if( !verify_decoder( decoder ) || decoder->fatal ) return -1;
LZ_Decoder & d = *decoder;
- int result = d.ibuf->write_data( buffer, size );
+ int result = d.rdec->write_data( buffer, size );
while( d.seeking )
{
- if( d.ibuf->find_header() ) d.seeking = false;
+ if( d.rdec->find_header() ) d.seeking = false;
if( result >= size ) break;
- const int size2 = d.ibuf->write_data( buffer + result, size - result );
+ const int size2 = d.rdec->write_data( buffer + result, size - result );
if( size2 > 0 ) result += size2;
else break;
}
@@ -448,7 +450,7 @@ int LZ_decompress_write( struct LZ_Decoder * const decoder,
int LZ_decompress_write_size( struct LZ_Decoder * const decoder )
{
if( !verify_decoder( decoder ) || decoder->fatal ) return -1;
- return decoder->ibuf->free_bytes();
+ return decoder->rdec->free_bytes();
}
@@ -462,11 +464,40 @@ LZ_Errno LZ_decompress_errno( struct LZ_Decoder * const decoder )
int LZ_decompress_finished( struct LZ_Decoder * const decoder )
{
if( !verify_decoder( decoder ) ) return -1;
- return ( decoder->ibuf->finished() &&
+ return ( decoder->rdec->finished() &&
( !decoder->lz_decoder || decoder->lz_decoder->member_finished() ) );
}
+int LZ_decompress_member_finished( struct LZ_Decoder * const decoder )
+ { // Reports whether the current member decoder says its member is finished.
+ if( !verify_decoder( decoder ) ) return -1; // null decoder or missing range decoder
+ return ( decoder->lz_decoder && decoder->lz_decoder->member_finished() ); // 1 only if a member decoder exists and reports finished, else 0
+ }
+
+// Returns the version byte of the stored member header, or -1 if decoder fails verify_decoder.
+int LZ_decompress_member_version( struct LZ_Decoder * const decoder )
+ {
+ if( !verify_decoder( decoder ) ) return -1;
+ return decoder->member_header.version(); // header is zero-filled by the constructor until LZ_decompress_read reads one
+ }
+
+// Returns the dictionary size decoded from the stored member header, or -1 if decoder fails verify_decoder.
+int LZ_decompress_dictionary_size( struct LZ_Decoder * const decoder )
+ {
+ if( !verify_decoder( decoder ) ) return -1;
+ return decoder->member_header.dictionary_size(); // NOTE(review): decodes the zero-filled header if no header has been read yet
+ }
+
+// Returns the crc() value of the current member decoder, or 0 when there is none.
+unsigned int LZ_decompress_data_crc( struct LZ_Decoder * const decoder )
+ {
+ if( verify_decoder( decoder ) && decoder->lz_decoder )
+ return decoder->lz_decoder->crc();
+ else return 0; // invalid decoder or no member decoder; 0 may also be a legitimate CRC value
+ }
+
+
long long LZ_decompress_data_position( struct LZ_Decoder * const decoder )
{
if( !verify_decoder( decoder ) ) return -1;
diff --git a/lzlib.h b/lzlib.h
index 9ac15fa..8eb6d75 100644
--- a/lzlib.h
+++ b/lzlib.h
@@ -29,7 +29,7 @@
extern "C" {
#endif
-const char * const LZ_version_string = "0.9";
+const char * const LZ_version_string = "1.0";
enum LZ_Errno { LZ_ok = 0, LZ_bad_argument, LZ_mem_error,
LZ_sequence_error, LZ_header_error, LZ_unexpected_eof,
@@ -96,7 +96,11 @@ int LZ_decompress_write_size( struct LZ_Decoder * const decoder );
enum LZ_Errno LZ_decompress_errno( struct LZ_Decoder * const decoder );
int LZ_decompress_finished( struct LZ_Decoder * const decoder );
+int LZ_decompress_member_finished( struct LZ_Decoder * const decoder );
+int LZ_decompress_member_version( struct LZ_Decoder * const decoder );
+int LZ_decompress_dictionary_size( struct LZ_Decoder * const decoder );
+unsigned int LZ_decompress_data_crc( struct LZ_Decoder * const decoder );
long long LZ_decompress_data_position( struct LZ_Decoder * const decoder );
long long LZ_decompress_member_position( struct LZ_Decoder * const decoder );
long long LZ_decompress_total_in_size( struct LZ_Decoder * const decoder );
diff --git a/main.cc b/main.cc
index 3d4c0f6..ce21764 100644
--- a/main.cc
+++ b/main.cc
@@ -41,6 +41,10 @@
#include "arg_parser.h"
#include "lzlib.h"
+#if CHAR_BIT != 8
+#error "Environments where CHAR_BIT != 8 are not supported."
+#endif
+
#ifndef LLONG_MAX
#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL
#endif
@@ -51,10 +55,10 @@
#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
#endif
-void show_error( const char * msg, const int errcode = 0, const bool help = false ) throw();
-void internal_error( const char * msg );
-int readblock( const int fd, uint8_t * buf, const int size ) throw();
-int writeblock( const int fd, const uint8_t * buf, const int size ) throw();
+void show_error( const char * const msg, const int errcode = 0, const bool help = false ) throw();
+void internal_error( const char * const msg );
+int readblock( const int fd, uint8_t * const buf, const int size ) throw();
+int writeblock( const int fd, const uint8_t * const buf, const int size ) throw();
namespace {
@@ -75,7 +79,7 @@ struct { const char * from; const char * to; } const known_extensions[] = {
{ ".tlz", ".tar" },
{ 0, 0 } };
-struct lzma_options
+struct Lzma_options
{
int dictionary_size; // 4KiB..512MiB
int match_len_limit; // 5..273
@@ -85,6 +89,7 @@ enum Mode { m_compress = 0, m_decompress, m_test };
std::string output_filename;
int outfd = -1;
+mode_t outfd_mode = S_IRUSR | S_IWUSR;
int verbosity = 0;
bool delete_output_on_interrupt = false;
@@ -164,7 +169,31 @@ void show_version() throw()
}
-long long getnum( const char * ptr, const int bs = 0,
+const char * format_num( long long num, long long limit = 9999,
+ const int set_prefix = 0 ) throw()
+ { // Formats num as "<value> <prefix>", dividing by 1000 or 1024 until it is short enough.
+ const char * const si_prefix[8] =
+ { "k", "M", "G", "T", "P", "E", "Z", "Y" };
+ const char * const binary_prefix[8] =
+ { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
+ static bool si = false; // selected prefix family; static, so it persists across calls
+ static char buf[16]; // result storage shared by all calls; each call overwrites it
+
+ if( set_prefix ) si = ( set_prefix > 0 ); // nonzero set_prefix switches to SI (> 0) or binary (< 0) prefixes
+ const int factor = ( si ) ? 1000 : 1024;
+ const char * const *prefix = ( si ) ? si_prefix : binary_prefix;
+ const char *p = "";
+ limit = std::max( 999LL, std::min( 999999LL, limit ) ); // clamp limit to the range 999..999999
+
+ for( int i = 0; i < 8 && ( llabs( num ) > limit ||
+ ( llabs( num ) >= factor && num % factor == 0 ) ); ++i )
+ { num /= factor; p = prefix[i]; } // scale while |num| > limit, or while num is an exact multiple of factor
+ snprintf( buf, sizeof buf, "%lld %s", num, p );
+ return buf; // pointer into the static buffer: copy it before calling format_num again
+ }
+
+
+long long getnum( const char * const ptr, const int bs = 0,
const long long llimit = LLONG_MIN + 1,
const long long ulimit = LLONG_MAX ) throw()
{
@@ -222,7 +251,7 @@ long long getnum( const char * ptr, const int bs = 0,
}
-int get_dict_size( const char * arg ) throw()
+int get_dict_size( const char * const arg ) throw()
{
char *tail;
int bits = std::strtol( arg, &tail, 0 );
@@ -246,7 +275,7 @@ int extension_index( const std::string & name ) throw()
}
-int open_instream( const std::string & name, struct stat * in_statsp,
+int open_instream( const std::string & name, struct stat * const in_statsp,
const Mode program_mode, const int eindex,
const bool force, const bool to_stdout ) throw()
{
@@ -317,13 +346,10 @@ void set_d_outname( const std::string & name, const int i ) throw()
bool open_outstream( const bool force ) throw()
{
- if( force )
- outfd = open( output_filename.c_str(),
- O_CREAT | O_TRUNC | O_WRONLY | o_binary,
- S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH );
- else outfd = open( output_filename.c_str(),
- O_CREAT | O_EXCL | O_WRONLY | o_binary,
- S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH );
+ int flags = O_CREAT | O_WRONLY | o_binary;
+ if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
+
+ outfd = open( output_filename.c_str(), flags, outfd_mode );
if( outfd < 0 )
{
if( errno == EEXIST ) outfd = -2; else outfd = -1;
@@ -362,6 +388,7 @@ void cleanup_and_fail( const int retval ) throw()
{
if( delete_output_on_interrupt )
{
+ delete_output_on_interrupt = false;
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Deleting output file `%s', if it exists.\n",
program_name, output_filename.c_str() );
@@ -379,8 +406,9 @@ void close_and_set_permissions( const struct stat * const in_statsp )
bool error = false;
if( in_statsp )
{
- if( fchmod( outfd, in_statsp->st_mode ) != 0 ) error = true;
- else (void)fchown( outfd, in_statsp->st_uid, in_statsp->st_gid );
+ if( fchmod( outfd, in_statsp->st_mode ) != 0 ||
+ ( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 &&
+ errno != EPERM ) ) error = true;
// fchown will in many cases return with EPERM, which can be safely ignored.
}
if( close( outfd ) == 0 ) outfd = -1;
@@ -423,6 +451,7 @@ int do_compress( LZ_Encoder * const encoder, const long long member_size,
const int buffer_size = 65536;
uint8_t buffer[buffer_size];
+ if( verbosity >= 1 ) pp();
while( true )
{
int in_size = 0;
@@ -439,7 +468,6 @@ int do_compress( LZ_Encoder * const encoder, const long long member_size,
in_size += rd;
}
const int out_size = LZ_compress_read( encoder, buffer, buffer_size );
-// std::fprintf( stderr, "%6d in_size, %5d out_size.\n", in_size, out_size );
if( out_size < 0 )
{
pp();
@@ -503,7 +531,7 @@ int do_compress( LZ_Encoder * const encoder, const long long member_size,
int compress( const long long member_size, const long long volume_size,
- const lzma_options & encoder_options, const int infd,
+ const Lzma_options & encoder_options, const int infd,
const Pretty_print & pp, const struct stat * const in_statsp )
{
LZ_Encoder * const encoder =
@@ -560,9 +588,25 @@ int do_decompress( LZ_Decoder * const decoder, const int infd,
{ pp(); show_error( "write error", errno ); return 1; }
}
}
- else { if( rd < 0 ) out_size = rd; break; }
+ else if( rd < 0 ) { out_size = rd; break; }
+ if( verbosity >= 1 && LZ_decompress_member_finished( decoder ) == 1 )
+ {
+ pp();
+ if( verbosity >= 2 )
+ std::fprintf( stderr, "version %d, dictionary size %7sB. ",
+ LZ_decompress_member_version( decoder ),
+ format_num( LZ_decompress_dictionary_size( decoder ) ) );
+ if( verbosity >= 3 )
+ std::fprintf( stderr, "data crc %08X, data size %9lld, member size %8lld. ",
+ LZ_decompress_data_crc( decoder ),
+ LZ_decompress_data_position( decoder ),
+ LZ_decompress_member_position( decoder ) );
+ if( testing ) std::fprintf( stderr, "ok\n" );
+ else std::fprintf( stderr, "done\n" );
+ pp.reset();
+ }
+ if( rd <= 0 ) break;
}
-// std::fprintf( stderr, "%5d in_size, %6d out_size.\n", in_size, out_size );
if( out_size < 0 )
{
const LZ_Errno lz_errno = LZ_decompress_errno( decoder );
@@ -595,13 +639,6 @@ int do_decompress( LZ_Decoder * const decoder, const int infd,
if( in_size == 0 && out_size == 0 )
internal_error( "library error (LZ_decompress_read)" );
}
- if( verbosity >= 2 )
- std::fprintf( stderr, "decompressed size %9lld, size %9lld. ",
- LZ_decompress_total_out_size( decoder ),
- LZ_decompress_total_in_size( decoder ) );
- if( verbosity >= 1 )
- { if( testing ) std::fprintf( stderr, "ok\n" );
- else std::fprintf( stderr, "done\n" ); }
return 0;
}
@@ -633,9 +670,9 @@ extern "C" void signal_handler( int ) throw()
void set_signals() throw()
{
- signal( SIGHUP, signal_handler );
- signal( SIGINT, signal_handler );
- signal( SIGTERM, signal_handler );
+ std::signal( SIGHUP, signal_handler );
+ std::signal( SIGINT, signal_handler );
+ std::signal( SIGTERM, signal_handler );
}
} // end namespace
@@ -658,7 +695,7 @@ void Pretty_print::operator()( const char * const msg ) const throw()
}
-void show_error( const char * msg, const int errcode, const bool help ) throw()
+void show_error( const char * const msg, const int errcode, const bool help ) throw()
{
if( verbosity >= 0 )
{
@@ -674,7 +711,7 @@ void show_error( const char * msg, const int errcode, const bool help ) throw()
}
-void internal_error( const char * msg )
+void internal_error( const char * const msg )
{
std::string s( "internal error: " ); s += msg;
show_error( s.c_str() );
@@ -685,7 +722,7 @@ void internal_error( const char * msg )
// Returns the number of bytes really read.
// If (returned value < size) and (errno == 0), means EOF was reached.
//
-int readblock( const int fd, uint8_t * buf, const int size ) throw()
+int readblock( const int fd, uint8_t * const buf, const int size ) throw()
{
int rest = size;
errno = 0;
@@ -704,7 +741,7 @@ int readblock( const int fd, uint8_t * buf, const int size ) throw()
// Returns the number of bytes really written.
// If (returned value < size), it is always an error.
//
-int writeblock( const int fd, const uint8_t * buf, const int size ) throw()
+int writeblock( const int fd, const uint8_t * const buf, const int size ) throw()
{
int rest = size;
errno = 0;
@@ -719,22 +756,23 @@ int writeblock( const int fd, const uint8_t * buf, const int size ) throw()
}
-int main( const int argc, const char * argv[] )
+int main( const int argc, const char * const argv[] )
{
// Mapping from gzip/bzip2 style 1..9 compression modes
// to the corresponding LZMA compression modes.
- const lzma_options option_mapping[] =
+ const Lzma_options option_mapping[] =
{
+ { 1 << 16, 5 }, // -0
{ 1 << 20, 10 }, // -1
- { 1 << 20, 12 }, // -2
- { 1 << 20, 17 }, // -3
- { 1 << 21, 26 }, // -4
+ { 3 << 19, 12 }, // -2
+ { 1 << 21, 17 }, // -3
+ { 3 << 20, 26 }, // -4
{ 1 << 22, 44 }, // -5
{ 1 << 23, 80 }, // -6
{ 1 << 24, 108 }, // -7
- { 1 << 24, 163 }, // -8
+ { 3 << 23, 163 }, // -8
{ 1 << 25, 273 } }; // -9
- lzma_options encoder_options = option_mapping[5]; // default = "-6"
+ Lzma_options encoder_options = option_mapping[6]; // default = "-6"
long long member_size = LLONG_MAX;
long long volume_size = LLONG_MAX;
int infd = -1;
@@ -755,6 +793,7 @@ int main( const int argc, const char * argv[] )
const Arg_parser::Option options[] =
{
+ { '0', 0, Arg_parser::no },
{ '1', "fast", Arg_parser::no },
{ '2', 0, Arg_parser::no },
{ '3', 0, Arg_parser::no },
@@ -767,6 +806,7 @@ int main( const int argc, const char * argv[] )
{ 'b', "member-size", Arg_parser::yes },
{ 'c', "stdout", Arg_parser::no },
{ 'd', "decompress", Arg_parser::no },
+ { 'e', "extreme", Arg_parser::no },
{ 'f', "force", Arg_parser::no },
{ 'h', "help", Arg_parser::no },
{ 'k', "keep", Arg_parser::no },
@@ -789,22 +829,22 @@ int main( const int argc, const char * argv[] )
{
const int code = parser.code( argind );
if( !code ) break; // no more options
- const char * arg = parser.argument( argind ).c_str();
+ const char * const arg = parser.argument( argind ).c_str();
switch( code )
{
- case '1': case '2': case '3':
- case '4': case '5': case '6':
- case '7': case '8': case '9':
- encoder_options = option_mapping[code-'1']; break;
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ encoder_options = option_mapping[code-'0']; break;
case 'b': member_size = getnum( arg, 0, 100000, LLONG_MAX / 2 ); break;
case 'c': to_stdout = true; break;
case 'd': program_mode = m_decompress; break;
+ case 'e': break;
case 'f': force = true; break;
case 'h': show_help(); return 0;
case 'k': keep_input_files = true; break;
case 'm': encoder_options.match_len_limit =
- getnum( arg, 0, LZ_min_match_len_limit(),
- LZ_max_match_len_limit() ); break;
+ getnum( arg, 0, LZ_min_match_len_limit(),
+ LZ_max_match_len_limit() ); break;
case 'o': default_output_filename = arg; break;
case 'q': verbosity = -1; break;
case 's': encoder_options.dictionary_size = get_dict_size( arg );
@@ -852,6 +892,7 @@ int main( const int argc, const char * argv[] )
if( program_mode == m_compress )
set_c_outname( default_output_filename, volume_size != LLONG_MAX );
else output_filename = default_output_filename;
+ outfd_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
if( !open_outstream( force ) )
{
if( outfd == -1 && retval < 1 ) retval = 1;
@@ -876,6 +917,7 @@ int main( const int argc, const char * argv[] )
if( program_mode == m_compress )
set_c_outname( input_filename, volume_size != LLONG_MAX );
else set_d_outname( input_filename, eindex );
+ outfd_mode = S_IRUSR | S_IWUSR;
if( !open_outstream( force ) )
{
if( outfd == -1 && retval < 1 ) retval = 1;
@@ -892,7 +934,6 @@ int main( const int argc, const char * argv[] )
delete_output_on_interrupt = true;
const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0;
pp.set_name( input_filename );
- if( verbosity >= 1 ) pp();
int tmp = 0;
if( program_mode == m_compress )
tmp = compress( member_size, volume_size, encoder_options, infd,
diff --git a/testsuite/check.sh b/testsuite/check.sh
index e35876d..69060bd 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -11,7 +11,7 @@ objdir=`pwd`
testdir=`cd "$1" ; pwd`
LZIP="${objdir}"/minilzip
LZCHECK="${objdir}"/lzcheck
-framework_failure() { echo 'failure in testing framework'; exit 1; }
+framework_failure() { echo "failure in testing framework" ; exit 1 ; }
if [ ! -x "${LZIP}" ] ; then
echo "${LZIP}: cannot execute"
@@ -20,48 +20,49 @@ fi
if [ -d tmp ] ; then rm -rf tmp ; fi
mkdir tmp
-echo -n "testing lzlib..."
+printf "testing lzlib..."
cd "${objdir}"/tmp
cat "${testdir}"/test1 > in || framework_failure
fail=0
+"${LZIP}" -t "${testdir}"/test1.lz || fail=1
"${LZIP}" -cd "${testdir}"/test1.lz > copy || fail=1
cmp in copy || fail=1
-for i in s4096 1 2 3 4 5 6 7 8; do
+for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do
"${LZIP}" -k -$i in || fail=1
mv -f in.lz copy.lz || fail=1
- echo -n "garbage" >> copy.lz || fail=1
+ printf "garbage" >> copy.lz || fail=1
"${LZIP}" -df copy.lz || fail=1
cmp in copy || fail=1
- echo -n .
+ printf .
done
-for i in s4096 1 2 3 4 5 6 7 8; do
+for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do
"${LZIP}" -c -$i in > out || fail=1
- echo -n "g" >> out || fail=1
+ printf "g" >> out || fail=1
"${LZIP}" -cd out > copy || fail=1
cmp in copy || fail=1
- echo -n .
+ printf .
done
-for i in s4096 1 2 3 4 5 6 7 8; do
+for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do
"${LZIP}" -$i < in > out || fail=1
"${LZIP}" -d < out > copy || fail=1
cmp in copy || fail=1
- echo -n .
+ printf .
done
-for i in s4096 1 2 3 4 5 6 7 8; do
- "${LZIP}" -f -$i -o out < in || fail=1
+for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do
+ "${LZIP}" -fe -$i -o out < in || fail=1
"${LZIP}" -df -o copy < out.lz || fail=1
cmp in copy || fail=1
- echo -n .
+ printf .
done
"${LZCHECK}" in 2>/dev/null || fail=1
-echo -n .
+printf .
echo
if [ ${fail} = 0 ] ; then