From 94e413eb5d2213e1311025f284773829afa4dd50 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 7 Nov 2015 14:40:11 +0100 Subject: Merging upstream version 1.0. Signed-off-by: Daniel Baumann --- ChangeLog | 12 ++++ NEWS | 15 ++++- configure | 6 +- decoder.cc | 162 ++++++++++++++++++++++++++++------------------------- decoder.h | 88 ++++++++++++----------------- doc/lzlib.info | 50 ++++++++++++----- doc/lzlib.texinfo | 32 ++++++++++- encoder.cc | 106 ++++++++++++++++++----------------- encoder.h | 59 ++++++++++--------- lzip.h | 79 ++++++++++++-------------- lzlib.cc | 79 ++++++++++++++++++-------- lzlib.h | 6 +- main.cc | 141 +++++++++++++++++++++++++++++----------------- testsuite/check.sh | 29 +++++----- 14 files changed, 503 insertions(+), 361 deletions(-) diff --git a/ChangeLog b/ChangeLog index fc7ede1..d1281f9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2010-05-08 Antonio Diaz Diaz + + * Version 1.0 released. + * Added new function LZ_decompress_member_finished. + * Added new function LZ_decompress_member_version. + * Added new function LZ_decompress_dictionary_size. + * Added new function LZ_decompress_data_crc. + * Variables declared "extern" have been encapsulated in a + namespace. + * main.cc: Fixed warning about fchown's return value being ignored. + * decoder.h: Input_buffer integrated in Range_decoder. + 2010-02-10 Antonio Diaz Diaz * Version 0.9 released. diff --git a/NEWS b/NEWS index 5e6542d..502457a 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,14 @@ -Changes in version 0.9: +Changes in version 1.0: -Compression time has been reduced by 8%. +New functions: + LZ_decompress_member_finished. + LZ_decompress_member_version. + LZ_decompress_dictionary_size. + LZ_decompress_data_crc. + +Variables declared "extern" have been encapsulated in a namespace. + +A warning about fchown's return value being ignored has been fixed. + +Input_buffer has been integrated in Range_decoder, simplifying the code +and making decompression slightly faster. diff --git a/configure b/configure index fe01eac..73e800b 100755 --- a/configure +++ b/configure @@ -5,13 +5,13 @@ # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. # -# Date of this version: 2010-02-10 +# Date of this version: 2010-05-08 args= no_create= pkgname=lzlib -pkgversion=0.9 -soversion=0 +pkgversion=1.0 +soversion=1 progname=minilzip progname_shared= libname=lz diff --git a/decoder.cc b/decoder.cc index 429d33e..a260571 100644 --- a/decoder.cc +++ b/decoder.cc @@ -38,71 +38,11 @@ #include "decoder.h" -const CRC32 crc32; - -// Copies up to `out_size' bytes to `out_buffer' and updates `get'. -// Returns the number of bytes copied. -int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size ) throw() - { - if( out_size < 0 ) return 0; - int size = 0; - if( get > put ) - { - size = std::min( buffer_size - get, out_size ); - if( size > 0 ) - { - std::memcpy( out_buffer, buffer + get, size ); - get += size; - if( get >= buffer_size ) get = 0; - } - } - if( get < put ) - { - const int size2 = std::min( put - get, out_size - size ); - if( size2 > 0 ) - { - std::memcpy( out_buffer + size, buffer + get, size2 ); - get += size2; - size += size2; - } - } - return size; - } - - -// Copies up to `in_size' bytes from `in_buffer' and updates `put'. -// Returns the number of bytes copied. -int Circular_buffer::write_data( const uint8_t * const in_buffer, const int in_size ) throw() - { - if( in_size < 0 ) return 0; - int size = 0; - if( put >= get ) - { - size = std::min( buffer_size - put - (get == 0), in_size ); - if( size > 0 ) - { - std::memcpy( buffer + put, in_buffer, size ); - put += size; - if( put >= buffer_size ) put = 0; - } - } - if( put < get ) - { - const int size2 = std::min( get - put - 1, in_size - size ); - if( size2 > 0 ) - { - std::memcpy( buffer + put, in_buffer + size, size2 ); - put += size2; - size += size2; - } - } - return size; - } - +const CRC32 Lzlib_namespace::crc32; // Seeks a member header and updates `get'. // Returns true if it finds a valid header. -bool Input_buffer::find_header() throw() +bool Range_decoder::find_header() throw() { while( get != put ) { @@ -110,10 +50,10 @@ bool Input_buffer::find_header() throw() { int g = get; File_header header; - for( unsigned int i = 0; i < sizeof header; ++i ) + for( int i = 0; i < File_header::size; ++i ) { if( g == put ) return false; // not enough data - ((uint8_t *)&header)[i] = buffer[g]; + header.data[i] = buffer[g]; if( ++g >= buffer_size ) g = 0; } if( header.verify() ) return true; @@ -127,36 +67,44 @@ bool Input_buffer::find_header() throw() // Returns true, fills `header', and updates `get' if `get' points to a // valid header. // Else returns false and leaves `get' unmodified. -bool Input_buffer::read_header( File_header & header ) throw() +bool Range_decoder::read_header( File_header & header ) throw() { int g = get; - for( unsigned int i = 0; i < sizeof header; ++i ) + for( int i = 0; i < File_header::size; ++i ) { if( g == put ) return false; // not enough data - ((uint8_t *)&header)[i] = buffer[g]; + header.data[i] = buffer[g]; if( ++g >= buffer_size ) g = 0; } - if( header.verify() ) { get = g; return true; } + if( header.verify() ) + { + get = g; + member_pos = File_header::size; + reload_pending = true; + return true; + } return false; } bool LZ_decoder::verify_trailer() { - bool error = false; File_trailer trailer; - const int trailer_size = trailer.size( format_version ); + const int trailer_size = File_trailer::size( member_version ); + const long long member_size = range_decoder.member_position() + trailer_size; + bool error = false; + for( int i = 0; i < trailer_size && !error; ++i ) { if( !range_decoder.finished() ) - ((uint8_t *)&trailer)[i] = range_decoder.get_byte(); - else error = true; + trailer.data[i] = range_decoder.get_byte(); + else { error = true; for( ; i < trailer_size; ++i ) trailer.data[i] = 0; } } - if( format_version == 0 ) trailer.member_size( member_position() ); + if( member_version == 0 ) trailer.member_size( member_size ); if( !range_decoder.code_is_zero() ) error = true; if( trailer.data_crc() != crc() ) error = true; if( trailer.data_size() != data_position() ) error = true; - if( trailer.member_size() != member_position() ) error = true; + if( trailer.member_size() != member_size ) error = true; return !error; } @@ -169,7 +117,7 @@ int LZ_decoder::decode_member() if( !range_decoder.try_reload() ) return 0; if( verify_trailer_pending ) { - if( range_decoder.available_bytes() < File_trailer::size( format_version ) && + if( range_decoder.available_bytes() < File_trailer::size( member_version ) && !range_decoder.at_stream_end() ) return 0; verify_trailer_pending = false; @@ -240,13 +188,13 @@ int LZ_decoder::decode_member() { rep0 += range_decoder.decode( direct_bits - dis_align_bits ) << dis_align_bits; rep0 += range_decoder.decode_tree_reversed( bm_align, dis_align_bits ); - if( rep0 == 0xFFFFFFFF ) // Marker found + if( rep0 == 0xFFFFFFFFU ) // Marker found { rep0 = rep0_saved; range_decoder.normalize(); if( len == min_match_len ) // End Of Stream marker { - if( range_decoder.available_bytes() < File_trailer::size( format_version ) && + if( range_decoder.available_bytes() < File_trailer::size( member_version ) && !range_decoder.at_stream_end() ) { verify_trailer_pending = true; return 0; } member_finished_ = true; @@ -269,3 +217,63 @@ int LZ_decoder::decode_member() } } } + + +// Copies up to `out_size' bytes to `out_buffer' and updates `get'. +// Returns the number of bytes copied. +int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size ) throw() + { + if( out_size < 0 ) return 0; + int size = 0; + if( get > put ) + { + size = std::min( buffer_size - get, out_size ); + if( size > 0 ) + { + std::memcpy( out_buffer, buffer + get, size ); + get += size; + if( get >= buffer_size ) get = 0; + } + } + if( get < put ) + { + const int size2 = std::min( put - get, out_size - size ); + if( size2 > 0 ) + { + std::memcpy( out_buffer + size, buffer + get, size2 ); + get += size2; + size += size2; + } + } + return size; + } + + +// Copies up to `in_size' bytes from `in_buffer' and updates `put'. +// Returns the number of bytes copied. +int Circular_buffer::write_data( const uint8_t * const in_buffer, const int in_size ) throw() + { + if( in_size < 0 ) return 0; + int size = 0; + if( put >= get ) + { + size = std::min( buffer_size - put - (get == 0), in_size ); + if( size > 0 ) + { + std::memcpy( buffer + put, in_buffer, size ); + put += size; + if( put >= buffer_size ) put = 0; + } + } + if( put < get ) + { + const int size2 = std::min( get - put - 1, in_size - size ); + if( size2 > 0 ) + { + std::memcpy( buffer + put, in_buffer + size, size2 ); + put += size2; + size += size2; + } + } + return size; + } diff --git a/decoder.h b/decoder.h index e7775b0..3842ed5 100644 --- a/decoder.h +++ b/decoder.h @@ -25,22 +25,33 @@ Public License. */ -class Input_buffer : public Circular_buffer +class Range_decoder : public Circular_buffer { enum { min_available_bytes = 8 }; + long long member_pos; + uint32_t code; + uint32_t range; + bool reload_pending; bool at_stream_end_; public: - Input_buffer() + Range_decoder() : Circular_buffer( 65536 + min_available_bytes ), + member_pos( 0 ), + code( 0 ), + range( 0xFFFFFFFFU ), + reload_pending( false ), at_stream_end_( false ) {} bool at_stream_end() const throw() { return at_stream_end_; } + int available_bytes() const throw() { return used_bytes(); } + bool code_is_zero() const throw() { return ( code == 0 ); } void finish() throw() { at_stream_end_ = true; } bool finished() const throw() { return at_stream_end_ && !used_bytes(); } int free_bytes() const throw() { if( at_stream_end_ ) return 0; return Circular_buffer::free_bytes(); } + long long member_position() const throw() { return member_pos; } void purge() throw() { at_stream_end_ = true; Circular_buffer::reset(); } void reset() throw() { at_stream_end_ = false; Circular_buffer::reset(); } @@ -58,39 +69,11 @@ public: if( at_stream_end_ || in_size <= 0 ) return 0; return Circular_buffer::write_data( in_buffer, in_size ); } - }; - - -class Range_decoder - { - mutable long long member_pos; - uint32_t code; - uint32_t range; - bool reload_pending; - Input_buffer & ibuf; - -public: - Range_decoder( const int header_size, Input_buffer & buf ) - : - member_pos( header_size ), - code( 0 ), - range( 0xFFFFFFFF ), - reload_pending( false ), - ibuf( buf ) - { for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); } - - bool at_stream_end() const throw() { return ibuf.at_stream_end(); } - int available_bytes() const throw() { return ibuf.used_bytes(); } - bool code_is_zero() const throw() { return ( code == 0 ); } - bool enough_available_bytes() const throw() - { return ibuf.enough_available_bytes(); } - bool finished() const throw() { return ibuf.finished(); } - long long member_position() const throw() { return member_pos; } - uint8_t get_byte() const + uint8_t get_byte() { ++member_pos; - return ibuf.get_byte(); + return Circular_buffer::get_byte(); } bool try_reload( const bool force = false ) throw() @@ -100,7 +83,7 @@ public: { reload_pending = false; code = 0; - range = 0xFFFFFFFF; + range = 0xFFFFFFFFU; for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); } return !reload_pending; @@ -108,7 +91,7 @@ public: void normalize() { - if( range <= 0x00FFFFFF ) + if( range <= 0x00FFFFFFU ) { range <<= 8; code = (code << 8) | get_byte(); } } @@ -118,7 +101,7 @@ public: for( int i = num_bits; i > 0; --i ) { symbol <<= 1; - if( range <= 0x00FFFFFF ) + if( range <= 0x00FFFFFFU ) { range <<= 7; code = (code << 8) | get_byte(); if( code >= range ) { code -= range; symbol |= 1; } @@ -174,16 +157,16 @@ public: int decode_matched( Bit_model bm[], const int match_byte ) { - Bit_model *bm1 = bm + 0x100; + Bit_model * const bm1 = bm + 0x100; int symbol = 1; - for( int i = 1; i <= 8; ++i ) + for( int i = 7; i >= 0; --i ) { - const int match_bit = ( match_byte << i ) & 0x100; - const int bit = decode_bit( bm1[match_bit+symbol] ); + const int match_bit = ( match_byte >> i ) & 1; + const int bit = decode_bit( bm1[(match_bit<<8)+symbol] ); symbol = ( symbol << 1 ) | bit; - if( ( match_bit && !bit ) || ( !match_bit && bit ) ) + if( match_bit != bit ) { - while( ++i <= 8 ) + while( --i >= 0 ) symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); break; } @@ -219,16 +202,16 @@ class Literal_decoder { Bit_model bm_literal[1<> ( 8 - literal_context_bits ) ); } public: uint8_t decode( Range_decoder & range_decoder, const uint8_t prev_byte ) - { return range_decoder.decode_tree( bm_literal[state(prev_byte)], 8 ); } + { return range_decoder.decode_tree( bm_literal[lstate(prev_byte)], 8 ); } uint8_t decode_matched( Range_decoder & range_decoder, const uint8_t prev_byte, const uint8_t match_byte ) - { return range_decoder.decode_matched( bm_literal[state(prev_byte)], match_byte ); } + { return range_decoder.decode_matched( bm_literal[lstate(prev_byte)], match_byte ); } }; @@ -236,7 +219,7 @@ class LZ_decoder : public Circular_buffer { enum { min_free_bytes = max_match_len }; long long partial_data_pos; - const int format_version; + const int member_version; const int dictionary_size; uint32_t crc_; bool member_finished_; @@ -257,7 +240,7 @@ class LZ_decoder : public Circular_buffer Bit_model bm_dis[modeled_distances-end_dis_model]; Bit_model bm_align[dis_align_size]; - Range_decoder range_decoder; + Range_decoder & range_decoder; Len_decoder len_decoder; Len_decoder rep_match_len_decoder; Literal_decoder literal_decoder; @@ -286,7 +269,7 @@ class LZ_decoder : public Circular_buffer std::memcpy( buffer + put, buffer + i, len ); put += len; } - else for( ; len > 0 ; --len ) + else for( ; len > 0; --len ) { crc32.update( crc_, buffer[i] ); buffer[put] = buffer[i]; @@ -298,27 +281,26 @@ class LZ_decoder : public Circular_buffer bool verify_trailer(); public: - LZ_decoder( const File_header & header, Input_buffer & ibuf ) + LZ_decoder( const File_header & header, Range_decoder & rdec ) : Circular_buffer( std::max( 65536, header.dictionary_size() ) + min_free_bytes ), partial_data_pos( 0 ), - format_version( header.version ), + member_version( header.version() ), dictionary_size( header.dictionary_size() ), - crc_( 0xFFFFFFFF ), + crc_( 0xFFFFFFFFU ), member_finished_( false ), verify_trailer_pending( false ), rep0( 0 ), rep1( 0 ), rep2( 0 ), rep3( 0 ), - range_decoder( sizeof header, ibuf ), - literal_decoder() + range_decoder( rdec ) { buffer[buffer_size-1] = 0; } // prev_byte of first_byte bool enough_free_bytes() const throw() { return free_bytes() >= min_free_bytes; } - uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFF; } + uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFFU; } int decode_member(); bool member_finished() const throw() { return ( member_finished_ && !used_bytes() ); } diff --git a/doc/lzlib.info b/doc/lzlib.info index 9a516a9..af9f67b 100644 --- a/doc/lzlib.info +++ b/doc/lzlib.info @@ -12,7 +12,7 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir) Lzlib Manual ************ -This manual is for Lzlib (version 0.9, 10 February 2010). +This manual is for Lzlib (version 1.0, 8 May 2010). * Menu: @@ -373,6 +373,28 @@ be verified by calling `LZ_decompress_errno' before using it. Returns 1 if all the data has been read and `LZ_decompress_close' can be safely called. Otherwise it returns 0. + -- Function: int LZ_decompress_member_finished ( struct LZ_Decoder * + const DECODER ) + Returns 1 if the previous call to `LZ_decompress_read' finished + reading the current member, indicating that final values for + member are available through `LZ_decompress_data_crc', + `LZ_decompress_data_position', and + `LZ_decompress_member_position'. Otherwise it returns 0. + + -- Function: int LZ_decompress_member_version ( struct LZ_Decoder * + const DECODER ) + Returns the version of current member from member header. + + -- Function: int LZ_decompress_dictionary_size ( struct LZ_Decoder * + const DECODER ) + Returns the dictionary size of current member from member header. + + -- Function: unsigned int LZ_decompress_data_crc ( struct LZ_Decoder * + const DECODER ) + Returns the 32 bit Cyclic Redundancy Check of the data + decompressed from the current member. The returned value is valid + only when `LZ_decompress_member_finished' returns 1. + -- Function: long long LZ_decompress_data_position ( struct LZ_Decoder * const DECODER ) Returns the number of decompressed bytes already produced, but @@ -575,6 +597,8 @@ Example 4: Decompression using LZ_decompress_write_size. 3) LZ_decompress_write 4) if no more data to write, call LZ_decompress_finish 5) LZ_decompress_read + 5a) optionally, if LZ_decompress_member_finished returns 1, read + final values for member with LZ_decompress_data_crc, etc. 6) go back to step 2 until LZ_decompress_finished returns 1 7) LZ_decompress_close @@ -676,17 +700,17 @@ Concept Index  Tag Table: Node: Top219 -Node: Introduction1158 -Node: Library Version2933 -Node: Buffering3578 -Node: Parameter Limits4698 -Node: Compression Functions5655 -Node: Decompression Functions11701 -Node: Error Codes16763 -Node: Error Messages18702 -Node: Data Format19281 -Node: Examples21251 -Node: Problems24827 -Node: Concept Index25399 +Node: Introduction1152 +Node: Library Version2927 +Node: Buffering3572 +Node: Parameter Limits4692 +Node: Compression Functions5649 +Node: Decompression Functions11695 +Node: Error Codes17766 +Node: Error Messages19705 +Node: Data Format20284 +Node: Examples22254 +Node: Problems25967 +Node: Concept Index26539  End Tag Table diff --git a/doc/lzlib.texinfo b/doc/lzlib.texinfo index 8163502..ef46af9 100644 --- a/doc/lzlib.texinfo +++ b/doc/lzlib.texinfo @@ -5,8 +5,8 @@ @finalout @c %**end of header -@set UPDATED 10 February 2010 -@set VERSION 0.9 +@set UPDATED 8 May 2010 +@set VERSION 1.0 @dircategory Data Compression @direntry @@ -424,6 +424,32 @@ can be safely called. Otherwise it returns 0. @end deftypefun +@deftypefun int LZ_decompress_member_finished ( struct LZ_Decoder * const @var{decoder} ) +Returns 1 if the previous call to @samp{LZ_decompress_read} finished +reading the current member, indicating that final values for member are +available through @samp{LZ_decompress_data_crc}, +@samp{LZ_decompress_data_position}, and +@samp{LZ_decompress_member_position}. Otherwise it returns 0. +@end deftypefun + + +@deftypefun int LZ_decompress_member_version ( struct LZ_Decoder * const @var{decoder} ) +Returns the version of current member from member header. +@end deftypefun + + +@deftypefun int LZ_decompress_dictionary_size ( struct LZ_Decoder * const @var{decoder} ) +Returns the dictionary size of current member from member header. +@end deftypefun + + +@deftypefun {unsigned int} LZ_decompress_data_crc ( struct LZ_Decoder * const @var{decoder} ) +Returns the 32 bit Cyclic Redundancy Check of the data decompressed from +the current member. The returned value is valid only when +@samp{LZ_decompress_member_finished} returns 1. +@end deftypefun + + @deftypefun {long long} LZ_decompress_data_position ( struct LZ_Decoder * const @var{decoder} ) Returns the number of decompressed bytes already produced, but perhaps not yet read, in the current member. @@ -652,6 +678,8 @@ Example 4: Decompression using LZ_decompress_write_size. 3) LZ_decompress_write 4) if no more data to write, call LZ_decompress_finish 5) LZ_decompress_read +5a) optionally, if LZ_decompress_member_finished returns 1, read + final values for member with LZ_decompress_data_crc, etc. 6) go back to step 2 until LZ_decompress_finished returns 1 7) LZ_decompress_close @end example diff --git a/encoder.cc b/encoder.cc index 1b979ed..032b07c 100644 --- a/encoder.cc +++ b/encoder.cc @@ -38,8 +38,8 @@ #include "encoder.h" -const Dis_slots dis_slots; -const Prob_prices prob_prices; +const Dis_slots Lzlib_namespace::dis_slots; +const Prob_prices Lzlib_namespace::prob_prices; int Matchfinder::write_data( const uint8_t * const in_buffer, const int in_size ) throw() @@ -140,10 +140,11 @@ int Matchfinder::longest_match_len( int * const distances ) throw() const uint8_t * const data = buffer + pos; const int key2 = num_prev_positions4 + num_prev_positions3 + ( ( (int)data[0] << 8 ) | data[1] ); - const int tmp = crc32[data[0]] ^ data[1] ^ ( (int)data[2] << 8 ); - const int key3 = num_prev_positions4 + ( tmp & ( num_prev_positions3 - 1 ) ); - const int key4 = ( tmp ^ ( crc32[data[3]] << 5 ) ) & - ( num_prev_positions4 - 1 ); + const uint32_t tmp = crc32[data[0]] ^ data[1] ^ ( (uint32_t)data[2] << 8 ); + const int key3 = num_prev_positions4 + + (int)( tmp & ( num_prev_positions3 - 1 ) ); + const int key4 = (int)( ( tmp ^ ( crc32[data[3]] << 5 ) ) & + ( num_prev_positions4 - 1 ) ); if( distances ) { @@ -251,8 +252,8 @@ void LZ_encoder::fill_distance_prices() throw() { for( int dis_state = 0; dis_state < max_dis_states; ++dis_state ) { - int * dsp = dis_slot_prices[dis_state]; - const Bit_model * bmds = bm_dis_slot[dis_state]; + int * const dsp = dis_slot_prices[dis_state]; + const Bit_model * const bmds = bm_dis_slot[dis_state]; int slot = 0; for( ; slot < end_dis_model && slot < num_dis_slots; ++slot ) dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ); @@ -260,7 +261,7 @@ void LZ_encoder::fill_distance_prices() throw() dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ) + (((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift ); - int * dp = dis_prices[dis_state]; + int * const dp = dis_prices[dis_state]; int dis = 0; for( ; dis < start_dis_model; ++dis ) dp[dis] = dsp[dis]; @@ -276,8 +277,10 @@ void LZ_encoder::fill_distance_prices() throw() } -// Return value: ( dis == -1 ) && ( len == 1 ) means literal -int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], +// Return value == number of bytes advanced (ahead). +// trials[0]..trials[retval-1] contain the steps to encode. +// ( trials[0].dis == -1 && trials[0].price == 1 ) means literal. +int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances], const State & state ) { int main_len; @@ -312,15 +315,14 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], return main_len; } - trials[0].state = state; - for( int i = 0; i < num_rep_distances; ++i ) trials[0].reps[i] = reps[i]; - + { + const int pos_state = matchfinder.data_position() & pos_state_mask; const uint8_t prev_byte = matchfinder[-1]; const uint8_t cur_byte = matchfinder[0]; const uint8_t match_byte = matchfinder[-reps[0]-1]; - unsigned int position = matchfinder.data_position(); - const int pos_state = position & pos_state_mask; + trials[0].state = state; + for( int i = 0; i < num_rep_distances; ++i ) trials[0].reps[i] = reps[i]; trials[1].dis = -1; trials[1].prev_index = 0; trials[1].price = price0( bm_match[state()][pos_state] ); @@ -368,6 +370,7 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], trials[len].update( rep, 0, price + rep_match_len_encoder.price( len, pos_state ) ); } + } int cur = 0; int num_trials = main_len; @@ -375,7 +378,7 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], while( true ) { - if( ++cur >= num_trials ) + if( ++cur >= num_trials ) // no more initialized trials { backward( cur ); return cur; @@ -407,10 +410,11 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], mtf_reps( cur_trial.dis, cur_trial.reps ); } + const int pos_state = matchfinder.data_position() & pos_state_mask; const uint8_t prev_byte = matchfinder[-1]; const uint8_t cur_byte = matchfinder[0]; const uint8_t match_byte = matchfinder[-cur_trial.reps[0]-1]; - const int pos_state = ++position & pos_state_mask; + int next_price = cur_trial.price + price0( bm_match[cur_trial.state()][pos_state] ); if( cur_trial.state.is_char() ) next_price += literal_encoder.price_symbol( prev_byte, cur_byte ); @@ -454,7 +458,7 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], if( newlen <= len_limit && ( newlen > min_match_len || ( newlen == min_match_len && - match_distances[newlen] < modeled_distances ) ) ) + match_distances[min_match_len] < modeled_distances ) ) ) { const int normal_match_price = match_price + price0( bm_rep[cur_trial.state()] ); @@ -470,37 +474,38 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], } - // Sync Flush mark => (dis == 0xFFFFFFFF, len == min_match_len + 1) -bool LZ_encoder::sync_flush() + // End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) +bool LZ_encoder::full_flush( const State & state ) { - if( member_finished_ || range_encoder.free_bytes() < max_marker_size ) + if( member_finished_ || + range_encoder.free_bytes() < File_trailer::size() + max_marker_size ) return false; - const int pos_state = ( matchfinder.data_position() ) & pos_state_mask; + const int pos_state = matchfinder.data_position() & pos_state_mask; range_encoder.encode_bit( bm_match[state()][pos_state], 1 ); range_encoder.encode_bit( bm_rep[state()], 0 ); - encode_pair( 0xFFFFFFFF, min_match_len + 1, pos_state ); + encode_pair( 0xFFFFFFFFU, min_match_len, pos_state ); range_encoder.flush(); + File_trailer trailer; + trailer.data_crc( crc() ); + trailer.data_size( matchfinder.data_position() ); + trailer.member_size( range_encoder.member_position() + File_trailer::size() ); + for( int i = 0; i < File_trailer::size(); ++i ) + range_encoder.put_byte( trailer.data[i] ); return true; } - // End Of Stream mark => (dis == 0xFFFFFFFF, len == min_match_len) -bool LZ_encoder::full_flush() + // Sync Flush mark => (dis == 0xFFFFFFFFU, len == min_match_len + 1) +bool LZ_encoder::sync_flush() { - if( member_finished_ || - range_encoder.free_bytes() < (int)sizeof (File_trailer) + max_marker_size ) + if( member_finished_ || range_encoder.free_bytes() < max_marker_size ) return false; - const int pos_state = ( matchfinder.data_position() ) & pos_state_mask; + const State & state = main_state; + const int pos_state = matchfinder.data_position() & pos_state_mask; range_encoder.encode_bit( bm_match[state()][pos_state], 1 ); range_encoder.encode_bit( bm_rep[state()], 0 ); - encode_pair( 0xFFFFFFFF, min_match_len, pos_state ); + encode_pair( 0xFFFFFFFFU, min_match_len + 1, pos_state ); range_encoder.flush(); - File_trailer trailer; - trailer.data_crc( crc() ); - trailer.data_size( matchfinder.data_position() ); - trailer.member_size( range_encoder.member_position() + sizeof trailer ); - for( unsigned int i = 0; i < sizeof trailer; ++i ) - range_encoder.put_byte( ((uint8_t *)&trailer)[i] ); return true; } @@ -508,14 +513,12 @@ bool LZ_encoder::full_flush() LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header, const long long member_size ) : - member_size_limit( member_size - sizeof (File_trailer) - max_marker_size ), + member_size_limit( member_size - File_trailer::size() - max_marker_size ), longest_match_found( 0 ), - crc_( 0xFFFFFFFF ), + crc_( 0xFFFFFFFFU ), matchfinder( mf ), - range_encoder(), len_encoder( matchfinder.match_len_limit() ), rep_match_len_encoder( matchfinder.match_len_limit() ), - literal_encoder(), num_dis_slots( 2 * File_header::real_bits( matchfinder.dictionary_size() - 1 ) ), fill_counter( 0 ), member_finished_( false ) @@ -523,16 +526,17 @@ LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header, for( int i = 0; i < num_rep_distances; ++i ) rep_distances[i] = 0; fill_align_prices(); - for( unsigned int i = 0; i < sizeof header; ++i ) - range_encoder.put_byte( ((uint8_t *)&header)[i] ); + for( int i = 0; i < File_header::size; ++i ) + range_encoder.put_byte( header.data[i] ); } bool LZ_encoder::encode_member( const bool finish ) { + State & state = main_state; if( member_finished_ ) return true; if( range_encoder.member_position() >= member_size_limit ) - { if( full_flush() ) { member_finished_ = true; } return true; } + { if( full_flush( state ) ) { member_finished_ = true; } return true; } // encode first byte if( matchfinder.data_position() == 0 && !matchfinder.finished() ) @@ -551,29 +555,30 @@ bool LZ_encoder::encode_member( const bool finish ) { if( matchfinder.finished() ) { - if( finish && full_flush() ) member_finished_ = true; + if( finish && full_flush( state ) ) member_finished_ = true; return true; } if( !matchfinder.enough_available_bytes() || !range_encoder.enough_free_bytes() ) return true; if( fill_counter <= 0 ) { fill_distance_prices(); fill_counter = 512; } - int ahead = best_pair_sequence( rep_distances, state ); + int ahead = sequence_optimizer( rep_distances, state ); if( ahead <= 0 ) return false; fill_counter -= ahead; for( int i = 0; ; ) { const int pos_state = ( matchfinder.data_position() - ahead ) & pos_state_mask; - int dis = trials[i].dis; + const int dis = trials[i].dis; const int len = trials[i].price; bool bit = ( dis < 0 && len == 1 ); range_encoder.encode_bit( bm_match[state()][pos_state], !bit ); - if( bit ) + if( bit ) // literal byte { const uint8_t prev_byte = matchfinder[-ahead-1]; const uint8_t cur_byte = matchfinder[-ahead]; + crc32.update( crc_, cur_byte ); if( state.is_char() ) literal_encoder.encode( range_encoder, prev_byte, cur_byte ); else @@ -583,8 +588,9 @@ bool LZ_encoder::encode_member( const bool finish ) } state.set_char(); } - else + else // match or repeated match { + crc32.update( crc_, matchfinder.ptr_to_current_pos() - ahead, len ); mtf_reps( dis, rep_distances ); bit = ( dis < num_rep_distances ); range_encoder.encode_bit( bm_rep[state()], bit ); @@ -613,13 +619,11 @@ bool LZ_encoder::encode_member( const bool finish ) state.set_match(); } } - for( int j = 0; j < len; ++j ) - crc32.update( crc_, matchfinder[j-ahead] ); ahead -= len; i += len; if( range_encoder.member_position() >= member_size_limit ) { if( !matchfinder.dec_pos( ahead ) ) return false; - if( full_flush() ) member_finished_ = true; + if( full_flush( state ) ) member_finished_ = true; return true; } if( ahead <= 0 ) break; diff --git a/encoder.h b/encoder.h index 590dea1..5f65743 100644 --- a/encoder.h +++ b/encoder.h @@ -53,7 +53,8 @@ public: } }; -extern const Dis_slots dis_slots; +namespace Lzlib_namespace { extern const Dis_slots dis_slots; } +using Lzlib_namespace::dis_slots; class Prob_prices @@ -74,11 +75,12 @@ public: } } - int operator[]( const int symbol ) const throw() - { return data[symbol >> 2]; } + int operator[]( const int probability ) const throw() + { return data[probability >> 2]; } }; -extern const Prob_prices prob_prices; +namespace Lzlib_namespace { extern const Prob_prices prob_prices; } +using Lzlib_namespace::prob_prices; inline int price0( const Bit_model & bm ) throw() @@ -130,14 +132,14 @@ inline int price_matched( const Bit_model bm[], const int symbol, for( int i = 7; i >= 0; --i ) { const int match_bit = ( match_byte >> i ) & 1; - const int bit = ( symbol >> i ) & 1; + int bit = ( symbol >> i ) & 1; price += price_bit( bm[(match_bit<<8)+model+0x100], bit ); model = ( model << 1 ) | bit; if( match_bit != bit ) { while( --i >= 0 ) { - const int bit = ( symbol >> i ) & 1; + bit = ( symbol >> i ) & 1; price += price_bit( bm[model], bit ); model = ( model << 1 ) | bit; } @@ -236,14 +238,14 @@ class Range_encoder : public Circular_buffer void shift_low() { const uint32_t carry = low >> 32; - if( low < 0xFF000000LL || carry == 1 ) + if( low < 0xFF000000U || carry == 1 ) { put_byte( cache + carry ); for( ; ff_count > 0; --ff_count ) put_byte( 0xFF + carry ); cache = low >> 24; } else ++ff_count; - low = ( low & 0x00FFFFFFLL ) << 8; + low = ( low & 0x00FFFFFFU ) << 8; } public: @@ -252,7 +254,7 @@ public: Circular_buffer( 65536 + min_free_bytes ), low( 0 ), partial_member_pos( 0 ), - range( 0xFFFFFFFF ), + range( 0xFFFFFFFFU ), ff_count( 0 ), cache( 0 ) {} @@ -270,7 +272,7 @@ public: { for( int i = 0; i < 5; ++i ) shift_low(); low = 0; - range = 0xFFFFFFFF; + range = 0xFFFFFFFFU; ff_count = 0; cache = 0; } @@ -284,7 +286,7 @@ public: { range >>= 1; if( (symbol >> i) & 1 ) low += range; - if( range <= 0x00FFFFFF ) { range <<= 8; shift_low(); } + if( range <= 0x00FFFFFFU ) { range <<= 8; shift_low(); } } } @@ -302,7 +304,7 @@ public: range -= bound; bm.probability -= bm.probability >> bit_model_move_bits; } - if( range <= 0x00FFFFFF ) { range <<= 8; shift_low(); } + if( range <= 0x00FFFFFFU ) { range <<= 8; shift_low(); } } void encode_tree( Bit_model bm[], const int symbol, const int num_bits ) @@ -335,15 +337,15 @@ public: int model = 1; for( int i = 7; i >= 0; --i ) { - const int bit = ( symbol >> i ) & 1; const int match_bit = ( match_byte >> i ) & 1; + int bit = ( symbol >> i ) & 1; encode_bit( bm[(match_bit<<8)+model+0x100], bit ); model = ( model << 1 ) | bit; if( match_bit != bit ) { while( --i >= 0 ) { - const int bit = ( symbol >> i ) & 1; + bit = ( symbol >> i ) & 1; encode_bit( bm[model], bit ); model = ( model << 1 ) | bit; } @@ -368,17 +370,17 @@ class Len_encoder void update_prices( const int pos_state ) throw() { int * const pps = prices[pos_state]; - int price = price0( choice1 ); + int tmp = price0( choice1 ); int len = 0; for( ; len < len_low_symbols && len < len_symbols; ++len ) - pps[len] = price + + pps[len] = tmp + price_symbol( bm_low[pos_state], len, len_low_bits ); - price = price1( choice1 ); + tmp = price1( choice1 ); for( ; len < len_low_symbols + len_mid_symbols && len < len_symbols; ++len ) - pps[len] = price + price0( choice2 ) + + pps[len] = tmp + price0( choice2 ) + price_symbol( bm_mid[pos_state], len - len_low_symbols, len_mid_bits ); for( ; len < len_symbols; ++len ) - pps[len] = price + price1( choice2 ) + + pps[len] = tmp + price1( choice2 ) + price_symbol( bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); counters[pos_state] = len_symbols; } @@ -402,21 +404,21 @@ class Literal_encoder { Bit_model bm_literal[1<> ( 8 - literal_context_bits ) ); } public: void encode( Range_encoder & range_encoder, uint8_t prev_byte, uint8_t symbol ) - { range_encoder.encode_tree( bm_literal[state(prev_byte)], symbol, 8 ); } + { range_encoder.encode_tree( bm_literal[lstate(prev_byte)], symbol, 8 ); } void encode_matched( Range_encoder & range_encoder, uint8_t prev_byte, uint8_t match_byte, uint8_t symbol ) - { range_encoder.encode_matched( bm_literal[state(prev_byte)], symbol, match_byte ); } + { range_encoder.encode_matched( bm_literal[lstate(prev_byte)], symbol, match_byte ); } int price_matched( uint8_t prev_byte, uint8_t symbol, uint8_t match_byte ) const throw() - { return ::price_matched( bm_literal[state(prev_byte)], symbol, match_byte ); } + { return ::price_matched( bm_literal[lstate(prev_byte)], symbol, match_byte ); } int price_symbol( uint8_t prev_byte, uint8_t symbol ) const throw() - { return ::price_symbol( bm_literal[state(prev_byte)], symbol, 8 ); } + { return ::price_symbol( bm_literal[lstate(prev_byte)], symbol, 8 ); } }; @@ -468,14 +470,15 @@ class LZ_encoder int align_prices[dis_align_size]; int align_price_count; int fill_counter; - State state; + State main_state; bool member_finished_; void fill_align_prices() throw(); void fill_distance_prices() throw(); - uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFF; } + uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFFU; } + // move-to-front dis in/into reps void mtf_reps( const int dis, int reps[num_rep_distances] ) throw() { if( dis >= num_rep_distances ) @@ -582,10 +585,10 @@ class LZ_encoder } } - int best_pair_sequence( const int reps[num_rep_distances], + int sequence_optimizer( const int reps[num_rep_distances], const State & state ); - bool full_flush(); + bool full_flush( const State & state ); public: LZ_encoder( Matchfinder & mf, const File_header & header, diff --git a/lzip.h b/lzip.h index 7cb9927..2138056 100644 --- a/lzip.h +++ b/lzip.h @@ -32,7 +32,7 @@ class State public: enum { states = 12 }; State() throw() : st( 0 ) {} - int operator()() const throw() { return st; } + unsigned char operator()() const throw() { return st; } bool is_char() const throw() { return st < 7; } void set_char() throw() @@ -118,7 +118,7 @@ public: { unsigned int c = n; for( int k = 0; k < 8; ++k ) - { if( c & 1 ) c = 0xEDB88320 ^ ( c >> 1 ); else c >>= 1; } + { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; } data[n] = c; } } @@ -133,29 +133,27 @@ public: } }; -extern const CRC32 crc32; +namespace Lzlib_namespace { extern const CRC32 crc32; } +using Lzlib_namespace::crc32; const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' }; struct File_header { - uint8_t magic[4]; - uint8_t version; - uint8_t coded_dict_size; + uint8_t data[6]; // 0-3 magic bytes + // 4 version + // 5 coded_dict_size + enum { size = 6 }; void set_magic() throw() - { std::memcpy( magic, magic_string, sizeof magic ); version = 1; } + { std::memcpy( data, magic_string, 4 ); data[4] = 1; } bool verify_magic() const throw() - { - return ( std::memcmp( magic, magic_string, sizeof magic ) == 0 ); - } + { return ( std::memcmp( data, magic_string, 4 ) == 0 ); } - bool verify_version() const throw() - { - return ( version <= 1 ); - } + uint8_t version() const throw() { return data[4]; } + bool verify_version() const throw() { return ( data[4] <= 1 ); } bool verify() const throw() { @@ -174,24 +172,24 @@ struct File_header int dictionary_size() const throw() { - int size = ( 1 << ( coded_dict_size & 0x1F ) ); - if( size > min_dictionary_size && size <= max_dictionary_size ) - size -= ( size / 16 ) * ( ( coded_dict_size >> 5 ) & 0x07 ); - return size; + int sz = ( 1 << ( data[5] & 0x1F ) ); + if( sz > min_dictionary_size && sz <= max_dictionary_size ) + sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 0x07 ); + return sz; } - bool dictionary_size( const int size ) throw() + bool dictionary_size( const int sz ) throw() { - if( size >= min_dictionary_size && size <= max_dictionary_size ) + if( sz >= min_dictionary_size && sz <= max_dictionary_size ) { - coded_dict_size = real_bits( size - 1 ); - if( size > min_dictionary_size ) + data[5] = real_bits( sz - 1 ); + if( sz > min_dictionary_size ) { - const int base_size = 1 << coded_dict_size; + const int base_size = 1 << data[5]; const int wedge = base_size / 16; for( int i = 7; i >= 1; --i ) - if( base_size - ( i * wedge ) >= size ) - { coded_dict_size |= ( i << 5 ); break; } + if( base_size - ( i * wedge ) >= sz ) + { data[5] |= ( i << 5 ); break; } } return true; } @@ -202,50 +200,45 @@ struct File_header struct File_trailer { - uint8_t data_crc_[4]; // CRC32 of the uncompressed data - uint8_t data_size_[8]; // size of the uncompressed data - uint8_t member_size_[8]; // member size including header and trailer + uint8_t data[20]; // 0-3 CRC32 of the uncompressed data + // 4-11 size of the uncompressed data + // 12-19 member size including header and trailer - static int size( const int version ) - { return sizeof (File_trailer) - ( ( version >= 1 ) ? 0 : 8 ); } + static int size( const int version = 1 ) + { return ( ( version >= 1 ) ? 20 : 12 ); } uint32_t data_crc() const throw() { uint32_t tmp = 0; - for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data_crc_[i]; } + for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } void data_crc( uint32_t crc ) throw() - { - for( int i = 0; i < 4; ++i ) - { data_crc_[i] = (uint8_t)crc; crc >>= 8; } - } + { for( int i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } } long long data_size() const throw() { long long tmp = 0; - for( int i = 7; i >= 0; --i ) { tmp <<= 8; tmp += data_size_[i]; } + for( int i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } - void data_size( long long size ) throw() + void data_size( long long sz ) throw() { - for( int i = 0; i < 8; ++i ) - { data_size_[i] = (uint8_t)size; size >>= 8; } + for( int i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } } long long member_size() const throw() { long long tmp = 0; - for( int i = 7; i >= 0; --i ) { tmp <<= 8; tmp += member_size_[i]; } + for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } - void member_size( long long size ) throw() + void member_size( long long sz ) throw() { - for( int i = 0; i < 8; ++i ) - { member_size_[i] = (uint8_t)size; size >>= 8; } + for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } } }; diff --git a/lzlib.cc b/lzlib.cc index de5d0a9..740e067 100644 --- a/lzlib.cc +++ b/lzlib.cc @@ -73,9 +73,10 @@ struct LZ_Decoder { long long partial_in_size; long long partial_out_size; - Input_buffer * ibuf; + Range_decoder * rdec; LZ_decoder * lz_decoder; LZ_Errno lz_errno; + File_header member_header; // header of current member bool fatal; bool seeking; @@ -83,19 +84,21 @@ struct LZ_Decoder : partial_in_size( 0 ), partial_out_size( 0 ), - ibuf( 0 ), + rdec( 0 ), lz_decoder( 0 ), lz_errno( LZ_ok ), fatal( false ), seeking( false ) - {} + { + for( int i = 0; i < File_header::size; ++i ) member_header.data[i] = 0; + } }; bool verify_decoder( struct LZ_Decoder * const decoder ) { if( !decoder ) return false; - if( !decoder->ibuf ) + if( !decoder->rdec ) { decoder->lz_errno = LZ_bad_argument; return false; } return true; } @@ -317,9 +320,9 @@ struct LZ_Decoder * LZ_decompress_open() if( !decoder ) return 0; LZ_Decoder & d = *decoder; - try { d.ibuf = new Input_buffer; } + try { d.rdec = new Range_decoder; } catch( std::bad_alloc ) - { d.ibuf = 0; d.lz_errno = LZ_mem_error; d.fatal = true; } + { d.rdec = 0; d.lz_errno = LZ_mem_error; d.fatal = true; } return decoder; } @@ -328,7 +331,7 @@ int LZ_decompress_close( struct LZ_Decoder * const decoder ) { if( !decoder ) return -1; if( decoder->lz_decoder ) delete decoder->lz_decoder; - if( decoder->ibuf ) delete decoder->ibuf; + if( decoder->rdec ) delete decoder->rdec; delete decoder; return 0; } @@ -338,8 +341,8 @@ int LZ_decompress_finish( struct LZ_Decoder * const decoder ) { if( !verify_decoder( decoder ) || decoder->fatal ) return -1; LZ_Decoder & d = *decoder; - if( d.seeking ) { d.seeking = false; d.ibuf->purge(); } - else d.ibuf->finish(); + if( d.seeking ) { d.seeking = false; d.rdec->purge(); } + else d.rdec->finish(); return 0; } @@ -351,7 +354,7 @@ int LZ_decompress_reset( struct LZ_Decoder * const decoder ) if( d.lz_decoder ) { delete d.lz_decoder; d.lz_decoder = 0; } d.partial_in_size = 0; d.partial_out_size = 0; - d.ibuf->reset(); + d.rdec->reset(); d.lz_errno = LZ_ok; d.fatal = false; d.seeking = false; @@ -364,11 +367,11 @@ int LZ_decompress_sync_to_member( struct LZ_Decoder * const decoder ) if( !verify_decoder( decoder ) ) return -1; LZ_Decoder & d = *decoder; if( d.lz_decoder ) { delete d.lz_decoder; d.lz_decoder = 0; } - if( d.ibuf->find_header() ) d.seeking = false; + if( d.rdec->find_header() ) d.seeking = false; else { - if( !d.ibuf->at_stream_end() ) d.seeking = true; - else { d.seeking = false; d.ibuf->purge(); } + if( !d.rdec->at_stream_end() ) d.seeking = true; + else { d.seeking = false; d.rdec->purge(); } } d.lz_errno = LZ_ok; d.fatal = false; @@ -391,22 +394,21 @@ int LZ_decompress_read( struct LZ_Decoder * const decoder, } if( !d.lz_decoder ) { - if( d.ibuf->used_bytes() < 5 + (int)sizeof (File_header) ) + if( d.rdec->used_bytes() < 5 + File_header::size ) { - if( !d.ibuf->at_stream_end() || d.ibuf->finished() ) return 0; - d.ibuf->purge(); // remove trailing garbage + if( !d.rdec->at_stream_end() || d.rdec->finished() ) return 0; + d.rdec->purge(); // remove trailing garbage d.lz_errno = LZ_header_error; d.fatal = true; return -1; } - File_header header; - if( !d.ibuf->read_header( header ) ) + if( !d.rdec->read_header( d.member_header ) ) { d.lz_errno = LZ_header_error; d.fatal = true; return -1; } - try { d.lz_decoder = new LZ_decoder( header, *d.ibuf ); } + try { d.lz_decoder = new LZ_decoder( d.member_header, *d.rdec ); } catch( std::bad_alloc ) // not enough free memory { d.lz_decoder = 0; @@ -432,12 +434,12 @@ int LZ_decompress_write( struct LZ_Decoder * const decoder, { if( !verify_decoder( decoder ) || decoder->fatal ) return -1; LZ_Decoder & d = *decoder; - int result = d.ibuf->write_data( buffer, size ); + int result = d.rdec->write_data( buffer, size ); while( d.seeking ) { - if( d.ibuf->find_header() ) d.seeking = false; + if( d.rdec->find_header() ) d.seeking = false; if( result >= size ) break; - const int size2 = d.ibuf->write_data( buffer + result, size - result ); + const int size2 = d.rdec->write_data( buffer + result, size - result ); if( size2 > 0 ) result += size2; else break; } @@ -448,7 +450,7 @@ int LZ_decompress_write( struct LZ_Decoder * const decoder, int LZ_decompress_write_size( struct LZ_Decoder * const decoder ) { if( !verify_decoder( decoder ) || decoder->fatal ) return -1; - return decoder->ibuf->free_bytes(); + return decoder->rdec->free_bytes(); } @@ -462,11 +464,40 @@ LZ_Errno LZ_decompress_errno( struct LZ_Decoder * const decoder ) int LZ_decompress_finished( struct LZ_Decoder * const decoder ) { if( !verify_decoder( decoder ) ) return -1; - return ( decoder->ibuf->finished() && + return ( decoder->rdec->finished() && ( !decoder->lz_decoder || decoder->lz_decoder->member_finished() ) ); } +int LZ_decompress_member_finished( struct LZ_Decoder * const decoder ) + { + if( !verify_decoder( decoder ) ) return -1; + return ( decoder->lz_decoder && decoder->lz_decoder->member_finished() ); + } + + +int LZ_decompress_member_version( struct LZ_Decoder * const decoder ) + { + if( !verify_decoder( decoder ) ) return -1; + return decoder->member_header.version(); + } + + +int LZ_decompress_dictionary_size( struct LZ_Decoder * const decoder ) + { + if( !verify_decoder( decoder ) ) return -1; + return decoder->member_header.dictionary_size(); + } + + +unsigned int LZ_decompress_data_crc( struct LZ_Decoder * const decoder ) + { + if( verify_decoder( decoder ) && decoder->lz_decoder ) + return decoder->lz_decoder->crc(); + else return 0; + } + + long long LZ_decompress_data_position( struct LZ_Decoder * const decoder ) { if( !verify_decoder( decoder ) ) return -1; diff --git a/lzlib.h b/lzlib.h index 9ac15fa..8eb6d75 100644 --- a/lzlib.h +++ b/lzlib.h @@ -29,7 +29,7 @@ extern "C" { #endif -const char * const LZ_version_string = "0.9"; +const char * const LZ_version_string = "1.0"; enum LZ_Errno { LZ_ok = 0, LZ_bad_argument, LZ_mem_error, LZ_sequence_error, LZ_header_error, LZ_unexpected_eof, @@ -96,7 +96,11 @@ int LZ_decompress_write_size( struct LZ_Decoder * const decoder ); enum LZ_Errno LZ_decompress_errno( struct LZ_Decoder * const decoder ); int LZ_decompress_finished( struct LZ_Decoder * const decoder ); +int LZ_decompress_member_finished( struct LZ_Decoder * const decoder ); +int LZ_decompress_member_version( struct LZ_Decoder * const decoder ); +int LZ_decompress_dictionary_size( struct LZ_Decoder * const decoder ); +unsigned int LZ_decompress_data_crc( struct LZ_Decoder * const decoder ); long long LZ_decompress_data_position( struct LZ_Decoder * const decoder ); long long LZ_decompress_member_position( struct LZ_Decoder * const decoder ); long long LZ_decompress_total_in_size( struct LZ_Decoder * const decoder ); diff --git a/main.cc b/main.cc index 3d4c0f6..ce21764 100644 --- a/main.cc +++ b/main.cc @@ -41,6 +41,10 @@ #include "arg_parser.h" #include "lzlib.h" +#if CHAR_BIT != 8 +#error "Environments where CHAR_BIT != 8 are not supported." +#endif + #ifndef LLONG_MAX #define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL #endif @@ -51,10 +55,10 @@ #define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL #endif -void show_error( const char * msg, const int errcode = 0, const bool help = false ) throw(); -void internal_error( const char * msg ); -int readblock( const int fd, uint8_t * buf, const int size ) throw(); -int writeblock( const int fd, const uint8_t * buf, const int size ) throw(); +void show_error( const char * const msg, const int errcode = 0, const bool help = false ) throw(); +void internal_error( const char * const msg ); +int readblock( const int fd, uint8_t * const buf, const int size ) throw(); +int writeblock( const int fd, const uint8_t * const buf, const int size ) throw(); namespace { @@ -75,7 +79,7 @@ struct { const char * from; const char * to; } const known_extensions[] = { { ".tlz", ".tar" }, { 0, 0 } }; -struct lzma_options +struct Lzma_options { int dictionary_size; // 4KiB..512MiB int match_len_limit; // 5..273 @@ -85,6 +89,7 @@ enum Mode { m_compress = 0, m_decompress, m_test }; std::string output_filename; int outfd = -1; +mode_t outfd_mode = S_IRUSR | S_IWUSR; int verbosity = 0; bool delete_output_on_interrupt = false; @@ -164,7 +169,31 @@ void show_version() throw() } -long long getnum( const char * ptr, const int bs = 0, +const char * format_num( long long num, long long limit = 9999, + const int set_prefix = 0 ) throw() + { + const char * const si_prefix[8] = + { "k", "M", "G", "T", "P", "E", "Z", "Y" }; + const char * const binary_prefix[8] = + { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + static bool si = false; + static char buf[16]; + + if( set_prefix ) si = ( set_prefix > 0 ); + const int factor = ( si ) ? 1000 : 1024; + const char * const *prefix = ( si ) ? si_prefix : binary_prefix; + const char *p = ""; + limit = std::max( 999LL, std::min( 999999LL, limit ) ); + + for( int i = 0; i < 8 && ( llabs( num ) > limit || + ( llabs( num ) >= factor && num % factor == 0 ) ); ++i ) + { num /= factor; p = prefix[i]; } + snprintf( buf, sizeof buf, "%lld %s", num, p ); + return buf; + } + + +long long getnum( const char * const ptr, const int bs = 0, const long long llimit = LLONG_MIN + 1, const long long ulimit = LLONG_MAX ) throw() { @@ -222,7 +251,7 @@ long long getnum( const char * ptr, const int bs = 0, } -int get_dict_size( const char * arg ) throw() +int get_dict_size( const char * const arg ) throw() { char *tail; int bits = std::strtol( arg, &tail, 0 ); @@ -246,7 +275,7 @@ int extension_index( const std::string & name ) throw() } -int open_instream( const std::string & name, struct stat * in_statsp, +int open_instream( const std::string & name, struct stat * const in_statsp, const Mode program_mode, const int eindex, const bool force, const bool to_stdout ) throw() { @@ -317,13 +346,10 @@ void set_d_outname( const std::string & name, const int i ) throw() bool open_outstream( const bool force ) throw() { - if( force ) - outfd = open( output_filename.c_str(), - O_CREAT | O_TRUNC | O_WRONLY | o_binary, - S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ); - else outfd = open( output_filename.c_str(), - O_CREAT | O_EXCL | O_WRONLY | o_binary, - S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ); + int flags = O_CREAT | O_WRONLY | o_binary; + if( force ) flags |= O_TRUNC; else flags |= O_EXCL; + + outfd = open( output_filename.c_str(), flags, outfd_mode ); if( outfd < 0 ) { if( errno == EEXIST ) outfd = -2; else outfd = -1; @@ -362,6 +388,7 @@ void cleanup_and_fail( const int retval ) throw() { if( delete_output_on_interrupt ) { + delete_output_on_interrupt = false; if( verbosity >= 0 ) std::fprintf( stderr, "%s: Deleting output file `%s', if it exists.\n", program_name, output_filename.c_str() ); @@ -379,8 +406,9 @@ void close_and_set_permissions( const struct stat * const in_statsp ) bool error = false; if( in_statsp ) { - if( fchmod( outfd, in_statsp->st_mode ) != 0 ) error = true; - else (void)fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ); + if( fchmod( outfd, in_statsp->st_mode ) != 0 || + ( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 && + errno != EPERM ) ) error = true; // fchown will in many cases return with EPERM, which can be safely ignored. } if( close( outfd ) == 0 ) outfd = -1; @@ -423,6 +451,7 @@ int do_compress( LZ_Encoder * const encoder, const long long member_size, const int buffer_size = 65536; uint8_t buffer[buffer_size]; + if( verbosity >= 1 ) pp(); while( true ) { int in_size = 0; @@ -439,7 +468,6 @@ int do_compress( LZ_Encoder * const encoder, const long long member_size, in_size += rd; } const int out_size = LZ_compress_read( encoder, buffer, buffer_size ); -// std::fprintf( stderr, "%6d in_size, %5d out_size.\n", in_size, out_size ); if( out_size < 0 ) { pp(); @@ -503,7 +531,7 @@ int do_compress( LZ_Encoder * const encoder, const long long member_size, int compress( const long long member_size, const long long volume_size, - const lzma_options & encoder_options, const int infd, + const Lzma_options & encoder_options, const int infd, const Pretty_print & pp, const struct stat * const in_statsp ) { LZ_Encoder * const encoder = @@ -560,9 +588,25 @@ int do_decompress( LZ_Decoder * const decoder, const int infd, { pp(); show_error( "write error", errno ); return 1; } } } - else { if( rd < 0 ) out_size = rd; break; } + else if( rd < 0 ) { out_size = rd; break; } + if( verbosity >= 1 && LZ_decompress_member_finished( decoder ) == 1 ) + { + pp(); + if( verbosity >= 2 ) + std::fprintf( stderr, "version %d, dictionary size %7sB. ", + LZ_decompress_member_version( decoder ), + format_num( LZ_decompress_dictionary_size( decoder ) ) ); + if( verbosity >= 3 ) + std::fprintf( stderr, "data crc %08X, data size %9lld, member size %8lld. ", + LZ_decompress_data_crc( decoder ), + LZ_decompress_data_position( decoder ), + LZ_decompress_member_position( decoder ) ); + if( testing ) std::fprintf( stderr, "ok\n" ); + else std::fprintf( stderr, "done\n" ); + pp.reset(); + } + if( rd <= 0 ) break; } -// std::fprintf( stderr, "%5d in_size, %6d out_size.\n", in_size, out_size ); if( out_size < 0 ) { const LZ_Errno lz_errno = LZ_decompress_errno( decoder ); @@ -595,13 +639,6 @@ int do_decompress( LZ_Decoder * const decoder, const int infd, if( in_size == 0 && out_size == 0 ) internal_error( "library error (LZ_decompress_read)" ); } - if( verbosity >= 2 ) - std::fprintf( stderr, "decompressed size %9lld, size %9lld. ", - LZ_decompress_total_out_size( decoder ), - LZ_decompress_total_in_size( decoder ) ); - if( verbosity >= 1 ) - { if( testing ) std::fprintf( stderr, "ok\n" ); - else std::fprintf( stderr, "done\n" ); } return 0; } @@ -633,9 +670,9 @@ extern "C" void signal_handler( int ) throw() void set_signals() throw() { - signal( SIGHUP, signal_handler ); - signal( SIGINT, signal_handler ); - signal( SIGTERM, signal_handler ); + std::signal( SIGHUP, signal_handler ); + std::signal( SIGINT, signal_handler ); + std::signal( SIGTERM, signal_handler ); } } // end namespace @@ -658,7 +695,7 @@ void Pretty_print::operator()( const char * const msg ) const throw() } -void show_error( const char * msg, const int errcode, const bool help ) throw() +void show_error( const char * const msg, const int errcode, const bool help ) throw() { if( verbosity >= 0 ) { @@ -674,7 +711,7 @@ void show_error( const char * msg, const int errcode, const bool help ) throw() } -void internal_error( const char * msg ) +void internal_error( const char * const msg ) { std::string s( "internal error: " ); s += msg; show_error( s.c_str() ); @@ -685,7 +722,7 @@ void internal_error( const char * msg ) // Returns the number of bytes really read. // If (returned value < size) and (errno == 0), means EOF was reached. // -int readblock( const int fd, uint8_t * buf, const int size ) throw() +int readblock( const int fd, uint8_t * const buf, const int size ) throw() { int rest = size; errno = 0; @@ -704,7 +741,7 @@ int readblock( const int fd, uint8_t * buf, const int size ) throw() // Returns the number of bytes really written. // If (returned value < size), it is always an error. // -int writeblock( const int fd, const uint8_t * buf, const int size ) throw() +int writeblock( const int fd, const uint8_t * const buf, const int size ) throw() { int rest = size; errno = 0; @@ -719,22 +756,23 @@ int writeblock( const int fd, const uint8_t * buf, const int size ) throw() } -int main( const int argc, const char * argv[] ) +int main( const int argc, const char * const argv[] ) { // Mapping from gzip/bzip2 style 1..9 compression modes // to the corresponding LZMA compression modes. - const lzma_options option_mapping[] = + const Lzma_options option_mapping[] = { + { 1 << 16, 5 }, // -0 { 1 << 20, 10 }, // -1 - { 1 << 20, 12 }, // -2 - { 1 << 20, 17 }, // -3 - { 1 << 21, 26 }, // -4 + { 3 << 19, 12 }, // -2 + { 1 << 21, 17 }, // -3 + { 3 << 20, 26 }, // -4 { 1 << 22, 44 }, // -5 { 1 << 23, 80 }, // -6 { 1 << 24, 108 }, // -7 - { 1 << 24, 163 }, // -8 + { 3 << 23, 163 }, // -8 { 1 << 25, 273 } }; // -9 - lzma_options encoder_options = option_mapping[5]; // default = "-6" + Lzma_options encoder_options = option_mapping[6]; // default = "-6" long long member_size = LLONG_MAX; long long volume_size = LLONG_MAX; int infd = -1; @@ -755,6 +793,7 @@ int main( const int argc, const char * argv[] ) const Arg_parser::Option options[] = { + { '0', 0, Arg_parser::no }, { '1', "fast", Arg_parser::no }, { '2', 0, Arg_parser::no }, { '3', 0, Arg_parser::no }, @@ -767,6 +806,7 @@ int main( const int argc, const char * argv[] ) { 'b', "member-size", Arg_parser::yes }, { 'c', "stdout", Arg_parser::no }, { 'd', "decompress", Arg_parser::no }, + { 'e', "extreme", Arg_parser::no }, { 'f', "force", Arg_parser::no }, { 'h', "help", Arg_parser::no }, { 'k', "keep", Arg_parser::no }, @@ -789,22 +829,22 @@ int main( const int argc, const char * argv[] ) { const int code = parser.code( argind ); if( !code ) break; // no more options - const char * arg = parser.argument( argind ).c_str(); + const char * const arg = parser.argument( argind ).c_str(); switch( code ) { - case '1': case '2': case '3': - case '4': case '5': case '6': - case '7': case '8': case '9': - encoder_options = option_mapping[code-'1']; break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + encoder_options = option_mapping[code-'0']; break; case 'b': member_size = getnum( arg, 0, 100000, LLONG_MAX / 2 ); break; case 'c': to_stdout = true; break; case 'd': program_mode = m_decompress; break; + case 'e': break; case 'f': force = true; break; case 'h': show_help(); return 0; case 'k': keep_input_files = true; break; case 'm': encoder_options.match_len_limit = - getnum( arg, 0, LZ_min_match_len_limit(), - LZ_max_match_len_limit() ); break; + getnum( arg, 0, LZ_min_match_len_limit(), + LZ_max_match_len_limit() ); break; case 'o': default_output_filename = arg; break; case 'q': verbosity = -1; break; case 's': encoder_options.dictionary_size = get_dict_size( arg ); @@ -852,6 +892,7 @@ int main( const int argc, const char * argv[] ) if( program_mode == m_compress ) set_c_outname( default_output_filename, volume_size != LLONG_MAX ); else output_filename = default_output_filename; + outfd_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; if( !open_outstream( force ) ) { if( outfd == -1 && retval < 1 ) retval = 1; @@ -876,6 +917,7 @@ int main( const int argc, const char * argv[] ) if( program_mode == m_compress ) set_c_outname( input_filename, volume_size != LLONG_MAX ); else set_d_outname( input_filename, eindex ); + outfd_mode = S_IRUSR | S_IWUSR; if( !open_outstream( force ) ) { if( outfd == -1 && retval < 1 ) retval = 1; @@ -892,7 +934,6 @@ int main( const int argc, const char * argv[] ) delete_output_on_interrupt = true; const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0; pp.set_name( input_filename ); - if( verbosity >= 1 ) pp(); int tmp = 0; if( program_mode == m_compress ) tmp = compress( member_size, volume_size, encoder_options, infd, diff --git a/testsuite/check.sh b/testsuite/check.sh index e35876d..69060bd 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -11,7 +11,7 @@ objdir=`pwd` testdir=`cd "$1" ; pwd` LZIP="${objdir}"/minilzip LZCHECK="${objdir}"/lzcheck -framework_failure() { echo 'failure in testing framework'; exit 1; } +framework_failure() { echo "failure in testing framework" ; exit 1 ; } if [ ! -x "${LZIP}" ] ; then echo "${LZIP}: cannot execute" @@ -20,48 +20,49 @@ fi if [ -d tmp ] ; then rm -rf tmp ; fi mkdir tmp -echo -n "testing lzlib..." +printf "testing lzlib..." cd "${objdir}"/tmp cat "${testdir}"/test1 > in || framework_failure fail=0 +"${LZIP}" -t "${testdir}"/test1.lz || fail=1 "${LZIP}" -cd "${testdir}"/test1.lz > copy || fail=1 cmp in copy || fail=1 -for i in s4096 1 2 3 4 5 6 7 8; do +for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do "${LZIP}" -k -$i in || fail=1 mv -f in.lz copy.lz || fail=1 - echo -n "garbage" >> copy.lz || fail=1 + printf "garbage" >> copy.lz || fail=1 "${LZIP}" -df copy.lz || fail=1 cmp in copy || fail=1 - echo -n . + printf . done -for i in s4096 1 2 3 4 5 6 7 8; do +for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do "${LZIP}" -c -$i in > out || fail=1 - echo -n "g" >> out || fail=1 + printf "g" >> out || fail=1 "${LZIP}" -cd out > copy || fail=1 cmp in copy || fail=1 - echo -n . + printf . done -for i in s4096 1 2 3 4 5 6 7 8; do +for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do "${LZIP}" -$i < in > out || fail=1 "${LZIP}" -d < out > copy || fail=1 cmp in copy || fail=1 - echo -n . + printf . done -for i in s4096 1 2 3 4 5 6 7 8; do - "${LZIP}" -f -$i -o out < in || fail=1 +for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do + "${LZIP}" -fe -$i -o out < in || fail=1 "${LZIP}" -df -o copy < out.lz || fail=1 cmp in copy || fail=1 - echo -n . + printf . done "${LZCHECK}" in 2>/dev/null || fail=1 -echo -n . +printf . echo if [ ${fail} = 0 ] ; then -- cgit v1.2.3