/* Lzlib - A compression library for lzip files Copyright (C) 2009 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. As a special exception, you may use this file as part of a free software library without restriction. Specifically, if other files instantiate templates or use macros or inline functions from this file, or you compile this file and link it with other files to produce an executable, this file does not by itself cause the resulting executable to be covered by the GNU General Public License. This exception does not however invalidate any other reasons why the executable file might be covered by the GNU General Public License. 
*/

// Used below to size Range_encoder::min_free_bytes (2 * max_num_trials).
const int max_num_trials = 1 << 12;

// Left shift applied to the values stored in Prob_prices, i.e. prices are
// kept as integers scaled by (1 << price_shift).
const int price_shift = 6;


// Lookup table that maps a distance to a small slot number.
class Dis_slots
  {
  unsigned char data[1<<12];

public:
  // Fills the 4096-entry table: entries 0..3 map to themselves; after that
  // each pair of slots (slot, slot + 1) covers two consecutive runs of
  // 'size' entries, and 'size' doubles for every new pair, up to slot 23.
  Dis_slots()
    {
    for( int slot = 0; slot < 4; ++slot ) data[slot] = slot;
    for( int i = 4, size = 2, slot = 4; slot < 24; slot += 2 )
      {
      std::memset( &data[i], slot, size );
      std::memset( &data[i+size], slot + 1, size );
      size <<= 1;
      i += size;		// 'size' was just doubled, so this skips both runs
      }
    }

  // Returns the slot of 'dis'. Distances that do not fit in the table are
  // shifted right by 11 or 22 bits before the lookup and the result is
  // offset by 22 or 44 respectively.
  int operator[]( const uint32_t dis ) const throw()
    {
    if( dis < (1 << 12) ) return data[dis];
    if( dis < (1 << 23) ) return data[dis>>11] + 22;
    return data[dis>>22] + 44;
    }
  };

// Shared instance; defined in another translation unit.
extern const Dis_slots dis_slots;


// Table of bit prices indexed by a probability value divided by 4.
class Prob_prices
  {
  int data[bit_model_total >> 2];

public:
  // For every power-of-two interval [start, end) the stored price falls
  // linearly from (i + 1) << price_shift at 'start' toward i << price_shift
  // at 'end'.
  // NOTE(review): presumably a piecewise-linear approximation of -log2(p)
  // scaled by 1 << price_shift -- confirm.
  // data[0] is never assigned here (the smallest 'start' is 1).
  Prob_prices()
    {
    const int num_bits = ( bit_model_total_bits - 2 );
    for( int i = num_bits - 1; i >= 0; --i )
      {
      int start = 1 << ( num_bits - i - 1 );
      int end = 1 << ( num_bits - i);
      for( int j = start; j < end; ++j )
        data[j] = (i << price_shift) +
                  ( ((end - j) << price_shift) >> (num_bits - i - 1) );
      }
    }

  // Returns the price stored for 'symbol' (the two low bits are dropped).
  int operator[]( const int symbol ) const throw()
    { return data[symbol >> 2]; }
  };

// Shared instance; defined in another translation unit.
extern const Prob_prices prob_prices;


// Price of coding a 0 bit with model 'bm' (Range_encoder::encode_bit gives
// the 0 bit the share of the range proportional to bm.probability).
inline int price0( const Bit_model & bm ) throw()
  { return prob_prices[bm.probability]; }

// Price of coding a 1 bit with model 'bm'.
inline int price1( const Bit_model & bm ) throw()
  { return prob_prices[bit_model_total-bm.probability]; }

// Price of coding 'bit' with model 'bm'; any nonzero 'bit' counts as 1.
inline int price_bit( const Bit_model & bm, const int bit ) throw()
  { if( bit ) return price1( bm ); else return price0( bm ); }


// Sum of the bit prices of the 'num_bits' low bits of 'symbol'.
// A marker bit is set at position 'num_bits' so the loop ends when only the
// marker is left. Bits are taken from the least significant end and each
// one is priced with the model indexed by the remaining higher bits; these
// are the same (model, bit) pairs that Range_encoder::encode_tree visits,
// in the opposite order.
inline int price_symbol( const Bit_model bm[], int symbol, const int num_bits ) throw()
  {
  symbol |= ( 1 << num_bits );
  int price = 0;
  while( symbol > 1 )
    {
    const int bit = symbol & 1;
    symbol >>= 1;
    price += price_bit( bm[symbol], bit );
    }
  return price;
  }


// Sum of the bit prices of the 'num_bits' low bits of 'symbol', least
// significant bit first, with the model index built from the bits already
// priced (same walk as Range_encoder::encode_tree_reversed).
inline int price_symbol_reversed( const Bit_model bm[], int symbol,
                                  const int num_bits ) throw()
  {
  int price = 0;
  int model = 1;
  for( int i = num_bits; i > 0; --i )
    {
    const int bit = symbol & 1;
    symbol >>= 1;
    price += price_bit( bm[model], bit );
    model = ( model << 1 ) | bit;
    }
  return price;
  }


// Price of the 8 bits of 'symbol', most significant bit first, using
// 'match_byte' as context (same walk as Range_encoder::encode_matched).
// While the bits of 'symbol' and 'match_byte' agree, the model index is
// offset by 0x100 plus (match_bit << 8); after the first differing bit the
// remaining bits are priced with the plain index 'model'.
inline int price_matched( const Bit_model bm[], const int symbol, 
                          const int match_byte ) throw()
  {
  int price = 0;
  int model = 1;
  for( int i = 7; i >= 0; --i )
    {
    const int match_bit = ( match_byte >> i ) & 1;
    const int bit = ( symbol >> i ) & 1;
    price += price_bit( bm[(match_bit<<8)+model+0x100], bit );
    model = ( model << 1 ) | bit;
    if( match_bit != bit )
      {
      // This inner loop shares 'i' with the outer loop and finishes the
      // byte; its local 'bit' shadows the outer one.
      while( --i >= 0 )
        {
        const int bit = ( symbol >> i ) & 1;
        price += price_bit( bm[model], bit );
        model = ( model << 1 ) | bit;
        }
      break;
      }
    }
  return price;
  }


// Input buffer plus the match-search state. The constructor and
// write_data, reset, move_pos and longest_match_len are defined elsewhere.
// NOTE(review): the destructor frees three raw arrays and no copy
// constructor is declared, so copying an instance would free them twice.
class Matchfinder
  {
  // NOTE(review): presumably the sizes of the tables for 4-, 3- and 2-byte
  // keys inside prev_positions -- confirm against the constructor.
  enum { num_prev_positions4 = 1 << 20,
         num_prev_positions3 = 1 << 18,
         num_prev_positions2 = 1 << 16,
         num_prev_positions = num_prev_positions4 + num_prev_positions3 +
                              num_prev_positions2 };

  long long partial_data_pos;	// added to pos by data_position()
  const int dictionary_size_;	// bytes to keep in buffer before pos
  const int after_size;		// bytes to keep in buffer after pos
  const int buffer_size;
  uint8_t * const buffer;
  int pos;			// index in buffer of the current byte
  int cyclic_pos;		// wrapped modulo dictionary_size_ in dec_pos
  int stream_pos;		// first byte not yet read from file
  const int pos_limit;		// when reached, a new block must be read
  const int match_len_limit_;
  int32_t * const prev_positions;	// last seen position of key
  int32_t * prev_pos_tree;
  bool at_stream_end_;		// stream_pos shows real end of file

public:
  Matchfinder( const int dict_size, const int len_limit );

  ~Matchfinder()
    { delete[] prev_pos_tree; delete[] prev_positions; delete[] buffer; }

  // Byte at offset 'i' from the current position (no bounds check).
  uint8_t operator[]( const int i ) const throw() { return buffer[pos+i]; }
  bool at_stream_end() const throw() { return at_stream_end_; }
  // Bytes between the current position and the end of the data read so far.
  int available_bytes() const throw() { return stream_pos - pos; }
  long long data_position() const throw() { return partial_data_pos + pos; }
  int dictionary_size() const throw() { return dictionary_size_; }
  // Sets or clears the end-of-stream flag.
  void flushing( const bool b ) throw() { at_stream_end_ = b; }
  // True once the end-of-stream flag is set and pos has reached stream_pos.
  bool finished() const throw()
    { return at_stream_end_ && pos >= stream_pos; }
  // Room left in buffer after the data read so far.
  int free_bytes() const throw() { return buffer_size - stream_pos; }
  int match_len_limit() const throw() { return match_len_limit_; }
  const uint8_t * ptr_to_current_pos() const throw() { return buffer 
+ pos; }

  // Moves the current position back by 'ahead' bytes, wrapping cyclic_pos
  // by dictionary_size_ if it becomes negative. Returns false, changing
  // nothing, if 'ahead' is negative or larger than pos.
  bool dec_pos( const int ahead ) throw()
    {
    if( ahead < 0 || pos < ahead ) return false;
    pos -= ahead;
    cyclic_pos -= ahead;
    if( cyclic_pos < 0 ) cyclic_pos += dictionary_size_;
    return true;
    }

  // True if there is unprocessed data and either the end-of-stream flag is
  // set or at least after_size bytes are available.
  bool enough_available_bytes() const throw()
    { return ( stream_pos > pos &&
               ( at_stream_end_ || stream_pos - pos >= after_size ) ); }

  // Number of equal bytes (at most len_limit) between the data at
  // pos + index and the data 'distance' bytes before it. len_limit is first
  // reduced so that index + len_limit does not exceed available_bytes().
  int true_match_len( const int index, const int distance, int len_limit ) const throw()
    {
    if( index + len_limit > available_bytes() )
      len_limit = available_bytes() - index;
    const uint8_t * const data = buffer + pos + index - distance;
    int i = 0;
    while( i < len_limit && data[i] == data[i+distance] ) ++i;
    return i;
    }

  int write_data( uint8_t * const in_buffer, const int in_size ) throw();
  void reset() throw();
  bool move_pos() throw();
  int longest_match_len( int * const distances = 0 ) throw();
  };


// Range coder writing into the Circular_buffer it derives from.
// put_byte, used_bytes and free_bytes are not declared in this class, so
// they presumably come from Circular_buffer (declared elsewhere).
class Range_encoder : public Circular_buffer
  {
  enum { min_free_bytes = 2 * max_num_trials };
  uint64_t low;
  long long partial_member_pos;	// bytes already handed out by read_data
  uint32_t range;
  int ff_count;			// bytes held back by shift_low, not yet written
  uint8_t cache;		// byte held back until a carry can be resolved

  // Shifts the top byte out of 'low'. 'carry' is the part of 'low' above
  // bit 31. If low < 0xFF000000 or carry == 1, the cached byte and the
  // ff_count pending bytes are written with the carry added and bits 24..31
  // of 'low' become the new cached byte; otherwise the byte is only counted
  // in ff_count. In both cases 'low' keeps its 24 low bits, shifted left
  // by 8.
  void shift_low()
    {
    const uint32_t carry = low >> 32;
    if( low < 0xFF000000 || carry == 1 )
      {
      put_byte( cache + carry );
      for( ; ff_count > 0; --ff_count ) put_byte( 0xFF + carry );
      cache = low >> 24;
      }
    else ++ff_count;
    low = ( low & 0x00FFFFFF ) << 8;
    }

public:
  Range_encoder()
    : Circular_buffer( 65536 + min_free_bytes ),
      low( 0 ), partial_member_pos( 0 ), range( 0xFFFFFFFF ),
      ff_count( 0 ), cache( 0 ) {}

  bool enough_free_bytes() const throw()
    { return free_bytes() >= min_free_bytes; }

  // Forwards to Circular_buffer::read_data and adds the number of bytes
  // returned (when positive) to partial_member_pos.
  int read_data( uint8_t * const out_buffer, const int out_size ) throw()
    {
    const int size = Circular_buffer::read_data( out_buffer, out_size );
    if( size > 0 ) partial_member_pos += size;
    return size;
    }

  // Calls shift_low five times, then restores the coder state to the
  // values set by the constructor.
  void flush()
    {
    for( int i = 0; i < 5; ++i ) shift_low();
    low = 0;
    range = 0xFFFFFFFF;
    ff_count = 0;
    cache = 0;
    }

  // Bytes already read out, plus bytes in the buffer, plus pending bytes.
  long long member_position() const throw()
    { return partial_member_pos + used_bytes() + ff_count; }

  // Codes the 'num_bits' low bits of 'symbol', most significant first,
  // without a model: 'range' is halved for every bit.
  void encode( const int symbol, const int num_bits )
    {
    for( int i = num_bits - 1; i >= 
0; --i )
      {
      range >>= 1;
      if( (symbol >> i) & 1 ) low += range;
      if( range <= 0x00FFFFFF ) { range <<= 8; shift_low(); }
      }
    }

  // Codes one bit with model 'bm' and updates the model. A 0 bit keeps the
  // lower part of the range ('bound') and raises bm.probability; a nonzero
  // bit takes the rest and lowers it. 'range' is renormalized whenever it
  // drops to 0x00FFFFFF or below.
  void encode_bit( Bit_model & bm, const int bit )
    {
    const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
    if( !bit )
      {
      range = bound;
      bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits;
      }
    else
      {
      low += bound;
      range -= bound;
      bm.probability -= bm.probability >> bit_model_move_bits;
      }
    if( range <= 0x00FFFFFF ) { range <<= 8; shift_low(); }
    }

  // Codes the 'num_bits' low bits of 'symbol', most significant first; the
  // model index is 1 followed by the bits already coded.
  void encode_tree( Bit_model bm[], const int symbol, const int num_bits )
    {
    int mask = ( 1 << ( num_bits - 1 ) );
    int model = 1;
    for( int i = num_bits; i > 0; --i, mask >>= 1 )
      {
      // 'bit' is 0 or 'mask', not 0 or 1; encode_bit only tests it for zero
      const int bit = ( symbol & mask );
      encode_bit( bm[model], bit );
      model <<= 1;
      if( bit ) model |= 1;
      }
    }

  // Same as encode_tree, but takes the bits least significant first.
  void encode_tree_reversed( Bit_model bm[], int symbol, const int num_bits )
    {
    int model = 1;
    for( int i = num_bits; i > 0; --i )
      {
      const int bit = symbol & 1;
      encode_bit( bm[model], bit );
      model = ( model << 1 ) | bit;
      symbol >>= 1;
      }
    }

  // Codes the 8 bits of 'symbol', most significant first, using
  // 'match_byte' as context. While the bits of both bytes agree, the model
  // index is offset by 0x100 plus (match_bit << 8); after the first
  // differing bit the rest are coded with the plain index 'model'.
  void encode_matched( Bit_model bm[], int symbol, int match_byte )
    {
    int model = 1;
    for( int i = 7; i >= 0; --i )
      {
      const int bit = ( symbol >> i ) & 1;
      const int match_bit = ( match_byte >> i ) & 1;
      encode_bit( bm[(match_bit<<8)+model+0x100], bit );
      model = ( model << 1 ) | bit;
      if( match_bit != bit )
        {
        // Shares 'i' with the outer loop and finishes the byte; the local
        // 'bit' shadows the outer one.
        while( --i >= 0 )
          {
          const int bit = ( symbol >> i ) & 1;
          encode_bit( bm[model], bit );
          model = ( model << 1 ) | bit;
          }
        break;
        }
      }
    }
  };


// Models and cached prices for match lengths. encode() is defined
// elsewhere.
class Len_encoder
  {
  Bit_model choice1;
  Bit_model choice2;
  Bit_model bm_low[pos_states][len_low_symbols];
  Bit_model bm_mid[pos_states][len_mid_symbols];
  Bit_model bm_high[len_high_symbols];
  int prices[pos_states][max_len_symbols];
  const int len_symbols;	// len_limit + 1 - min_match_len
  int counters[pos_states];	// set to len_symbols by update_prices

  // Recomputes prices[pos_state][0 .. len_symbols - 1] from the current
  // models. Low symbols cost choice1 = 0 plus a bm_low tree, mid symbols
  // cost choice1 = 1, choice2 = 0 plus a bm_mid tree, and the rest cost
  // choice1 = 1, choice2 = 1 plus a bm_high tree.
  // NOTE(review): counters[] is only written in this header; presumably
  // encode() counts it down to decide when to refresh -- confirm.
  void update_prices( const int pos_state ) throw()
    {
    int * const pps = prices[pos_state];
    int price = price0( choice1 );
    int len = 0;
    for( ; len < len_low_symbols && len < len_symbols; ++len )
      pps[len] = price +
                 price_symbol( bm_low[pos_state], len, 
len_low_bits );
    price = price1( choice1 );
    for( ; len < len_low_symbols + len_mid_symbols && len < len_symbols; ++len )
      pps[len] = price + price0( choice2 ) +
                 price_symbol( bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
    for( ; len < len_symbols; ++len )
      pps[len] = price + price1( choice2 ) +
                 price_symbol( bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
    counters[pos_state] = len_symbols;
    }

public:
  // Computes the initial price table of every position state.
  Len_encoder( const int len_limit )
    : len_symbols( len_limit + 1 - min_match_len )
    {
    for( int i = 0; i < pos_states; ++i ) update_prices( i );
    }

  void encode( Range_encoder & range_encoder, int symbol, const int pos_state );

  // Cached price of length 'symbol' (an actual length, not yet reduced by
  // min_match_len) for 'pos_state'.
  int price( const int symbol, const int pos_state ) const throw()
    { return prices[pos_state][symbol - min_match_len]; }
  };


// Codes and prices literal bytes with a set of models selected by the
// previous byte through state().
class Literal_encoder
  {
  // NOTE(review): the next line is damaged in this copy. The text between
  // "1<" and "> ( 8 - literal_context_bits )" is missing; it held the rest
  // of the bm_literal declaration and the start of the state() helper that
  // the public members below call. Restore it from a pristine copy of the
  // file before building.
  Bit_model bm_literal[1<> ( 8 - literal_context_bits ) ); }

public:
  // Codes 'symbol' as a plain 8-bit tree.
  void encode( Range_encoder & range_encoder, uint8_t prev_byte, uint8_t symbol )
    { range_encoder.encode_tree( bm_literal[state(prev_byte)], symbol, 8 ); }

  // Codes 'symbol' using 'match_byte' as additional context.
  void encode_matched( Range_encoder & range_encoder, uint8_t prev_byte, uint8_t match_byte, uint8_t symbol )
    { range_encoder.encode_matched( bm_literal[state(prev_byte)], symbol, match_byte ); }

  // Price counterpart of encode_matched. Note that here 'symbol' comes
  // before 'match_byte', the reverse of encode_matched.
  int price_matched( uint8_t prev_byte, uint8_t symbol, uint8_t match_byte ) const throw()
    { return ::price_matched( bm_literal[state(prev_byte)], symbol, match_byte ); }

  // Price counterpart of encode.
  int price_symbol( uint8_t prev_byte, uint8_t symbol ) const throw()
    { return ::price_symbol( bm_literal[state(prev_byte)], symbol, 8 ); }
  };


// Encoder for one member. The constructor, encode_member,
// best_pair_sequence, full_flush and sync_flush are defined elsewhere.
class LZ_encoder
  {
  enum { dis_align_mask = dis_align_size - 1,
         infinite_price = 0x0FFFFFFF,	// returned by price_pair for rejected pairs
         max_marker_size = 15,
         num_rep_distances = 4 };	// must be 4

  struct Trial
    {
    State state;
    int dis;
    int prev_index;	// index in trials[] of the previous trial (see backward)
    int price;		// dual use var; cumulative price, match length
    int reps[num_rep_distances];

    // Replaces dis, prev_index and price only if 'pr' is cheaper than the
    // price currently stored.
    void update( const int d, const int p_i, const int pr ) throw()
      { if( pr < price ) { dis = d; prev_index = p_i; price = pr; } }
    };

  const long long member_size_limit;
  int 
longest_match_found;
  uint32_t crc_;

  Bit_model bm_match[State::states][pos_states];
  Bit_model bm_rep[State::states];
  Bit_model bm_rep0[State::states];
  Bit_model bm_rep1[State::states];
  Bit_model bm_rep2[State::states];
  Bit_model bm_len[State::states][pos_states];
  // NOTE(review): the next line is damaged in this copy. The text between
  // "1<" and "= num_rep_distances )" is missing. It held the rest of the
  // bm_dis_slot declaration, every declaration that followed it (the code
  // below uses bm_dis, bm_align, matchfinder, range_encoder, len_encoder,
  // rep_match_len_encoder, match_distances, trials, dis_slot_prices,
  // dis_prices, align_prices, align_price_count, member_finished_ and
  // fill_align_prices, none of which is declared in the surviving text),
  // and the header of the function whose body tail follows. Restore it
  // from a pristine copy of the file before building.
  //
  // The surviving tail updates 'reps' from 'dis'. In the first branch (the
  // left-hand side of its condition is part of the lost text) the entries
  // of 'reps' move one place toward the end and reps[0] becomes
  // dis - num_rep_distances. Otherwise, if dis > 0, reps[dis] is moved to
  // the front and the entries before it move one place toward the end.
  Bit_model bm_dis_slot[max_dis_states][1<= num_rep_distances )
      {
      for( int i = num_rep_distances - 1; i > 0; --i ) reps[i] = reps[i-1];
      reps[0] = dis - num_rep_distances;
      }
    else if( dis > 0 )
      {
      const int distance = reps[dis];
      for( int i = dis; i > 0; --i ) reps[i] = reps[i-1];
      reps[0] = distance;
      }
    }

  // Price of the bm_rep0 = 0, bm_len = 0 decisions for 'state' and
  // 'pos_state'.
  // NOTE(review): by its name this is the length-1 repeat of rep0 --
  // confirm.
  int price_rep_len1( const State & state, const int pos_state ) const throw()
    {
    return price0( bm_rep0[state()] ) + price0( bm_len[state()][pos_state] );
    }

  // Price of a match of length 'len' at repeated distance number 'rep'
  // (0..3): the length price plus the bm_rep0 / bm_len / bm_rep1 / bm_rep2
  // decisions that select 'rep'.
  int price_rep( const int rep, const int len, const State & state, const int pos_state ) const throw()
    {
    int price = rep_match_len_encoder.price( len, pos_state );
    if( rep == 0 )
      {
      price += price0( bm_rep0[state()] );
      price += price1( bm_len[state()][pos_state] );
      }
    else
      {
      price += price1( bm_rep0[state()] );
      if( rep == 1 )
        price += price0( bm_rep1[state()] );
      else
        {
        price += price1( bm_rep1[state()] );
        price += price_bit( bm_rep2[state()], rep - 2 );
        }
      }
    return price;
    }

  // Price of the length and distance of a match. Returns infinite_price
  // when len <= min_match_len and dis >= modeled_distances. Distances below
  // modeled_distances use the dis_prices table; larger ones add the slot
  // price and the price of the low alignment bits.
  int price_pair( const int dis, const int len, const int pos_state ) const throw()
    {
    if( len <= min_match_len && dis >= modeled_distances )
      return infinite_price;
    int price = len_encoder.price( len, pos_state );
    const int dis_state = get_dis_state( len );
    if( dis < modeled_distances )
      price += dis_prices[dis_state][dis];
    else
      price += dis_slot_prices[dis_state][dis_slots[dis]] +
               align_prices[dis & dis_align_mask];
    return price;
    }

  // Codes the length and the distance of a match. The distance slot is
  // coded as a tree. For slots from start_dis_model up, the offset of 'dis'
  // from the slot's base follows: as a reversed tree for slots below
  // end_dis_model; otherwise the high bits are coded directly and the low
  // dis_align_bits bits as a reversed tree with bm_align. In that last
  // case align_price_count is decremented and fill_align_prices is called
  // when it reaches 0 or less.
  void encode_pair( const uint32_t dis, const int len, const int pos_state ) throw()
    {
    len_encoder.encode( range_encoder, len, pos_state );
    const int dis_slot = dis_slots[dis];
    range_encoder.encode_tree( bm_dis_slot[get_dis_state(len)], dis_slot, dis_slot_bits );

    if( dis_slot >= start_dis_model )
      {
      const int direct_bits = ( dis_slot >> 1 ) - 1;
      const uint32_t base = ( 2 | 
( dis_slot & 1 ) ) << direct_bits;
      const uint32_t direct_dis = dis - base;

      if( dis_slot < end_dis_model )
        range_encoder.encode_tree_reversed( bm_dis + base - dis_slot, direct_dis, direct_bits );
      else
        {
        range_encoder.encode( direct_dis >> dis_align_bits, direct_bits - dis_align_bits );
        range_encoder.encode_tree_reversed( bm_align, direct_dis, dis_align_bits );
        if( --align_price_count <= 0 ) fill_align_prices();
        }
      }
    }

  // Fills match_distances through the matchfinder and returns the longest
  // length found. If that length equals the matchfinder's limit, it is
  // extended (up to max_match_len) with true_match_len.
  int read_match_distances() throw()
    {
    int len = matchfinder.longest_match_len( match_distances );
    if( len == matchfinder.match_len_limit() )
      len += matchfinder.true_match_len( len, match_distances[len] + 1, max_match_len - len );
    return len;
    }

  // Advances the matchfinder 'n' positions. longest_match_len() is called
  // before every step, except before the first one when 'skip' is true.
  // Returns false as soon as matchfinder.move_pos() fails.
  bool move_pos( int n, bool skip = false ) throw()
    {
    while( --n >= 0 )
      {
      if( skip ) skip = false;
      else matchfinder.longest_match_len();
      if( !matchfinder.move_pos() ) return false;
      }
    return true;
    }

  // Walks the prev_index chain from trials[cur] back to trials[0]. 'dis'
  // stays bound to the trial where the walk starts and is swapped with the
  // 'dis' of every trial visited; each visited trial's 'price' is
  // overwritten with the length of the step that leaves it.
  void backward( int cur )
    {
    int & dis = trials[cur].dis;
    while( cur > 0 )
      {
      const int prev_index = trials[cur].prev_index;
      Trial & prev_trial = trials[prev_index];
      std::swap( dis, prev_trial.dis );
      prev_trial.price = cur - prev_index;			// len
      cur = prev_index;
      }
    }

  int best_pair_sequence( const int reps[num_rep_distances], const State & state );

  bool full_flush();

public:
  LZ_encoder( Matchfinder & mf, const File_header & header, const long long member_size );

  bool encode_member( const bool finish );

  // True once member_finished_ is set and the range encoder's buffer has
  // been emptied.
  bool member_finished() const throw()
    { return member_finished_ && !range_encoder.used_bytes(); }

  // Forwards to Range_encoder::read_data.
  int read_data( uint8_t * const buffer, const int size ) throw()
    { return range_encoder.read_data( buffer, size ); }

  bool sync_flush();

  long long member_position() const throw()
    { return range_encoder.member_position(); }
  };