summaryrefslogtreecommitdiffstats
path: root/encoder.cc
diff options
context:
space:
mode:
Diffstat (limited to 'encoder.cc')
-rw-r--r--encoder.cc100
1 files changed, 40 insertions, 60 deletions
diff --git a/encoder.cc b/encoder.cc
index fc8c114..7bfc40f 100644
--- a/encoder.cc
+++ b/encoder.cc
@@ -51,7 +51,7 @@ bool Matchfinder_base::read_block()
void Matchfinder_base::normalize_pos()
{
if( pos > stream_pos )
- internal_error( "pos > stream_pos in Matchfinder_base::normalize_pos" );
+ internal_error( "pos > stream_pos in Matchfinder_base::normalize_pos." );
if( !at_stream_end )
{
const int offset = pos - dictionary_size_ - before_size;
@@ -130,7 +130,7 @@ void Matchfinder_base::reset()
}
-int Matchfinder::get_match_pairs( struct Pair * pairs )
+int Matchfinder::get_match_pairs( Pair * pairs )
{
int len_limit = match_len_limit_;
if( len_limit > available_bytes() )
@@ -149,7 +149,7 @@ int Matchfinder::get_match_pairs( struct Pair * pairs )
const int key2 = tmp & ( num_prev_positions2 - 1 );
tmp ^= (unsigned)data[2] << 8;
const int key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) );
- const int key4 = num_prev_positions2 + num_prev_positions3 +
+ const int key4 = num_prev_positions23 +
( ( tmp ^ ( crc32[data[3]] << 5 ) ) & key4_mask );
if( pairs )
@@ -245,33 +245,6 @@ void Range_encoder::flush_data()
}
-void Len_encoder::encode( Range_encoder & renc, int symbol,
- const int pos_state )
- {
- symbol -= min_match_len;
- if( symbol < len_low_symbols )
- {
- renc.encode_bit( choice1, 0 );
- renc.encode_tree3( bm_low[pos_state], symbol );
- }
- else
- {
- renc.encode_bit( choice1, 1 );
- if( symbol < len_low_symbols + len_mid_symbols )
- {
- renc.encode_bit( choice2, 0 );
- renc.encode_tree3( bm_mid[pos_state], symbol - len_low_symbols );
- }
- else
- {
- renc.encode_bit( choice2, 1 );
- renc.encode_tree8( bm_high, symbol - len_low_symbols - len_mid_symbols );
- }
- }
- if( --counters[pos_state] <= 0 ) update_prices( pos_state );
- }
-
-
// End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len)
void LZ_encoder_base::full_flush( const unsigned long long data_position,
const State state )
@@ -291,15 +264,7 @@ void LZ_encoder_base::full_flush( const unsigned long long data_position,
}
-void LZ_encoder::fill_align_prices()
- {
- for( int i = 0; i < dis_align_size; ++i )
- align_prices[i] = price_symbol_reversed( bm_align, i, dis_align_bits );
- align_price_count = dis_align_size;
- }
-
-
-void LZ_encoder::fill_distance_prices()
+void LZ_encoder::update_distance_prices()
{
for( int dis = start_dis_model; dis < modeled_distances; ++dis )
{
@@ -336,6 +301,7 @@ void LZ_encoder::fill_distance_prices()
/* Return value == number of bytes advanced (ahead).
trials[0]..trials[ahead-1] contain the steps to encode.
( trials[0].dis == -1 ) means literal.
+ A match/rep longer or equal than match_len_limit finishes the sequence.
*/
int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
const State state )
@@ -416,7 +382,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
if( replens[rep] < min_match_len ) continue;
const int price = rep_match_price + price_rep( rep, state, pos_state );
for( int len = min_match_len; len <= replens[rep]; ++len )
- trials[len].update( price + rep_len_encoder.price( len, pos_state ),
+ trials[len].update( price + rep_len_prices.price( len, pos_state ),
rep, 0 );
}
@@ -435,10 +401,9 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
}
int cur = 0;
- matchfinder.move_pos();
-
while( true ) // price optimization loop
{
+ matchfinder.move_pos();
if( ++cur >= num_trials ) // no more initialized trials
{
backward( cur );
@@ -499,7 +464,6 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
const uint8_t prev_byte = matchfinder[1];
const uint8_t cur_byte = matchfinder[0];
const uint8_t match_byte = matchfinder[cur_trial.reps[0]+1];
- matchfinder.move_pos();
int next_price = cur_trial.price +
price0( bm_match[cur_state()][pos_state] );
@@ -529,7 +493,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
}
}
- const int available_bytes = std::min( matchfinder.available_bytes() + 1,
+ const int available_bytes = std::min( matchfinder.available_bytes(),
max_num_trials - 1 - cur );
if( available_bytes < min_match_len ) continue;
@@ -539,7 +503,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
// try literal + rep0
if( match_byte != cur_byte && next_trial.prev_index != cur )
{
- const uint8_t * const data = matchfinder.ptr_to_current_pos() - 1;
+ const uint8_t * const data = matchfinder.ptr_to_current_pos();
const int dis = cur_trial.reps[0] + 1;
const int limit = std::min( matchfinder.match_len_limit() + 1,
available_bytes );
@@ -564,7 +528,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
// try rep distances
for( int rep = 0; rep < num_rep_distances; ++rep )
{
- const uint8_t * const data = matchfinder.ptr_to_current_pos() - 1;
+ const uint8_t * const data = matchfinder.ptr_to_current_pos();
int len;
const int dis = cur_trial.reps[rep] + 1;
@@ -575,7 +539,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
trials[++num_trials].price = infinite_price;
int price = rep_match_price + price_rep( rep, cur_state, pos_state );
for( int i = min_match_len; i <= len; ++i )
- trials[cur+i].update( price + rep_len_encoder.price( i, pos_state ),
+ trials[cur+i].update( price + rep_len_prices.price( i, pos_state ),
rep, cur );
if( rep == 0 ) start_len = len + 1; // discard shorter matches
@@ -590,7 +554,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
int pos_state2 = ( pos_state + len ) & pos_state_mask;
State state2 = cur_state; state2.set_rep();
- price += rep_len_encoder.price( len, pos_state ) +
+ price += rep_len_prices.price( len, pos_state ) +
price0( bm_match[state2()][pos_state2] ) +
price_matched( data[len-1], data[len], data[len-dis] );
pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
@@ -624,7 +588,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
// try match + literal + rep0
if( len == pairs[i].len )
{
- const uint8_t * const data = matchfinder.ptr_to_current_pos() - 1;
+ const uint8_t * const data = matchfinder.ptr_to_current_pos();
const int dis2 = dis + 1;
int len2 = len + 1;
const int limit = std::min( matchfinder.match_len_limit() + len2,
@@ -661,17 +625,22 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
{
const unsigned long long member_size_limit =
member_size - File_trailer::size() - max_marker_size;
- const int fill_count = ( matchfinder.match_len_limit() > 12 ) ? 128 : 512;
- int fill_counter = 0;
+ const bool best = ( matchfinder.match_len_limit() > 12 );
+ const int dis_price_count = best ? 1 : 512;
+ const int align_price_count = best ? 1 : dis_align_size;
+ const int price_count = ( matchfinder.match_len_limit() > 36 ) ? 1013 : 4093;
+ int price_counter = 0;
+ int dis_price_counter = 0;
+ int align_price_counter = 0;
int reps[num_rep_distances];
State state;
for( int i = 0; i < num_rep_distances; ++i ) reps[i] = 0;
if( matchfinder.data_position() != 0 ||
renc.member_position() != File_header::size )
- return false; // can be called only once
+ return false; // can be called only once
- if( !matchfinder.finished() ) // encode first byte
+ if( !matchfinder.finished() ) // encode first byte
{
const uint8_t prev_byte = 0;
const uint8_t cur_byte = matchfinder[0];
@@ -684,15 +653,24 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
while( !matchfinder.finished() )
{
- if( pending_num_pairs == 0 )
+ if( price_counter <= 0 && pending_num_pairs == 0 )
{
- if( fill_counter <= 0 )
- { fill_distance_prices(); fill_counter = fill_count; }
- if( align_price_count <= 0 ) fill_align_prices();
+ price_counter = price_count; // recalculate prices every these bytes
+ if( dis_price_counter <= 0 )
+ { dis_price_counter = dis_price_count; update_distance_prices(); }
+ if( align_price_counter <= 0 )
+ {
+ align_price_counter = align_price_count;
+ for( int i = 0; i < dis_align_size; ++i )
+ align_prices[i] = price_symbol_reversed( bm_align, i, dis_align_bits );
+ }
+ match_len_prices.update_prices();
+ rep_len_prices.update_prices();
}
int ahead = sequence_optimizer( reps, state );
if( ahead <= 0 ) return false; // can't happen
+ price_counter -= ahead;
for( int i = 0; ahead > 0; )
{
@@ -738,7 +716,8 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
if( len == 1 ) state.set_short_rep();
else
{
- rep_len_encoder.encode( renc, len, pos_state );
+ renc.encode_len( rep_len_model, len, pos_state );
+ rep_len_prices.decrement_counter( pos_state );
state.set_rep();
}
}
@@ -746,8 +725,9 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
{
encode_pair( dis - num_rep_distances, len, pos_state );
if( get_slot( dis - num_rep_distances ) >= end_dis_model )
- --align_price_count;
- --fill_counter;
+ --align_price_counter;
+ --dis_price_counter;
+ match_len_prices.decrement_counter( pos_state );
state.set_match();
}
}