From aaed943b403d90335816ae7a8e486ffa834cd383 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 6 Nov 2015 13:50:02 +0100 Subject: Merging upstream version 1.6~pre2. Signed-off-by: Daniel Baumann --- encoder.c | 105 +++++++++++++++++++++++++------------------------------------- 1 file changed, 42 insertions(+), 63 deletions(-) (limited to 'encoder.c') diff --git a/encoder.c b/encoder.c index fa896ef..2989065 100644 --- a/encoder.c +++ b/encoder.c @@ -50,7 +50,7 @@ bool Mf_read_block( struct Matchfinder * const mf ) void Mf_normalize_pos( struct Matchfinder * const mf ) { if( mf->pos > mf->stream_pos ) - internal_error( "pos > stream_pos in Mf_normalize_pos" ); + internal_error( "pos > stream_pos in Mf_normalize_pos." ); if( !mf->at_stream_end ) { int i; @@ -256,36 +256,6 @@ void Re_flush_data( struct Range_encoder * const renc ) } -void Lee_encode( struct Len_encoder * const le, - struct Range_encoder * const renc, - int symbol, const int pos_state ) - { - symbol -= min_match_len; - if( symbol < len_low_symbols ) - { - Re_encode_bit( renc, &le->lm.choice1, 0 ); - Re_encode_tree( renc, le->lm.bm_low[pos_state], symbol, len_low_bits ); - } - else - { - Re_encode_bit( renc, &le->lm.choice1, 1 ); - if( symbol < len_low_symbols + len_mid_symbols ) - { - Re_encode_bit( renc, &le->lm.choice2, 0 ); - Re_encode_tree( renc, le->lm.bm_mid[pos_state], - symbol - len_low_symbols, len_mid_bits ); - } - else - { - Re_encode_bit( renc, &le->lm.choice2, 1 ); - Re_encode_tree( renc, le->lm.bm_high, - symbol - len_low_symbols - len_mid_symbols, len_high_bits ); - } - } - if( --le->counters[pos_state] <= 0 ) Lee_update_prices( le, pos_state ); - } - - /* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */ static void LZe_full_flush( struct LZ_encoder * const e, const State state ) { @@ -305,16 +275,7 @@ static void LZe_full_flush( struct LZ_encoder * const e, const State state ) } -static void LZe_fill_align_prices( struct LZ_encoder * const e ) - { - int i; - for( i = 0; i < dis_align_size; ++i ) - e->align_prices[i] = price_symbol_reversed( e->bm_align, i, dis_align_bits ); - e->align_price_count = dis_align_size; - } - - -static void LZe_fill_distance_prices( struct LZ_encoder * const e ) +static void LZe_update_distance_prices( struct LZ_encoder * const e ) { int dis, len_state; for( dis = start_dis_model; dis < modeled_distances; ++dis ) @@ -368,9 +329,10 @@ bool LZe_init( struct LZ_encoder * const e, struct Matchfinder * const mf, e->matchfinder = mf; if( !Re_init( &e->renc, outfd ) ) return false; - Lee_init( &e->match_len_encoder, mf->match_len_limit ); - Lee_init( &e->rep_len_encoder, mf->match_len_limit ); - e->align_price_count = 0; + Lm_init( &e->match_len_model ); + Lm_init( &e->rep_len_model ); + Lp_init( &e->match_len_prices, &e->match_len_model, mf->match_len_limit ); + Lp_init( &e->rep_len_prices, &e->rep_len_model, mf->match_len_limit ); e->num_dis_slots = 2 * real_bits( mf->dictionary_size - 1 ); for( i = 0; i < Fh_size; ++i ) @@ -382,6 +344,7 @@ bool LZe_init( struct LZ_encoder * const e, struct Matchfinder * const mf, /* Return value == number of bytes advanced (ahead). trials[0]..trials[ahead-1] contain the steps to encode. ( trials[0].dis == -1 ) means literal. + A match/rep longer or equal than match_len_limit finishes the sequence. */ static int LZe_sequence_optimizer( struct LZ_encoder * const e, const int reps[num_rep_distances], @@ -468,7 +431,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, for( len = min_match_len; len <= replens[rep]; ++len ) Tr_update( &e->trials[len], price + - Lee_price( &e->rep_len_encoder, len, pos_state ), rep, 0 ); + Lp_price( &e->rep_len_prices, len, pos_state ), rep, 0 ); } if( main_len > replens[0] ) @@ -487,8 +450,6 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, } } - Mf_move_pos( e->matchfinder ); - while( true ) /* price optimization loop */ { struct Trial *cur_trial, *next_trial; @@ -498,6 +459,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, State cur_state; uint8_t prev_byte, cur_byte, match_byte; + Mf_move_pos( e->matchfinder ); if( ++cur >= num_trials ) /* no more initialized trials */ { LZe_backward( e, cur ); @@ -557,7 +519,6 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, prev_byte = Mf_peek( e->matchfinder, 1 ); cur_byte = Mf_peek( e->matchfinder, 0 ); match_byte = Mf_peek( e->matchfinder, cur_trial->reps[0] + 1 ); - Mf_move_pos( e->matchfinder ); next_price = cur_trial->price + price0( e->bm_match[cur_state][pos_state] ); @@ -587,7 +548,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, } } - available_bytes = min( Mf_available_bytes( e->matchfinder ) + 1, + available_bytes = min( Mf_available_bytes( e->matchfinder ), max_num_trials - 1 - cur ); if( available_bytes < min_match_len ) continue; @@ -596,7 +557,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, /* try literal + rep0 */ if( match_byte != cur_byte && next_trial->prev_index != cur ) { - const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1; + const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ); const int dis = cur_trial->reps[0] + 1; const int limit = min( e->matchfinder->match_len_limit + 1, available_bytes ); @@ -619,7 +580,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, /* try rep distances */ for( rep = 0; rep < num_rep_distances; ++rep ) { - const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1; + const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ); int price; const int dis = cur_trial->reps[rep] + 1; @@ -631,7 +592,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, price = rep_match_price + LZe_price_rep( e, rep, cur_state, pos_state ); for( i = min_match_len; i <= len; ++i ) Tr_update( &e->trials[cur+i], price + - Lee_price( &e->rep_len_encoder, i, pos_state ), rep, cur ); + Lp_price( &e->rep_len_prices, i, pos_state ), rep, cur ); if( rep == 0 ) start_len = len + 1; /* discard shorter matches */ @@ -647,7 +608,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, pos_state2 = ( pos_state + len ) & pos_state_mask; state2 = St_set_rep( cur_state ); - price += Lee_price( &e->rep_len_encoder, len, pos_state ) + + price += Lp_price( &e->rep_len_prices, len, pos_state ) + price0( e->bm_match[state2][pos_state2] ) + LZe_price_matched( e, data[len-1], data[len], data[len-dis] ); pos_state2 = ( pos_state2 + 1 ) & pos_state_mask; @@ -683,7 +644,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e, /* try match + literal + rep0 */ if( len == e->pairs[i].len ) { - const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1; + const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ); const int dis2 = dis + 1; int len2 = len + 1; const int limit = min( e->matchfinder->match_len_limit + len2, @@ -721,8 +682,13 @@ bool LZe_encode_member( struct LZ_encoder * const e, { const unsigned long long member_size_limit = member_size - Ft_size - max_marker_size; - const int fill_count = ( e->matchfinder->match_len_limit > 12 ) ? 128 : 512; - int fill_counter = 0; + const bool best = ( e->matchfinder->match_len_limit > 12 ); + const int dis_price_count = best ? 1 : 512; + const int align_price_count = best ? 1 : dis_align_size; + const int price_count = ( e->matchfinder->match_len_limit > 36 ) ? 1013 : 4093; + int price_counter = 0; + int dis_price_counter = 0; + int align_price_counter = 0; int ahead, i; int reps[num_rep_distances]; State state = 0; @@ -736,24 +702,33 @@ bool LZe_encode_member( struct LZ_encoder * const e, { const uint8_t prev_byte = 0; const uint8_t cur_byte = Mf_peek( e->matchfinder, 0 ); - CRC32_update_byte( &e->crc, cur_byte ); Re_encode_bit( &e->renc, &e->bm_match[state][0], 0 ); LZe_encode_literal( e, prev_byte, cur_byte ); + CRC32_update_byte( &e->crc, cur_byte ); Mf_get_match_pairs( e->matchfinder, 0 ); Mf_move_pos( e->matchfinder ); } while( !Mf_finished( e->matchfinder ) ) { - if( e->pending_num_pairs == 0 ) + if( price_counter <= 0 && e->pending_num_pairs == 0 ) { - if( fill_counter <= 0 ) - { LZe_fill_distance_prices( e ); fill_counter = fill_count; } - if( e->align_price_count <= 0 ) LZe_fill_align_prices( e ); + price_counter = price_count; /* recalculate prices every these bytes */ + if( dis_price_counter <= 0 ) + { dis_price_counter = dis_price_count; LZe_update_distance_prices( e ); } + if( align_price_counter <= 0 ) + { + align_price_counter = align_price_count; + for( i = 0; i < dis_align_size; ++i ) + e->align_prices[i] = price_symbol_reversed( e->bm_align, i, dis_align_bits ); + } + Lp_update_prices( &e->match_len_prices ); + Lp_update_prices( &e->rep_len_prices ); } ahead = LZe_sequence_optimizer( e, reps, state ); if( ahead <= 0 ) return false; /* can't happen */ + price_counter -= ahead; for( i = 0; ahead > 0; ) { @@ -800,14 +775,18 @@ bool LZe_encode_member( struct LZ_encoder * const e, if( len == 1 ) state = St_set_short_rep( state ); else { - Lee_encode( &e->rep_len_encoder, &e->renc, len, pos_state ); + Re_encode_len( &e->renc, &e->rep_len_model, len, pos_state ); + Lp_decrement_counter( &e->rep_len_prices, pos_state ); state = St_set_rep( state ); } } else /* match */ { LZe_encode_pair( e, dis - num_rep_distances, len, pos_state ); - --fill_counter; + if( get_slot( dis - num_rep_distances ) >= end_dis_model ) + --align_price_counter; + --dis_price_counter; + Lp_decrement_counter( &e->match_len_prices, pos_state ); state = St_set_match( state ); } } -- cgit v1.2.3