diff options
Diffstat (limited to 'encoder.c')
-rw-r--r-- | encoder.c | 561 |
1 files changed, 265 insertions, 296 deletions
@@ -1,5 +1,5 @@ /* Clzip - LZMA lossless data compressor - Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -61,16 +61,16 @@ void Mf_normalize_pos( struct Matchfinder * const mf ) mf->pos -= offset; mf->stream_pos -= offset; for( i = 0; i < mf->num_prev_positions; ++i ) - if( mf->prev_positions[i] >= 0 ) mf->prev_positions[i] -= offset; + mf->prev_positions[i] -= min( mf->prev_positions[i], offset ); for( i = 0; i < 2 * ( mf->dictionary_size + 1 ); ++i ) - if( mf->prev_pos_tree[i] >= 0 ) mf->prev_pos_tree[i] -= offset; + mf->prev_pos_tree[i] -= min( mf->prev_pos_tree[i], offset ); Mf_read_block( mf ); } } -bool Mf_init( struct Matchfinder * const mf, - const int dict_size, const int match_len_limit, const int ifd ) +bool Mf_init( struct Matchfinder * const mf, const int dict_size, + const int match_len_limit, const int ifd ) { const int buffer_size_limit = ( 2 * dict_size ) + before_size + after_size; unsigned size; @@ -118,7 +118,7 @@ bool Mf_init( struct Matchfinder * const mf, else mf->prev_positions = (int32_t *)malloc( size * sizeof (int32_t) ); if( !mf->prev_positions ) { free( mf->buffer ); return false; } mf->prev_pos_tree = mf->prev_positions + mf->num_prev_positions; - for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = -1; + for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = 0; return true; } @@ -126,13 +126,13 @@ bool Mf_init( struct Matchfinder * const mf, void Mf_reset( struct Matchfinder * const mf ) { int i; - const int size = mf->stream_pos - mf->pos; - if( size > 0 ) memmove( mf->buffer, mf->buffer + mf->pos, size ); + if( mf->stream_pos > mf->pos ) + memmove( mf->buffer, mf->buffer + mf->pos, mf->stream_pos - mf->pos ); mf->partial_data_pos = 0; mf->stream_pos -= mf->pos; mf->pos = 0; mf->cyclic_pos = 0; - for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = -1; + for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = 0; Mf_read_block( mf ); } @@ -143,10 +143,11 @@ int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs ) int32_t * ptr1 = ptr0 + 1; int32_t * newptr; int len = 0, len0 = 0, len1 = 0; - int maxlen = min_match_len - 1; + int maxlen = 0; int num_pairs = 0; - const int min_pos = (mf->pos > mf->dictionary_size) ? - mf->pos - mf->dictionary_size : 0; + const int pos1 = mf->pos + 1; + const int min_pos = + ( mf->pos > mf->dictionary_size ) ? mf->pos - mf->dictionary_size : 0; const uint8_t * const data = mf->buffer + mf->pos; int count, delta, key2, key3, key4, newpos; unsigned tmp; @@ -160,7 +161,7 @@ int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs ) tmp = crc32[data[0]] ^ data[1]; key2 = tmp & ( num_prev_positions2 - 1 ); - tmp ^= (uint32_t)data[2] << 8; + tmp ^= (unsigned)data[2] << 8; key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) ); key4 = num_prev_positions2 + num_prev_positions3 + ( ( tmp ^ ( crc32[data[3]] << 5 ) ) & mf->key4_mask ); @@ -169,40 +170,40 @@ int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs ) { int np2 = mf->prev_positions[key2]; int np3 = mf->prev_positions[key3]; - if( np2 >= min_pos && mf->buffer[np2] == data[0] ) + if( np2 > min_pos && mf->buffer[np2-1] == data[0] ) { - pairs[0].dis = mf->pos - np2 - 1; + pairs[0].dis = mf->pos - np2; pairs[0].len = maxlen = 2; num_pairs = 1; } - if( np2 != np3 && np3 >= min_pos && mf->buffer[np3] == data[0] ) + if( np2 != np3 && np3 > min_pos && mf->buffer[np3-1] == data[0] ) { maxlen = 3; - pairs[num_pairs].dis = mf->pos - np3 - 1; - ++num_pairs; np2 = np3; + pairs[num_pairs].dis = mf->pos - np2; + ++num_pairs; } if( num_pairs > 0 ) { - delta = mf->pos - np2; + delta = pos1 - np2; while( maxlen < len_limit && data[maxlen-delta] == data[maxlen] ) ++maxlen; pairs[num_pairs-1].len = maxlen; - if( maxlen >= len_limit ) pairs = 0; + if( maxlen >= len_limit ) pairs = 0; /* done. now just skip */ } if( maxlen < 3 ) maxlen = 3; } - mf->prev_positions[key2] = mf->pos; - mf->prev_positions[key3] = mf->pos; + mf->prev_positions[key2] = pos1; + mf->prev_positions[key3] = pos1; newpos = mf->prev_positions[key4]; - mf->prev_positions[key4] = mf->pos; + mf->prev_positions[key4] = pos1; for( count = mf->cycles; ; ) { - if( newpos < min_pos || --count < 0 ) { *ptr0 = *ptr1 = -1; break; } + if( newpos <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; } - delta = mf->pos - newpos; + delta = pos1 - newpos; newptr = mf->prev_pos_tree + ( ( mf->cyclic_pos - delta + ( (mf->cyclic_pos >= delta) ? 0 : mf->dictionary_size + 1 ) ) << 1 ); @@ -255,67 +256,65 @@ void Re_flush_data( struct Range_encoder * const renc ) } -void Lee_encode( struct Len_encoder * const len_encoder, +void Lee_encode( struct Len_encoder * const le, struct Range_encoder * const renc, int symbol, const int pos_state ) { symbol -= min_match_len; if( symbol < len_low_symbols ) { - Re_encode_bit( renc, &len_encoder->lm.choice1, 0 ); - Re_encode_tree( renc, len_encoder->lm.bm_low[pos_state], symbol, len_low_bits ); + Re_encode_bit( renc, &le->lm.choice1, 0 ); + Re_encode_tree( renc, le->lm.bm_low[pos_state], symbol, len_low_bits ); } else { - Re_encode_bit( renc, &len_encoder->lm.choice1, 1 ); + Re_encode_bit( renc, &le->lm.choice1, 1 ); if( symbol < len_low_symbols + len_mid_symbols ) { - Re_encode_bit( renc, &len_encoder->lm.choice2, 0 ); - Re_encode_tree( renc, len_encoder->lm.bm_mid[pos_state], + Re_encode_bit( renc, &le->lm.choice2, 0 ); + Re_encode_tree( renc, le->lm.bm_mid[pos_state], symbol - len_low_symbols, len_mid_bits ); } else { - Re_encode_bit( renc, &len_encoder->lm.choice2, 1 ); - Re_encode_tree( renc, len_encoder->lm.bm_high, + Re_encode_bit( renc, &le->lm.choice2, 1 ); + Re_encode_tree( renc, le->lm.bm_high, symbol - len_low_symbols - len_mid_symbols, len_high_bits ); } } - if( --len_encoder->counters[pos_state] <= 0 ) - Lee_update_prices( len_encoder, pos_state ); + if( --le->counters[pos_state] <= 0 ) Lee_update_prices( le, pos_state ); } /* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */ -static void LZe_full_flush( struct LZ_encoder * const encoder, const State state ) +static void LZe_full_flush( struct LZ_encoder * const e, const State state ) { int i; - const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; + const int pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask; File_trailer trailer; - Re_encode_bit( &encoder->renc, &encoder->bm_match[state][pos_state], 1 ); - Re_encode_bit( &encoder->renc, &encoder->bm_rep[state], 0 ); - LZe_encode_pair( encoder, 0xFFFFFFFFU, min_match_len, pos_state ); - Re_flush( &encoder->renc ); - Ft_set_data_crc( trailer, LZe_crc( encoder ) ); - Ft_set_data_size( trailer, Mf_data_position( encoder->matchfinder ) ); - Ft_set_member_size( trailer, Re_member_position( &encoder->renc ) + Ft_size ); + Re_encode_bit( &e->renc, &e->bm_match[state][pos_state], 1 ); + Re_encode_bit( &e->renc, &e->bm_rep[state], 0 ); + LZe_encode_pair( e, 0xFFFFFFFFU, min_match_len, pos_state ); + Re_flush( &e->renc ); + Ft_set_data_crc( trailer, LZe_crc( e ) ); + Ft_set_data_size( trailer, Mf_data_position( e->matchfinder ) ); + Ft_set_member_size( trailer, Re_member_position( &e->renc ) + Ft_size ); for( i = 0; i < Ft_size; ++i ) - Re_put_byte( &encoder->renc, trailer[i] ); - Re_flush_data( &encoder->renc ); + Re_put_byte( &e->renc, trailer[i] ); + Re_flush_data( &e->renc ); } -static void LZe_fill_align_prices( struct LZ_encoder * const encoder ) +static void LZe_fill_align_prices( struct LZ_encoder * const e ) { int i; for( i = 0; i < dis_align_size; ++i ) - encoder->align_prices[i] = - price_symbol_reversed( encoder->bm_align, i, dis_align_bits ); - encoder->align_price_count = dis_align_size; + e->align_prices[i] = price_symbol_reversed( e->bm_align, i, dis_align_bits ); + e->align_price_count = dis_align_size; } -static void LZe_fill_distance_prices( struct LZ_encoder * const encoder ) +static void LZe_fill_distance_prices( struct LZ_encoder * const e ) { int dis, len_state; for( dis = start_dis_model; dis < modeled_distances; ++dis ) @@ -323,22 +322,21 @@ static void LZe_fill_distance_prices( struct LZ_encoder * const encoder ) const int dis_slot = dis_slots[dis]; const int direct_bits = ( dis_slot >> 1 ) - 1; const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits; - const int price = - price_symbol_reversed( encoder->bm_dis + base - dis_slot - 1, - dis - base, direct_bits ); + const int price = price_symbol_reversed( e->bm_dis + base - dis_slot - 1, + dis - base, direct_bits ); for( len_state = 0; len_state < len_states; ++len_state ) - encoder->dis_prices[len_state][dis] = price; + e->dis_prices[len_state][dis] = price; } for( len_state = 0; len_state < len_states; ++len_state ) { - int * const dsp = encoder->dis_slot_prices[len_state]; - int * const dp = encoder->dis_prices[len_state]; - const Bit_model * const bmds = encoder->bm_dis_slot[len_state]; + int * const dsp = e->dis_slot_prices[len_state]; + int * const dp = e->dis_prices[len_state]; + const Bit_model * const bmds = e->bm_dis_slot[len_state]; int slot = 0; - for( ; slot < end_dis_model && slot < encoder->num_dis_slots; ++slot ) + for( ; slot < end_dis_model; ++slot ) dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ); - for( ; slot < encoder->num_dis_slots; ++slot ) + for( ; slot < e->num_dis_slots; ++slot ) dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ) + (((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift_bits ); @@ -350,45 +348,42 @@ static void LZe_fill_distance_prices( struct LZ_encoder * const encoder ) } -bool LZe_init( struct LZ_encoder * const encoder, - struct Matchfinder * const mf, +bool LZe_init( struct LZ_encoder * const e, struct Matchfinder * const mf, const File_header header, const int outfd ) { int i; - encoder->pending_num_pairs = 0; - encoder->crc = 0xFFFFFFFFU; - - Bm_array_init( encoder->bm_literal[0], (1 << literal_context_bits) * 0x300 ); - Bm_array_init( encoder->bm_match[0], states * pos_states ); - Bm_array_init( encoder->bm_rep, states ); - Bm_array_init( encoder->bm_rep0, states ); - Bm_array_init( encoder->bm_rep1, states ); - Bm_array_init( encoder->bm_rep2, states ); - Bm_array_init( encoder->bm_len[0], states * pos_states ); - Bm_array_init( encoder->bm_dis_slot[0], len_states * (1 << dis_slot_bits) ); - Bm_array_init( encoder->bm_dis, modeled_distances - end_dis_model ); - Bm_array_init( encoder->bm_align, dis_align_size ); - - encoder->matchfinder = mf; - if( !Re_init( &encoder->renc, outfd ) ) return false; - Lee_init( &encoder->match_len_encoder, encoder->matchfinder->match_len_limit ); - Lee_init( &encoder->rep_len_encoder, encoder->matchfinder->match_len_limit ); - encoder->num_dis_slots = - 2 * real_bits( encoder->matchfinder->dictionary_size - 1 ); - - encoder->align_price_count = 0; + e->pending_num_pairs = 0; + e->crc = 0xFFFFFFFFU; + + Bm_array_init( e->bm_literal[0], (1 << literal_context_bits) * 0x300 ); + Bm_array_init( e->bm_match[0], states * pos_states ); + Bm_array_init( e->bm_rep, states ); + Bm_array_init( e->bm_rep0, states ); + Bm_array_init( e->bm_rep1, states ); + Bm_array_init( e->bm_rep2, states ); + Bm_array_init( e->bm_len[0], states * pos_states ); + Bm_array_init( e->bm_dis_slot[0], len_states * (1 << dis_slot_bits) ); + Bm_array_init( e->bm_dis, modeled_distances - end_dis_model ); + Bm_array_init( e->bm_align, dis_align_size ); + + e->matchfinder = mf; + if( !Re_init( &e->renc, outfd ) ) return false; + Lee_init( &e->match_len_encoder, mf->match_len_limit ); + Lee_init( &e->rep_len_encoder, mf->match_len_limit ); + e->align_price_count = 0; + e->num_dis_slots = 2 * real_bits( mf->dictionary_size - 1 ); for( i = 0; i < Fh_size; ++i ) - Re_put_byte( &encoder->renc, header[i] ); + Re_put_byte( &e->renc, header[i] ); return true; } /* Return value == number of bytes advanced (ahead). trials[0]..trials[ahead-1] contain the steps to encode. - ( trials[0].dis == -1 && trials[0].price == 1 ) means literal. + ( trials[0].dis == -1 ) means literal. */ -static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, +static int LZe_sequence_optimizer( struct LZ_encoder * const e, const int reps[num_rep_distances], const State state ) { @@ -396,111 +391,108 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, int replens[num_rep_distances]; int rep_index = 0; - if( encoder->pending_num_pairs > 0 ) /* from previous call */ + if( e->pending_num_pairs > 0 ) /* from previous call */ { - num_pairs = encoder->pending_num_pairs; - encoder->pending_num_pairs = 0; + num_pairs = e->pending_num_pairs; + e->pending_num_pairs = 0; } else - num_pairs = LZe_read_match_distances( encoder ); - main_len = ( num_pairs > 0 ) ? encoder->pairs[num_pairs-1].len : 0; + num_pairs = LZe_read_match_distances( e ); + main_len = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0; for( i = 0; i < num_rep_distances; ++i ) { replens[i] = - Mf_true_match_len( encoder->matchfinder, 0, reps[i] + 1, max_match_len ); + Mf_true_match_len( e->matchfinder, 0, reps[i] + 1, max_match_len ); if( replens[i] > replens[rep_index] ) rep_index = i; } - if( replens[rep_index] >= encoder->matchfinder->match_len_limit ) + if( replens[rep_index] >= e->matchfinder->match_len_limit ) { - encoder->trials[0].dis = rep_index; - encoder->trials[0].price = replens[rep_index]; - LZe_move_pos( encoder, replens[rep_index] ); + e->trials[0].dis = rep_index; + e->trials[0].price = replens[rep_index]; + LZe_move_pos( e, replens[rep_index] ); return replens[rep_index]; } - if( main_len >= encoder->matchfinder->match_len_limit ) + if( main_len >= e->matchfinder->match_len_limit ) { - encoder->trials[0].dis = encoder->pairs[num_pairs-1].dis + num_rep_distances; - encoder->trials[0].price = main_len; - LZe_move_pos( encoder, main_len ); + e->trials[0].dis = e->pairs[num_pairs-1].dis + num_rep_distances; + e->trials[0].price = main_len; + LZe_move_pos( e, main_len ); return main_len; } { - const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; - const int match_price = price1( encoder->bm_match[state][pos_state] ); - const int rep_match_price = match_price + price1( encoder->bm_rep[state] ); - const uint8_t prev_byte = Mf_peek( encoder->matchfinder, -1 ); - const uint8_t cur_byte = Mf_peek( encoder->matchfinder, 0 ); - const uint8_t match_byte = Mf_peek( encoder->matchfinder, -reps[0]-1 ); - - encoder->trials[0].state = state; - encoder->trials[1].dis = -1; - encoder->trials[1].price = price0( encoder->bm_match[state][pos_state] ); + const int pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask; + const int match_price = price1( e->bm_match[state][pos_state] ); + const int rep_match_price = match_price + price1( e->bm_rep[state] ); + const uint8_t prev_byte = Mf_peek( e->matchfinder, 1 ); + const uint8_t cur_byte = Mf_peek( e->matchfinder, 0 ); + const uint8_t match_byte = Mf_peek( e->matchfinder, reps[0] + 1 ); + + e->trials[0].state = state; + e->trials[1].dis = -1; /* literal */ + e->trials[1].price = price0( e->bm_match[state][pos_state] ); if( St_is_char( state ) ) - encoder->trials[1].price += - LZe_price_literal( encoder, prev_byte, cur_byte ); + e->trials[1].price += LZe_price_literal( e, prev_byte, cur_byte ); else - encoder->trials[1].price += - LZe_price_matched( encoder, prev_byte, cur_byte, match_byte ); + e->trials[1].price += LZe_price_matched( e, prev_byte, cur_byte, match_byte ); if( match_byte == cur_byte ) - Tr_update( &encoder->trials[1], rep_match_price + - LZe_price_rep_len1( encoder, state, pos_state ), 0, 0 ); + Tr_update( &e->trials[1], rep_match_price + + LZe_price_shortrep( e, state, pos_state ), 0, 0 ); num_trials = max( main_len, replens[rep_index] ); if( num_trials < min_match_len ) { - encoder->trials[0].dis = encoder->trials[1].dis; - encoder->trials[0].price = 1; - Mf_move_pos( encoder->matchfinder ); + e->trials[0].dis = e->trials[1].dis; + e->trials[0].price = 1; + Mf_move_pos( e->matchfinder ); return 1; } for( i = 0; i < num_rep_distances; ++i ) - encoder->trials[0].reps[i] = reps[i]; - encoder->trials[1].prev_index = 0; - encoder->trials[1].prev_index2 = single_step_trial; + e->trials[0].reps[i] = reps[i]; + e->trials[1].prev_index = 0; + e->trials[1].prev_index2 = single_step_trial; for( len = min_match_len; len <= num_trials; ++len ) - encoder->trials[len].price = infinite_price; + e->trials[len].price = infinite_price; for( rep = 0; rep < num_rep_distances; ++rep ) { int price; if( replens[rep] < min_match_len ) continue; - price = rep_match_price + LZe_price_rep( encoder, rep, state, pos_state ); + price = rep_match_price + LZe_price_rep( e, rep, state, pos_state ); for( len = min_match_len; len <= replens[rep]; ++len ) - Tr_update( &encoder->trials[len], price + - Lee_price( &encoder->rep_len_encoder, len, pos_state ), - rep, 0 ); + Tr_update( &e->trials[len], price + + Lee_price( &e->rep_len_encoder, len, pos_state ), rep, 0 ); } if( main_len > replens[0] ) { - const int normal_match_price = match_price + price0( encoder->bm_rep[state] ); + const int normal_match_price = match_price + price0( e->bm_rep[state] ); i = 0, len = max( replens[0] + 1, min_match_len ); - while( len > encoder->pairs[i].len ) ++i; + while( len > e->pairs[i].len ) ++i; while( true ) { - const int dis = encoder->pairs[i].dis; - Tr_update( &encoder->trials[len], normal_match_price + - LZe_price_pair( encoder, dis, len, pos_state ), + const int dis = e->pairs[i].dis; + Tr_update( &e->trials[len], normal_match_price + + LZe_price_pair( e, dis, len, pos_state ), dis + num_rep_distances, 0 ); - if( ++len > encoder->pairs[i].len && ++i >= num_pairs ) break; + if( ++len > e->pairs[i].len && ++i >= num_pairs ) break; } } } - Mf_move_pos( encoder->matchfinder ); + Mf_move_pos( e->matchfinder ); while( true ) /* price optimization loop */ { struct Trial *cur_trial, *next_trial; - int newlen, pos_state, prev_index, prev_index2, available_bytes, len_limit; + int newlen, pos_state, available_bytes, len_limit; int start_len = min_match_len; int next_price, match_price, rep_match_price; State cur_state; @@ -508,120 +500,105 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, if( ++cur >= num_trials ) /* no more initialized trials */ { - LZe_backward( encoder, cur ); + LZe_backward( e, cur ); return cur; } - num_pairs = LZe_read_match_distances( encoder ); - newlen = ( num_pairs > 0 ) ? encoder->pairs[num_pairs-1].len : 0; - if( newlen >= encoder->matchfinder->match_len_limit ) + num_pairs = LZe_read_match_distances( e ); + newlen = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0; + if( newlen >= e->matchfinder->match_len_limit ) { - encoder->pending_num_pairs = num_pairs; - LZe_backward( encoder, cur ); + e->pending_num_pairs = num_pairs; + LZe_backward( e, cur ); return cur; } /* give final values to current trial */ - cur_trial = &encoder->trials[cur]; - prev_index = cur_trial->prev_index; - prev_index2 = cur_trial->prev_index2; + cur_trial = &e->trials[cur]; + { + int dis = cur_trial->dis; + int prev_index = cur_trial->prev_index; + const int prev_index2 = cur_trial->prev_index2; - if( prev_index2 != single_step_trial ) + if( prev_index2 == single_step_trial ) { - --prev_index; - if( prev_index2 >= 0 ) + cur_state = e->trials[prev_index].state; + if( prev_index + 1 == cur ) /* len == 1 */ { - cur_state = encoder->trials[prev_index2].state; - if( cur_trial->dis2 < num_rep_distances ) - cur_state = St_set_rep( cur_state ); - else - cur_state = St_set_match( cur_state ); + if( dis == 0 ) cur_state = St_set_short_rep( cur_state ); + else cur_state = St_set_char( cur_state ); /* literal */ } - else - cur_state = encoder->trials[prev_index].state; - cur_state = St_set_char( cur_state ); + else if( dis < num_rep_distances ) cur_state = St_set_rep( cur_state ); + else cur_state = St_set_match( cur_state ); } - else - cur_state = encoder->trials[prev_index].state; - - if( prev_index == cur - 1 ) + else if( prev_index2 == dual_step_trial ) /* dis == 0 */ { - if( cur_trial->dis == 0 ) - cur_state = St_set_short_rep( cur_state ); - else - cur_state = St_set_char( cur_state ); - for( i = 0; i < num_rep_distances; ++i ) - cur_trial->reps[i] = encoder->trials[prev_index].reps[i]; + --prev_index; + cur_state = e->trials[prev_index].state; + cur_state = St_set_char( cur_state ); + cur_state = St_set_rep( cur_state ); } - else + else /* if( prev_index2 >= 0 ) */ { - int dis; - if( prev_index2 >= 0 ) - { - dis = cur_trial->dis2; - prev_index = prev_index2; - cur_state = St_set_rep( cur_state ); - } - else - { - dis = cur_trial->dis; - if( dis < num_rep_distances ) - cur_state = St_set_rep( cur_state ); - else - cur_state = St_set_match( cur_state ); - } - for( i = 0; i < num_rep_distances; ++i ) - cur_trial->reps[i] = encoder->trials[prev_index].reps[i]; - LZe_mtf_reps( dis, cur_trial->reps ); + prev_index = prev_index2; + cur_state = e->trials[prev_index].state; + if( dis < num_rep_distances ) cur_state = St_set_rep( cur_state ); + else cur_state = St_set_match( cur_state ); + cur_state = St_set_char( cur_state ); + cur_state = St_set_rep( cur_state ); } cur_trial->state = cur_state; + for( i = 0; i < num_rep_distances; ++i ) + cur_trial->reps[i] = e->trials[prev_index].reps[i]; + mtf_reps( dis, cur_trial->reps ); + } - pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; - prev_byte = Mf_peek( encoder->matchfinder, -1 ); - cur_byte = Mf_peek( encoder->matchfinder, 0 ); - match_byte = Mf_peek( encoder->matchfinder, -cur_trial->reps[0]-1 ); + pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask; + prev_byte = Mf_peek( e->matchfinder, 1 ); + cur_byte = Mf_peek( e->matchfinder, 0 ); + match_byte = Mf_peek( e->matchfinder, cur_trial->reps[0] + 1 ); + Mf_move_pos( e->matchfinder ); next_price = cur_trial->price + - price0( encoder->bm_match[cur_state][pos_state] ); + price0( e->bm_match[cur_state][pos_state] ); if( St_is_char( cur_state ) ) - next_price += LZe_price_literal( encoder, prev_byte, cur_byte ); + next_price += LZe_price_literal( e, prev_byte, cur_byte ); else - next_price += LZe_price_matched( encoder, prev_byte, cur_byte, match_byte ); - Mf_move_pos( encoder->matchfinder ); + next_price += LZe_price_matched( e, prev_byte, cur_byte, match_byte ); /* try last updates to next trial */ - next_trial = &encoder->trials[cur+1]; + next_trial = &e->trials[cur+1]; - Tr_update( next_trial, next_price, -1, cur ); + Tr_update( next_trial, next_price, -1, cur ); /* literal */ - match_price = cur_trial->price + price1( encoder->bm_match[cur_state][pos_state] ); - rep_match_price = match_price + price1( encoder->bm_rep[cur_state] ); + match_price = cur_trial->price + price1( e->bm_match[cur_state][pos_state] ); + rep_match_price = match_price + price1( e->bm_rep[cur_state] ); - if( match_byte == cur_byte && next_trial->dis != 0 ) + if( match_byte == cur_byte && next_trial->dis != 0 && + next_trial->prev_index2 == single_step_trial ) { const int price = rep_match_price + - LZe_price_rep_len1( encoder, cur_state, pos_state ); + LZe_price_shortrep( e, cur_state, pos_state ); if( price <= next_trial->price ) { next_trial->price = price; next_trial->dis = 0; next_trial->prev_index = cur; - next_trial->prev_index2 = single_step_trial; } } - available_bytes = min( Mf_available_bytes( encoder->matchfinder ) + 1, + available_bytes = min( Mf_available_bytes( e->matchfinder ) + 1, max_num_trials - 1 - cur ); if( available_bytes < min_match_len ) continue; - len_limit = min( encoder->matchfinder->match_len_limit, available_bytes ); + len_limit = min( e->matchfinder->match_len_limit, available_bytes ); /* try literal + rep0 */ if( match_byte != cur_byte && next_trial->prev_index != cur ) { - const uint8_t * const data = Mf_ptr_to_current_pos( encoder->matchfinder ) - 1; + const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1; const int dis = cur_trial->reps[0] + 1; - const int limit = min( encoder->matchfinder->match_len_limit + 1, + const int limit = min( e->matchfinder->match_len_limit + 1, available_bytes ); len = 1; while( len < limit && data[len-dis] == data[len] ) ++len; @@ -630,40 +607,38 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, const int pos_state2 = ( pos_state + 1 ) & pos_state_mask; const State state2 = St_set_char( cur_state ); const int price = next_price + - price1( encoder->bm_match[state2][pos_state2] ) + - price1( encoder->bm_rep[state2] ) + - LZe_price_rep0_len( encoder, len, state2, pos_state2 ); + price1( e->bm_match[state2][pos_state2] ) + + price1( e->bm_rep[state2] ) + + LZe_price_rep0_len( e, len, state2, pos_state2 ); while( num_trials < cur + 1 + len ) - encoder->trials[++num_trials].price = infinite_price; - Tr_update2( &encoder->trials[cur+1+len], price, 0, cur + 1 ); + e->trials[++num_trials].price = infinite_price; + Tr_update2( &e->trials[cur+1+len], price, cur + 1 ); } } /* try rep distances */ for( rep = 0; rep < num_rep_distances; ++rep ) { - const uint8_t * const data = Mf_ptr_to_current_pos( encoder->matchfinder ) - 1; + const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1; int price; const int dis = cur_trial->reps[rep] + 1; - if( data[-dis] != data[0] || data[1-dis] != data[1] ) continue; + if( data[0-dis] != data[0] || data[1-dis] != data[1] ) continue; for( len = min_match_len; len < len_limit; ++len ) if( data[len-dis] != data[len] ) break; while( num_trials < cur + len ) - encoder->trials[++num_trials].price = infinite_price; - price = rep_match_price + - LZe_price_rep( encoder, rep, cur_state, pos_state ); + e->trials[++num_trials].price = infinite_price; + price = rep_match_price + LZe_price_rep( e, rep, cur_state, pos_state ); for( i = min_match_len; i <= len; ++i ) - Tr_update( &encoder->trials[cur+i], price + - Lee_price( &encoder->rep_len_encoder, i, pos_state ), - rep, cur ); + Tr_update( &e->trials[cur+i], price + + Lee_price( &e->rep_len_encoder, i, pos_state ), rep, cur ); if( rep == 0 ) start_len = len + 1; /* discard shorter matches */ /* try rep + literal + rep0 */ { int len2 = len + 1, pos_state2; - const int limit = min( encoder->matchfinder->match_len_limit + len2, + const int limit = min( e->matchfinder->match_len_limit + len2, available_bytes ); State state2; while( len2 < limit && data[len2-dis] == data[len2] ) ++len2; @@ -672,18 +647,17 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, pos_state2 = ( pos_state + len ) & pos_state_mask; state2 = St_set_rep( cur_state ); - price += Lee_price( &encoder->rep_len_encoder, len, pos_state ) + - price0( encoder->bm_match[state2][pos_state2] ) + - LZe_price_matched( encoder, data[len-1], data[len], data[len-dis] ); + price += Lee_price( &e->rep_len_encoder, len, pos_state ) + + price0( e->bm_match[state2][pos_state2] ) + + LZe_price_matched( e, data[len-1], data[len], data[len-dis] ); pos_state2 = ( pos_state2 + 1 ) & pos_state_mask; state2 = St_set_char( state2 ); - price += price1( encoder->bm_match[state2][pos_state2] ) + - price1( encoder->bm_rep[state2] ) + - LZe_price_rep0_len( encoder, len2, state2, pos_state2 ); + price += price1( e->bm_match[state2][pos_state2] ) + + price1( e->bm_rep[state2] ) + + LZe_price_rep0_len( e, len2, state2, pos_state2 ); while( num_trials < cur + len + 1 + len2 ) - encoder->trials[++num_trials].price = infinite_price; - Tr_update3( &encoder->trials[cur+len+1+len2], price, 0, cur + len + 1, - rep, cur ); + e->trials[++num_trials].price = infinite_price; + Tr_update3( &e->trials[cur+len+1+len2], price, rep, cur + len + 1, cur ); } } @@ -692,28 +666,27 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, { int dis; const int normal_match_price = match_price + - price0( encoder->bm_rep[cur_state] ); + price0( e->bm_rep[cur_state] ); while( num_trials < cur + newlen ) - encoder->trials[++num_trials].price = infinite_price; + e->trials[++num_trials].price = infinite_price; i = 0; - while( start_len > encoder->pairs[i].len ) ++i; - dis = encoder->pairs[i].dis; + while( start_len > e->pairs[i].len ) ++i; + dis = e->pairs[i].dis; for( len = start_len; ; ++len ) { - int price = normal_match_price + - LZe_price_pair( encoder, dis, len, pos_state ); + int price = normal_match_price + LZe_price_pair( e, dis, len, pos_state ); - Tr_update( &encoder->trials[cur+len], price, dis + num_rep_distances, cur ); + Tr_update( &e->trials[cur+len], price, dis + num_rep_distances, cur ); /* try match + literal + rep0 */ - if( len == encoder->pairs[i].len ) + if( len == e->pairs[i].len ) { - const uint8_t * const data = Mf_ptr_to_current_pos( encoder->matchfinder ) - 1; + const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1; const int dis2 = dis + 1; int len2 = len + 1; - const int limit = min( encoder->matchfinder->match_len_limit + len2, + const int limit = min( e->matchfinder->match_len_limit + len2, available_bytes ); while( len2 < limit && data[len2-dis2] == data[len2] ) ++len2; len2 -= len + 1; @@ -721,21 +694,21 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, { int pos_state2 = ( pos_state + len ) & pos_state_mask; State state2 = St_set_match( cur_state ); - price += price0( encoder->bm_match[state2][pos_state2] ) + - LZe_price_matched( encoder, data[len-1], data[len], data[len-dis2] ); + price += price0( e->bm_match[state2][pos_state2] ) + + LZe_price_matched( e, data[len-1], data[len], data[len-dis2] ); pos_state2 = ( pos_state2 + 1 ) & pos_state_mask; state2 = St_set_char( state2 ); - price += price1( encoder->bm_match[state2][pos_state2] ) + - price1( encoder->bm_rep[state2] ) + - LZe_price_rep0_len( encoder, len2, state2, pos_state2 ); + price += price1( e->bm_match[state2][pos_state2] ) + + price1( e->bm_rep[state2] ) + + LZe_price_rep0_len( e, len2, state2, pos_state2 ); while( num_trials < cur + len + 1 + len2 ) - encoder->trials[++num_trials].price = infinite_price; - Tr_update3( &encoder->trials[cur+len+1+len2], price, 0, - cur + len + 1, dis + num_rep_distances, cur ); + e->trials[++num_trials].price = infinite_price; + Tr_update3( &e->trials[cur+len+1+len2], price, + dis + num_rep_distances, cur + len + 1, cur ); } if( ++i >= num_pairs ) break; - dis = encoder->pairs[i].dis; + dis = e->pairs[i].dis; } } } @@ -743,114 +716,110 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, } -bool LZe_encode_member( struct LZ_encoder * const encoder, +bool LZe_encode_member( struct LZ_encoder * const e, const unsigned long long member_size ) { const unsigned long long member_size_limit = member_size - Ft_size - max_marker_size; - const int fill_count = - ( encoder->matchfinder->match_len_limit > 12 ) ? 128 : 512; + const int fill_count = ( e->matchfinder->match_len_limit > 12 ) ? 128 : 512; int fill_counter = 0; int ahead, i; - int rep_distances[num_rep_distances]; + int reps[num_rep_distances]; State state = 0; - for( i = 0; i < num_rep_distances; ++i ) rep_distances[i] = 0; + for( i = 0; i < num_rep_distances; ++i ) reps[i] = 0; - if( Mf_data_position( encoder->matchfinder ) != 0 || - Re_member_position( &encoder->renc ) != Fh_size ) - return false; /* can be called only once */ + if( Mf_data_position( e->matchfinder ) != 0 || + Re_member_position( &e->renc ) != Fh_size ) + return false; /* can be called only once */ - if( !Mf_finished( encoder->matchfinder ) ) /* encode first byte */ + if( !Mf_finished( e->matchfinder ) ) /* encode first byte */ { const uint8_t prev_byte = 0; - const uint8_t cur_byte = Mf_peek( encoder->matchfinder, 0 ); - Re_encode_bit( &encoder->renc, &encoder->bm_match[state][0], 0 ); - LZe_encode_literal( encoder, prev_byte, cur_byte ); - CRC32_update_byte( &encoder->crc, cur_byte ); - Mf_get_match_pairs( encoder->matchfinder, 0 ); - Mf_move_pos( encoder->matchfinder ); + const uint8_t cur_byte = Mf_peek( e->matchfinder, 0 ); + CRC32_update_byte( &e->crc, cur_byte ); + Re_encode_bit( &e->renc, &e->bm_match[state][0], 0 ); + LZe_encode_literal( e, prev_byte, cur_byte ); + Mf_get_match_pairs( e->matchfinder, 0 ); + Mf_move_pos( e->matchfinder ); } - while( !Mf_finished( encoder->matchfinder ) ) + while( !Mf_finished( e->matchfinder ) ) { - if( encoder->pending_num_pairs == 0 ) + if( e->pending_num_pairs == 0 ) { if( fill_counter <= 0 ) - { LZe_fill_distance_prices( encoder ); fill_counter = fill_count; } - if( encoder->align_price_count <= 0 ) - LZe_fill_align_prices( encoder ); + { LZe_fill_distance_prices( e ); fill_counter = fill_count; } + if( e->align_price_count <= 0 ) LZe_fill_align_prices( e ); } - ahead = LZe_sequence_optimizer( encoder, rep_distances, state ); + ahead = LZe_sequence_optimizer( e, reps, state ); if( ahead <= 0 ) return false; /* can't happen */ - for( i = 0; ; ) + for( i = 0; ahead > 0; ) { const int pos_state = - ( Mf_data_position( encoder->matchfinder ) - ahead ) & pos_state_mask; - const int dis = encoder->trials[i].dis; - const int len = encoder->trials[i].price; + ( Mf_data_position( e->matchfinder ) - ahead ) & pos_state_mask; + const int dis = e->trials[i].dis; + const int len = e->trials[i].price; - bool bit = ( dis < 0 && len == 1 ); - Re_encode_bit( &encoder->renc, - &encoder->bm_match[state][pos_state], !bit ); + bool bit = ( dis < 0 ); + Re_encode_bit( &e->renc, &e->bm_match[state][pos_state], !bit ); if( bit ) /* literal byte */ { - const uint8_t prev_byte = Mf_peek( encoder->matchfinder, -ahead-1 ); - const uint8_t cur_byte = Mf_peek( encoder->matchfinder, -ahead ); - CRC32_update_byte( &encoder->crc, cur_byte ); + const uint8_t prev_byte = Mf_peek( e->matchfinder, ahead + 1 ); + const uint8_t cur_byte = Mf_peek( e->matchfinder, ahead ); + CRC32_update_byte( &e->crc, cur_byte ); if( St_is_char( state ) ) - LZe_encode_literal( encoder, prev_byte, cur_byte ); + LZe_encode_literal( e, prev_byte, cur_byte ); else { const uint8_t match_byte = - Mf_peek( encoder->matchfinder, -ahead-rep_distances[0]-1 ); - LZe_encode_matched( encoder, prev_byte, cur_byte, match_byte ); + Mf_peek( e->matchfinder, ahead + reps[0] + 1 ); + LZe_encode_matched( e, prev_byte, cur_byte, match_byte ); } state = St_set_char( state ); } else /* match or repeated match */ { - CRC32_update_buf( &encoder->crc, Mf_ptr_to_current_pos( encoder->matchfinder ) - ahead, len ); - LZe_mtf_reps( dis, rep_distances ); + CRC32_update_buf( &e->crc, Mf_ptr_to_current_pos( e->matchfinder ) - ahead, len ); + mtf_reps( dis, reps ); bit = ( dis < num_rep_distances ); - Re_encode_bit( &encoder->renc, &encoder->bm_rep[state], bit ); - if( bit ) + Re_encode_bit( &e->renc, &e->bm_rep[state], bit ); + if( bit ) /* repeated match */ { bit = ( dis == 0 ); - Re_encode_bit( &encoder->renc, &encoder->bm_rep0[state], !bit ); + Re_encode_bit( &e->renc, &e->bm_rep0[state], !bit ); if( bit ) - Re_encode_bit( &encoder->renc, &encoder->bm_len[state][pos_state], len > 1 ); + Re_encode_bit( &e->renc, &e->bm_len[state][pos_state], len > 1 ); else { - Re_encode_bit( &encoder->renc, &encoder->bm_rep1[state], dis > 1 ); + Re_encode_bit( &e->renc, &e->bm_rep1[state], dis > 1 ); if( dis > 1 ) - Re_encode_bit( &encoder->renc, &encoder->bm_rep2[state], dis > 2 ); + Re_encode_bit( &e->renc, &e->bm_rep2[state], dis > 2 ); } if( len == 1 ) state = St_set_short_rep( state ); else { - Lee_encode( &encoder->rep_len_encoder, &encoder->renc, len, pos_state ); + Lee_encode( &e->rep_len_encoder, &e->renc, len, pos_state ); state = St_set_rep( state ); } } - else + else /* match */ { - LZe_encode_pair( encoder, dis - num_rep_distances, len, pos_state ); + LZe_encode_pair( e, dis - num_rep_distances, len, pos_state ); --fill_counter; state = St_set_match( state ); } } ahead -= len; i += len; - if( Re_member_position( &encoder->renc ) >= member_size_limit ) + if( Re_member_position( &e->renc ) >= member_size_limit ) { - if( !Mf_dec_pos( encoder->matchfinder, ahead ) ) return false; - LZe_full_flush( encoder, state ); + if( !Mf_dec_pos( e->matchfinder, ahead ) ) return false; + LZe_full_flush( e, state ); return true; } - if( ahead <= 0 ) break; } } - LZe_full_flush( encoder, state ); + LZe_full_flush( e, state ); return true; } |