diff options
Diffstat (limited to 'encoder.c')
-rw-r--r-- | encoder.c | 435 |
1 files changed, 236 insertions, 199 deletions
@@ -1,4 +1,4 @@ -/* Clzip - A data compressor based on the LZMA algorithm +/* Clzip - Data compressor based on the LZMA algorithm Copyright (C) 2010 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify @@ -31,193 +31,197 @@ Dis_slots dis_slots; Prob_prices prob_prices; -bool Mf_read_block( struct Matchfinder * const matchfinder ) +bool Mf_read_block( struct Matchfinder * const mf ) { - const int size = matchfinder->buffer_size - matchfinder->stream_pos; - const int rd = readblock( matchfinder->infd_, matchfinder->buffer + matchfinder->stream_pos, size ); - matchfinder->stream_pos += rd; - if( rd < size ) matchfinder->at_stream_end = true; - return ( rd == size || !errno ); + if( !mf->at_stream_end && mf->stream_pos < mf->buffer_size ) + { + const int size = mf->buffer_size - mf->stream_pos; + const int rd = readblock( mf->infd, mf->buffer + mf->stream_pos, size ); + mf->stream_pos += rd; + if( rd != size && errno ) + { show_error( "Read error", errno, false ); cleanup_and_fail( 1 ); } + mf->at_stream_end = ( rd < size ); + } + return mf->pos < mf->stream_pos; } -void Mf_init( struct Matchfinder * const matchfinder, - const int dict_size, const int len_limit, const int infd ) +void Mf_init( struct Matchfinder * const mf, + const int dict_size, const int len_limit, const int ifd ) { - matchfinder->partial_data_pos = 0; - matchfinder->pos = 0; - matchfinder->cyclic_pos = 0; - matchfinder->stream_pos = 0; - matchfinder->infd_ = infd; - matchfinder->match_len_limit_ = len_limit; - matchfinder->prev_positions = - (int32_t *)malloc( mf_num_prev_positions * sizeof (int32_t) ); - if( !matchfinder->prev_positions ) + const int buffer_size_limit = ( 2 * dict_size ) + before_size + after_size; + int i; + mf->partial_data_pos = 0; + mf->prev_positions = + (int32_t *)malloc( num_prev_positions * sizeof (int32_t) ); + if( !mf->prev_positions ) { - show_error( "not enough memory. Try a smaller dictionary size", 0, false ); + show_error( "Not enough memory. Try a smaller dictionary size.", 0, false ); cleanup_and_fail( 1 ); } - matchfinder->at_stream_end = false; - - const int buffer_size_limit = ( 2 * dict_size ) + mf_before_size + mf_after_size; - matchfinder->buffer_size = max( 65536, dict_size ); - matchfinder->buffer = (uint8_t *)malloc( matchfinder->buffer_size ); - if( !matchfinder->buffer ) + mf->pos = 0; + mf->cyclic_pos = 0; + mf->stream_pos = 0; + mf->match_len_limit_ = len_limit; + mf->cycles = ( len_limit < max_match_len ) ? 16 + ( len_limit / 2 ) : 256; + mf->infd = ifd; + mf->at_stream_end = false; + + mf->buffer_size = max( 65536, dict_size ); + mf->buffer = (uint8_t *)malloc( mf->buffer_size ); + if( !mf->buffer ) { - show_error( "not enough memory. Try a smaller dictionary size", 0, false ); + show_error( "Not enough memory. Try a smaller dictionary size.", 0, false ); cleanup_and_fail( 1 ); } - if( !Mf_read_block( matchfinder ) ) - { show_error( "read error", errno, false ); cleanup_and_fail( 1 ); } - if( !matchfinder->at_stream_end && matchfinder->buffer_size < buffer_size_limit ) + if( Mf_read_block( mf ) && !mf->at_stream_end && + mf->buffer_size < buffer_size_limit ) { - matchfinder->buffer_size = buffer_size_limit; - matchfinder->buffer = - (uint8_t *)realloc( matchfinder->buffer, matchfinder->buffer_size ); - if( !matchfinder->buffer ) + mf->buffer_size = buffer_size_limit; + mf->buffer = (uint8_t *)realloc( mf->buffer, mf->buffer_size ); + if( !mf->buffer ) { - show_error( "not enough memory. Try a smaller dictionary size", 0, false ); + show_error( "Not enough memory. Try a smaller dictionary size.", 0, false ); cleanup_and_fail( 1 ); } - if( !Mf_read_block( matchfinder ) ) - { show_error( "read error", errno, false ); cleanup_and_fail( 1 ); } + Mf_read_block( mf ); } - if( matchfinder->at_stream_end && matchfinder->stream_pos < dict_size ) - matchfinder->dictionary_size_ = max( min_dictionary_size, matchfinder->stream_pos ); - else matchfinder->dictionary_size_ = dict_size; - matchfinder->pos_limit = matchfinder->buffer_size; - if( !matchfinder->at_stream_end ) matchfinder->pos_limit -= mf_after_size; - matchfinder->prev_pos_tree = - (int32_t *)malloc( 2 * matchfinder->dictionary_size_ * sizeof (int32_t) ); - if( !matchfinder->prev_pos_tree ) + if( mf->at_stream_end && mf->stream_pos < dict_size ) + mf->dictionary_size_ = max( min_dictionary_size, mf->stream_pos ); + else mf->dictionary_size_ = dict_size; + mf->pos_limit = mf->buffer_size; + if( !mf->at_stream_end ) mf->pos_limit -= after_size; + mf->prev_pos_tree = + (int32_t *)malloc( 2 * mf->dictionary_size_ * sizeof (int32_t) ); + if( !mf->prev_pos_tree ) { - show_error( "not enough memory. Try a smaller dictionary size", 0, false ); + show_error( "Not enough memory. Try a smaller dictionary size.", 0, false ); cleanup_and_fail( 1 ); } - for( int i = 0; i < mf_num_prev_positions; ++i ) - matchfinder->prev_positions[i] = -1; + for( i = 0; i < num_prev_positions; ++i ) mf->prev_positions[i] = -1; } -bool Mf_reset( struct Matchfinder * const matchfinder ) +void Mf_reset( struct Matchfinder * const mf ) { - const int size = matchfinder->stream_pos - matchfinder->pos; - if( size > 0 ) memmove( matchfinder->buffer, matchfinder->buffer + matchfinder->pos, size ); - matchfinder->partial_data_pos = 0; - matchfinder->stream_pos -= matchfinder->pos; - matchfinder->pos = 0; - matchfinder->cyclic_pos = 0; - for( int i = 0; i < mf_num_prev_positions; ++i ) - matchfinder->prev_positions[i] = -1; - return ( matchfinder->at_stream_end || Mf_read_block( matchfinder ) ); + int i; + const int size = mf->stream_pos - mf->pos; + if( size > 0 ) memmove( mf->buffer, mf->buffer + mf->pos, size ); + mf->partial_data_pos = 0; + mf->stream_pos -= mf->pos; + mf->pos = 0; + mf->cyclic_pos = 0; + for( i = 0; i < num_prev_positions; ++i ) mf->prev_positions[i] = -1; + Mf_read_block( mf ); } -bool Mf_move_pos( struct Matchfinder * const matchfinder ) +void Mf_move_pos( struct Matchfinder * const mf ) { - if( ++matchfinder->cyclic_pos >= matchfinder->dictionary_size_ ) - matchfinder->cyclic_pos = 0; - if( ++matchfinder->pos >= matchfinder->pos_limit ) + if( ++mf->cyclic_pos >= mf->dictionary_size_ ) mf->cyclic_pos = 0; + if( ++mf->pos >= mf->pos_limit ) { - if( matchfinder->pos > matchfinder->stream_pos ) - { matchfinder->pos = matchfinder->stream_pos; return false; } - if( !matchfinder->at_stream_end ) + if( mf->pos > mf->stream_pos ) + internal_error( "pos > stream_pos in Mf_move_pos" ); + if( !mf->at_stream_end ) { - const int offset = matchfinder->pos - matchfinder->dictionary_size_ - mf_before_size; - const int size = matchfinder->stream_pos - offset; - memmove( matchfinder->buffer, matchfinder->buffer + offset, size ); - matchfinder->partial_data_pos += offset; - matchfinder->pos -= offset; - matchfinder->stream_pos -= offset; - for( int i = 0; i < mf_num_prev_positions; ++i ) - if( matchfinder->prev_positions[i] >= 0 ) matchfinder->prev_positions[i] -= offset; - for( int i = 0; i < 2 * matchfinder->dictionary_size_; ++i ) - if( matchfinder->prev_pos_tree[i] >= 0 ) matchfinder->prev_pos_tree[i] -= offset; - return Mf_read_block( matchfinder ); + int i; + const int offset = mf->pos - mf->dictionary_size_ - before_size; + const int size = mf->stream_pos - offset; + memmove( mf->buffer, mf->buffer + offset, size ); + mf->partial_data_pos += offset; + mf->pos -= offset; + mf->stream_pos -= offset; + for( i = 0; i < num_prev_positions; ++i ) + if( mf->prev_positions[i] >= 0 ) mf->prev_positions[i] -= offset; + for( i = 0; i < 2 * mf->dictionary_size_; ++i ) + if( mf->prev_pos_tree[i] >= 0 ) mf->prev_pos_tree[i] -= offset; + Mf_read_block( mf ); } } - return true; } -int Mf_longest_match_len( struct Matchfinder * const matchfinder, - int * const distances ) +int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances ) { - int len_limit = matchfinder->match_len_limit_; - if( len_limit > Mf_available_bytes( matchfinder ) ) + int32_t * ptr0 = mf->prev_pos_tree + ( mf->cyclic_pos << 1 ); + int32_t * ptr1 = ptr0 + 1; + int32_t * newptr; + const uint8_t * newdata; + int len = 0, len0 = 0, len1 = 0; + int maxlen = min_match_len - 1; + const int min_pos = (mf->pos >= mf->dictionary_size_) ? + (mf->pos - mf->dictionary_size_ + 1) : 0; + const uint8_t * const data = mf->buffer + mf->pos; + int count, delta, key2, key3, key4, newpos, tmp; + int len_limit = mf->match_len_limit_; + + if( len_limit > Mf_available_bytes( mf ) ) { - len_limit = Mf_available_bytes( matchfinder ); + len_limit = Mf_available_bytes( mf ); if( len_limit < 4 ) return 0; } - int maxlen = min_match_len - 1; - const int min_pos = (matchfinder->pos >= matchfinder->dictionary_size_) ? - (matchfinder->pos - matchfinder->dictionary_size_ + 1) : 0; - const uint8_t * const data = matchfinder->buffer + matchfinder->pos; - const int key2 = mf_num_prev_positions4 + mf_num_prev_positions3 + - ( ( (int)data[0] << 8 ) | data[1] ); - const uint32_t tmp = crc32[data[0]] ^ data[1] ^ ( (uint32_t)data[2] << 8 ); - const int key3 = mf_num_prev_positions4 + - (int)( tmp & ( mf_num_prev_positions3 - 1 ) ); - const int key4 = (int)( ( tmp ^ ( crc32[data[3]] << 5 ) ) & - ( mf_num_prev_positions4 - 1 ) ); + key2 = num_prev_positions4 + num_prev_positions3 + + ( ( (int)data[0] << 8 ) | data[1] ); + tmp = crc32[data[0]] ^ data[1] ^ ( (uint32_t)data[2] << 8 ); + key3 = num_prev_positions4 + (int)( tmp & ( num_prev_positions3 - 1 ) ); + key4 = (int)( ( tmp ^ ( crc32[data[3]] << 5 ) ) & + ( num_prev_positions4 - 1 ) ); if( distances ) { - int np = matchfinder->prev_positions[key2]; + int np = mf->prev_positions[key2]; if( np >= min_pos ) - { distances[2] = matchfinder->pos - np - 1; maxlen = 2; } + { distances[2] = mf->pos - np - 1; maxlen = 2; } else distances[2] = 0x7FFFFFFF; - np = matchfinder->prev_positions[key3]; - if( np >= min_pos && matchfinder->buffer[np] == data[0] ) - { distances[3] = matchfinder->pos - np - 1; maxlen = 3; } + np = mf->prev_positions[key3]; + if( np >= min_pos && mf->buffer[np] == data[0] ) + { distances[3] = mf->pos - np - 1; maxlen = 3; } else distances[3] = 0x7FFFFFFF; distances[4] = 0x7FFFFFFF; } - matchfinder->prev_positions[key2] = matchfinder->pos; - matchfinder->prev_positions[key3] = matchfinder->pos; - int newpos = matchfinder->prev_positions[key4]; - matchfinder->prev_positions[key4] = matchfinder->pos; + mf->prev_positions[key2] = mf->pos; + mf->prev_positions[key3] = mf->pos; + newpos = mf->prev_positions[key4]; + mf->prev_positions[key4] = mf->pos; - int idx0 = matchfinder->cyclic_pos << 1; - int idx1 = idx0 + 1; - int len = 0, len0 = 0, len1 = 0; - for( int count = 16 + ( matchfinder->match_len_limit_ / 2 ); ; ) + for( count = mf->cycles; ; ) { - if( newpos < min_pos || --count < 0 ) - { matchfinder->prev_pos_tree[idx0] = matchfinder->prev_pos_tree[idx1] = -1; break; } - const uint8_t * const newdata = matchfinder->buffer + newpos; + if( newpos < min_pos || --count < 0 ) { *ptr0 = *ptr1 = -1; break; } + newdata = mf->buffer + newpos; while( len < len_limit && newdata[len] == data[len] ) ++len; - const int delta = matchfinder->pos - newpos; + delta = mf->pos - newpos; if( distances ) while( maxlen < len ) distances[++maxlen] = delta - 1; - const int newidx = ( matchfinder->cyclic_pos - delta + - ( ( matchfinder->cyclic_pos >= delta ) ? 0 : matchfinder->dictionary_size_ ) ) << 1; + newptr = mf->prev_pos_tree + + ( ( mf->cyclic_pos - delta + + ( ( mf->cyclic_pos >= delta ) ? 0 : mf->dictionary_size_ ) ) << 1 ); if( len < len_limit ) { if( newdata[len] < data[len] ) { - matchfinder->prev_pos_tree[idx0] = newpos; - idx0 = newidx + 1; - newpos = matchfinder->prev_pos_tree[idx0]; + *ptr0 = newpos; + ptr0 = newptr + 1; + newpos = *ptr0; len0 = len; if( len1 < len ) len = len1; } else { - matchfinder->prev_pos_tree[idx1] = newpos; - idx1 = newidx; - newpos = matchfinder->prev_pos_tree[idx1]; + *ptr1 = newpos; + ptr1 = newptr; + newpos = *ptr1; len1 = len; if( len0 < len ) len = len0; } } else { - matchfinder->prev_pos_tree[idx0] = matchfinder->prev_pos_tree[newidx]; - matchfinder->prev_pos_tree[idx1] = matchfinder->prev_pos_tree[newidx+1]; + *ptr0 = newptr[0]; + *ptr1 = newptr[1]; break; } } @@ -230,6 +234,20 @@ int Mf_longest_match_len( struct Matchfinder * const matchfinder, } +void Re_flush_data( struct Range_encoder * const range_encoder ) + { + if( range_encoder->pos > 0 ) + { + if( range_encoder->outfd >= 0 && + writeblock( range_encoder->outfd, range_encoder->buffer, + range_encoder->pos ) != range_encoder->pos ) + { show_error( "Write error", errno, false ); cleanup_and_fail( 1 ); } + range_encoder->partial_member_pos += range_encoder->pos; + range_encoder->pos = 0; + } + } + + void Lee_encode( struct Len_encoder * const len_encoder, struct Range_encoder * const range_encoder, int symbol, const int pos_state ) @@ -261,7 +279,8 @@ void Lee_encode( struct Len_encoder * const len_encoder, void LZe_fill_align_prices( struct LZ_encoder * const encoder ) { - for( int i = 0; i < dis_align_size; ++i ) + int i; + for( i = 0; i < dis_align_size; ++i ) encoder->align_prices[i] = price_symbol_reversed( encoder->bm_align, i, dis_align_bits ); encoder->align_price_count = dis_align_size; } @@ -269,9 +288,22 @@ void LZe_fill_align_prices( struct LZ_encoder * const encoder ) void LZe_fill_distance_prices( struct LZ_encoder * const encoder ) { - for( int dis_state = 0; dis_state < max_dis_states; ++dis_state ) + int dis, dis_state; + for( dis = start_dis_model; dis < modeled_distances; ++dis ) + { + const int dis_slot = dis_slots[dis]; + const int direct_bits = ( dis_slot >> 1 ) - 1; + const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + const int price = + price_symbol_reversed( encoder->bm_dis + base - dis_slot, dis - base, direct_bits ); + for( dis_state = 0; dis_state < max_dis_states; ++dis_state ) + encoder->dis_prices[dis_state][dis] = price; + } + + for( dis_state = 0; dis_state < max_dis_states; ++dis_state ) { int * const dsp = encoder->dis_slot_prices[dis_state]; + int * const dp = encoder->dis_prices[dis_state]; const Bit_model * const bmds = encoder->bm_dis_slot[dis_state]; int slot = 0; for( ; slot < end_dis_model && slot < encoder->num_dis_slots; ++slot ) @@ -280,39 +312,32 @@ void LZe_fill_distance_prices( struct LZ_encoder * const encoder ) dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ) + (((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift ); - int * const dp = encoder->dis_prices[dis_state]; - int dis = 0; - for( ; dis < start_dis_model; ++dis ) + for( dis = 0; dis < start_dis_model; ++dis ) dp[dis] = dsp[dis]; for( ; dis < modeled_distances; ++dis ) - { - const int dis_slot = get_slot( dis ); - const int direct_bits = ( dis_slot >> 1 ) - 1; - const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits; - dp[dis] = dsp[dis_slot] + - price_symbol_reversed( encoder->bm_dis + base - dis_slot, dis - base, direct_bits ); - } + dp[dis] += dsp[dis_slots[dis]]; } } -// Return value == number of bytes advanced (ahead). -// trials[0]..trials[retval-1] contain the steps to encode. -// ( trials[0].dis == -1 && trials[0].price == 1 ) means literal. +/* Return value == number of bytes advanced (ahead). + trials[0]..trials[retval-1] contain the steps to encode. + ( trials[0].dis == -1 && trials[0].price == 1 ) means literal. */ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, const int reps[num_rep_distances], const State state ) { - int main_len; - if( encoder->longest_match_found > 0 ) // from previous call + int main_len, i, rep, cur = 0, num_trials; + int replens[num_rep_distances]; + int rep_index = 0; + + if( encoder->longest_match_found > 0 ) /* from previous call */ { main_len = encoder->longest_match_found; encoder->longest_match_found = 0; } else main_len = LZe_read_match_distances( encoder ); - int replens[num_rep_distances]; - int rep_index = 0; - for( int i = 0; i < num_rep_distances; ++i ) + for( i = 0; i < num_rep_distances; ++i ) { replens[i] = Mf_true_match_len( encoder->matchfinder, 0, reps[i] + 1, max_match_len ); if( replens[i] > replens[rep_index] ) rep_index = i; @@ -321,7 +346,7 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, { encoder->trials[0].dis = rep_index; encoder->trials[0].price = replens[rep_index]; - if( !LZe_move_pos( encoder, replens[rep_index], true ) ) return 0; + LZe_move_pos( encoder, replens[rep_index], true ); return replens[rep_index]; } @@ -330,18 +355,20 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, encoder->trials[0].dis = encoder->match_distances[Mf_match_len_limit( encoder->matchfinder )] + num_rep_distances; encoder->trials[0].price = main_len; - if( !LZe_move_pos( encoder, main_len, true ) ) return 0; + LZe_move_pos( encoder, main_len, true ); return main_len; } { const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; + const int match_price = price1( encoder->bm_match[state][pos_state] ); + const int rep_match_price = match_price + price1( encoder->bm_rep[state] ); const uint8_t prev_byte = Mf_peek( encoder->matchfinder, -1 ); const uint8_t cur_byte = Mf_peek( encoder->matchfinder, 0 ); const uint8_t match_byte = Mf_peek( encoder->matchfinder, -reps[0]-1 ); encoder->trials[0].state = state; - for( int i = 0; i < num_rep_distances; ++i ) encoder->trials[0].reps[i] = reps[i]; + for( i = 0; i < num_rep_distances; ++i ) encoder->trials[0].reps[i] = reps[i]; encoder->trials[1].dis = -1; encoder->trials[1].prev_index = 0; encoder->trials[1].price = price0( encoder->bm_match[state][pos_state] ); @@ -350,9 +377,6 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, else encoder->trials[1].price += Lie_price_matched( &encoder->literal_encoder, prev_byte, cur_byte, match_byte ); - const int match_price = price1( encoder->bm_match[state][pos_state] ); - const int rep_match_price = match_price + price1( encoder->bm_rep[state] ); - if( match_byte == cur_byte ) Tr_update( &encoder->trials[1], 0, 0, rep_match_price + LZe_price_rep_len1( encoder, state, pos_state ) ); @@ -360,7 +384,7 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, { encoder->trials[0].dis = encoder->trials[1].dis; encoder->trials[0].price = 1; - if( !Mf_move_pos( encoder->matchfinder ) ) return 0; + Mf_move_pos( encoder->matchfinder ); return 1; } @@ -371,7 +395,7 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, { main_len = replens[rep_index]; for( ; len <= main_len; ++len ) - encoder->trials[len].price = lze_infinite_price; + encoder->trials[len].price = infinite_price; } else for( ; len <= main_len; ++len ) { @@ -382,28 +406,33 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, } } - for( int rep = 0; rep < num_rep_distances; ++rep ) + for( rep = 0; rep < num_rep_distances; ++rep ) { const int price = rep_match_price + LZe_price_rep( encoder, rep, state, pos_state ); - for( int len = min_match_len; len <= replens[rep]; ++len ) + int len; + for( len = min_match_len; len <= replens[rep]; ++len ) Tr_update( &encoder->trials[len], rep, 0, price + Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) ); } } - int cur = 0; - int num_trials = main_len; - if( !Mf_move_pos( encoder->matchfinder ) ) return 0; + num_trials = main_len; + Mf_move_pos( encoder->matchfinder ); while( true ) { - if( ++cur >= num_trials ) // no more initialized trials + struct Trial *cur_trial, *next_trial; + int newlen, pos_state, prev_index, len_limit; + int next_price, match_price, rep_match_price; + uint8_t prev_byte, cur_byte, match_byte; + + if( ++cur >= num_trials ) /* no more initialized trials */ { LZe_backward( encoder, cur ); return cur; } - const int newlen = LZe_read_match_distances( encoder ); + newlen = LZe_read_match_distances( encoder ); if( newlen >= Mf_match_len_limit( encoder->matchfinder ) ) { encoder->longest_match_found = newlen; @@ -411,12 +440,12 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, return cur; } - struct Trial * const cur_trial = &encoder->trials[cur]; - const int prev_index = cur_trial->prev_index; + cur_trial = &encoder->trials[cur]; + prev_index = cur_trial->prev_index; cur_trial->state = encoder->trials[prev_index].state; - for( int i = 0; i < num_rep_distances; ++i ) + for( i = 0; i < num_rep_distances; ++i ) cur_trial->reps[i] = encoder->trials[prev_index].reps[i]; if( prev_index == cur - 1 ) { @@ -430,35 +459,35 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, LZe_mtf_reps( cur_trial->dis, cur_trial->reps ); } - const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; - const uint8_t prev_byte = Mf_peek( encoder->matchfinder, -1 ); - const uint8_t cur_byte = Mf_peek( encoder->matchfinder, 0 ); - const uint8_t match_byte = Mf_peek( encoder->matchfinder, -cur_trial->reps[0]-1 ); + pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; + prev_byte = Mf_peek( encoder->matchfinder, -1 ); + cur_byte = Mf_peek( encoder->matchfinder, 0 ); + match_byte = Mf_peek( encoder->matchfinder, -cur_trial->reps[0]-1 ); - int next_price = cur_trial->price + price0( encoder->bm_match[cur_trial->state][pos_state] ); + next_price = cur_trial->price + price0( encoder->bm_match[cur_trial->state][pos_state] ); if( St_is_char( cur_trial->state ) ) next_price += Lie_price_symbol( &encoder->literal_encoder, prev_byte, cur_byte ); else next_price += Lie_price_matched( &encoder->literal_encoder, prev_byte, cur_byte, match_byte ); - if( !Mf_move_pos( encoder->matchfinder ) ) return 0; + Mf_move_pos( encoder->matchfinder ); - struct Trial * const next_trial = &encoder->trials[cur+1]; + next_trial = &encoder->trials[cur+1]; Tr_update( next_trial, -1, cur, next_price ); - const int match_price = cur_trial->price + price1( encoder->bm_match[cur_trial->state][pos_state] ); - const int rep_match_price = match_price + price1( encoder->bm_rep[cur_trial->state] ); + match_price = cur_trial->price + price1( encoder->bm_match[cur_trial->state][pos_state] ); + rep_match_price = match_price + price1( encoder->bm_rep[cur_trial->state] ); if( match_byte == cur_byte && next_trial->dis != 0 ) Tr_update( next_trial, 0, cur, rep_match_price + LZe_price_rep_len1( encoder, cur_trial->state, pos_state ) ); - const int len_limit = min( min( max_num_trials - 1 - cur, + len_limit = min( min( max_num_trials - 1 - cur, Mf_available_bytes( encoder->matchfinder ) ), - Mf_match_len_limit( encoder->matchfinder ) ); + Mf_match_len_limit( encoder->matchfinder ) ); if( len_limit < min_match_len ) continue; - for( int rep = 0; rep < num_rep_distances; ++rep ) + for( rep = 0; rep < num_rep_distances; ++rep ) { const int dis = cur_trial->reps[rep] + 1; int len = 0; @@ -469,7 +498,7 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, const int price = rep_match_price + LZe_price_rep( encoder, rep, cur_trial->state, pos_state ); while( num_trials < cur + len ) - encoder->trials[++num_trials].price = lze_infinite_price; + encoder->trials[++num_trials].price = infinite_price; for( ; len >= min_match_len; --len ) Tr_update( &encoder->trials[cur+len], rep, cur, price + Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) ); @@ -483,10 +512,11 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, { const int normal_match_price = match_price + price0( encoder->bm_rep[cur_trial->state] ); + int len; while( num_trials < cur + newlen ) - encoder->trials[++num_trials].price = lze_infinite_price; + encoder->trials[++num_trials].price = infinite_price; - for( int len = min_match_len; len <= newlen; ++len ) + for( len = min_match_len; len <= newlen; ++len ) Tr_update( &encoder->trials[cur+len], encoder->match_distances[len] + num_rep_distances, cur, normal_match_price + LZe_price_pair( encoder, encoder->match_distances[len], len, pos_state ) ); @@ -495,19 +525,20 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, } - // End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) + /* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */ void LZe_full_flush( struct LZ_encoder * const encoder, const State state ) { + int i; const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; + File_trailer trailer; Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][pos_state], 1 ); Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep[state], 0 ); LZe_encode_pair( encoder, 0xFFFFFFFFU, min_match_len, pos_state ); Re_flush( &encoder->range_encoder ); - File_trailer trailer; Ft_set_data_crc( trailer, LZe_crc( encoder ) ); Ft_set_data_size( trailer, Mf_data_position( encoder->matchfinder ) ); Ft_set_member_size( trailer, LZe_member_position( encoder ) + Ft_size ); - for( int i = 0; i < Ft_size; ++i ) + for( i = 0; i < Ft_size; ++i ) Re_put_byte( &encoder->range_encoder, trailer[i] ); Re_flush_data( &encoder->range_encoder ); } @@ -516,12 +547,13 @@ void LZe_full_flush( struct LZ_encoder * const encoder, const State state ) void LZe_init( struct LZ_encoder * const encoder, struct Matchfinder * const mf, const File_header header, const int outfd ) { + int i, j; encoder->longest_match_found = 0; encoder->crc_ = 0xFFFFFFFFU; - for( int i = 0; i < St_states; ++i ) + for( i = 0; i < states; ++i ) { - for( int j = 0; j < pos_states; ++j ) + for( j = 0; j < pos_states; ++j ) { Bm_init( &encoder->bm_match[i][j] ); Bm_init( &encoder->bm_len[i][j] ); @@ -531,12 +563,12 @@ void LZe_init( struct LZ_encoder * const encoder, struct Matchfinder * const mf, Bm_init( &encoder->bm_rep1[i] ); Bm_init( &encoder->bm_rep2[i] ); } - for( int i = 0; i < max_dis_states; ++i ) - for( int j = 0; j < 1<<dis_slot_bits; ++j ) + for( i = 0; i < max_dis_states; ++i ) + for( j = 0; j < 1<<dis_slot_bits; ++j ) Bm_init( &encoder->bm_dis_slot[i][j] ); - for( int i = 0; i < modeled_distances-end_dis_model; ++i ) + for( i = 0; i < modeled_distances-end_dis_model+1; ++i ) Bm_init( &encoder->bm_dis[i] ); - for( int i = 0; i < dis_align_size; ++i ) + for( i = 0; i < dis_align_size; ++i ) Bm_init( &encoder->bm_align[i] ); encoder->matchfinder = mf; @@ -544,36 +576,41 @@ void LZe_init( struct LZ_encoder * const encoder, struct Matchfinder * const mf, Lee_init( &encoder->len_encoder, Mf_match_len_limit( encoder->matchfinder ) ), Lee_init( &encoder->rep_match_len_encoder, Mf_match_len_limit( encoder->matchfinder ) ), Lie_init( &encoder->literal_encoder ); - encoder->num_dis_slots = 2 * Fh_real_bits( Mf_dictionary_size( encoder->matchfinder ) - 1 ); + encoder->num_dis_slots = 2 * real_bits( Mf_dictionary_size( encoder->matchfinder ) - 1 ); LZe_fill_align_prices( encoder ); - for( int i = 0; i < Fh_size; ++i ) + for( i = 0; i < Fh_size; ++i ) Re_put_byte( &encoder->range_encoder, header[i] ); } -bool LZe_encode_member( struct LZ_encoder * const encoder, const long long member_size ) +bool LZe_encode_member( struct LZ_encoder * const encoder, + const long long member_size ) { - if( LZe_member_position( encoder ) != Fh_size ) - return false; // can be called only once const long long member_size_limit = - member_size - Ft_size - lze_max_marker_size; + member_size - Ft_size - max_marker_size; + const int fill_count = + ( Mf_match_len_limit( encoder->matchfinder ) > 12 ) ? 512 : 2048; int fill_counter = 0; + int ahead; + int i; int rep_distances[num_rep_distances]; State state = 0; - for( int i = 0; i < num_rep_distances; ++i ) rep_distances[i] = 0; + for( i = 0; i < num_rep_distances; ++i ) rep_distances[i] = 0; + + if( Mf_data_position( encoder->matchfinder ) != 0 || + LZe_member_position( encoder ) != Fh_size ) + return false; /* can be called only once */ - // encode first byte - if( Mf_data_position( encoder->matchfinder ) == 0 && - !Mf_finished( encoder->matchfinder ) ) + if( !Mf_finished( encoder->matchfinder ) ) /* encode first byte */ { - Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][0], 0 ); const uint8_t prev_byte = 0; const uint8_t cur_byte = Mf_peek( encoder->matchfinder, 0 ); + Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][0], 0 ); Lie_encode( &encoder->literal_encoder, &encoder->range_encoder, prev_byte, cur_byte ); CRC32_update_byte( &encoder->crc_, cur_byte ); - if( !LZe_move_pos( encoder, 1, false ) ) return false; + LZe_move_pos( encoder, 1, false ); } while( true ) @@ -581,13 +618,13 @@ bool LZe_encode_member( struct LZ_encoder * const encoder, const long long membe if( Mf_finished( encoder->matchfinder ) ) { LZe_full_flush( encoder, state ); return true; } if( fill_counter <= 0 ) - { LZe_fill_distance_prices( encoder ); fill_counter = 512; } + { LZe_fill_distance_prices( encoder ); fill_counter = fill_count; } - int ahead = LZe_sequence_optimizer( encoder, rep_distances, state ); + ahead = LZe_sequence_optimizer( encoder, rep_distances, state ); if( ahead <= 0 ) return false; fill_counter -= ahead; - for( int i = 0; ; ) + for( i = 0; ; ) { const int pos_state = ( Mf_data_position( encoder->matchfinder ) - ahead ) & pos_state_mask; const int dis = encoder->trials[i].dis; @@ -595,7 +632,7 @@ bool LZe_encode_member( struct LZ_encoder * const encoder, const long long membe bool bit = ( dis < 0 && len == 1 ); Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][pos_state], !bit ); - if( bit ) // literal byte + if( bit ) /* literal byte */ { const uint8_t prev_byte = Mf_peek( encoder->matchfinder, -ahead-1 ); const uint8_t cur_byte = Mf_peek( encoder->matchfinder, -ahead ); @@ -605,11 +642,11 @@ bool LZe_encode_member( struct LZ_encoder * const encoder, const long long membe else { const uint8_t match_byte = Mf_peek( encoder->matchfinder, -ahead-rep_distances[0]-1 ); - Lie_encode_matched( &encoder->literal_encoder, &encoder->range_encoder, prev_byte, match_byte, cur_byte ); + Lie_encode_matched( &encoder->literal_encoder, &encoder->range_encoder, prev_byte, cur_byte, match_byte ); } St_set_char( &state ); } - else // match or repeated match + else /* match or repeated match */ { CRC32_update_buf( &encoder->crc_, Mf_ptr_to_current_pos( encoder->matchfinder ) - ahead, len ); LZe_mtf_reps( dis, rep_distances ); |