From e9eff495255f6597233d54ad40ac1efe3f4d416f Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 7 Nov 2015 10:34:27 +0100 Subject: Merging upstream version 1.14. Signed-off-by: Daniel Baumann --- fast_encoder.cc | 238 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 119 insertions(+), 119 deletions(-) (limited to 'fast_encoder.cc') diff --git a/fast_encoder.cc b/fast_encoder.cc index 2d99b04..954e671 100644 --- a/fast_encoder.cc +++ b/fast_encoder.cc @@ -1,5 +1,5 @@ /* Lzip - Data compressor based on the LZMA algorithm - Copyright (C) 2008, 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -39,7 +39,7 @@ int Fmatchfinder::longest_match_len( int * const distance ) if( len_limit < 4 ) return 0; } - key4 = ( ( key4 << 4 ) ^ buffer[pos+3] ) & ( num_prev_pos - 1 ); + key4 = ( ( key4 << 4 ) ^ buffer[pos+3] ) & key4_mask; int newpos = prev_positions[key4]; prev_positions[key4] = pos; @@ -49,18 +49,18 @@ int Fmatchfinder::longest_match_len( int * const distance ) for( int count = 4; ; ) { - if( newpos < (pos - dictionary_size_ + 1) || newpos < 0 || --count < 0 ) + const int delta = pos - newpos; + if( delta > dictionary_size_ || newpos < 0 || --count < 0 ) { *ptr0 = -1; break; } int len = 0; if( buffer[maxlen+newpos] == buffer[maxlen+pos] ) while( len < len_limit && buffer[len+newpos] == buffer[len+pos] ) ++len; - const int delta = pos - newpos; if( maxlen < len ) { maxlen = len; *distance = delta - 1; } int32_t * const newptr = pos_array + ( cyclic_pos - delta + - ( ( cyclic_pos >= delta ) ? 0 : dictionary_size_ ) ); + ( ( cyclic_pos >= delta ) ? 0 : dictionary_size_ + 1 ) ); if( len < len_limit ) { @@ -80,133 +80,48 @@ int Fmatchfinder::longest_match_len( int * const distance ) } -void Fmatchfinder::longest_match_len() - { - int len_limit = match_len_limit_; - if( len_limit > available_bytes() ) - { - len_limit = available_bytes(); - if( len_limit < 4 ) return; - } - - key4 = ( ( key4 << 4 ) ^ buffer[pos+3] ) & ( num_prev_pos - 1 ); - - const int newpos = prev_positions[key4]; - prev_positions[key4] = pos; - - int32_t * const ptr0 = pos_array + cyclic_pos; - - if( newpos < (pos - dictionary_size_ + 1) || newpos < 0 ) - *ptr0 = -1; - else if( buffer[len_limit-1+newpos] != buffer[len_limit-1+pos] || - std::memcmp( buffer + newpos, buffer + pos, len_limit - 1 ) ) - *ptr0 = newpos; - else - { - int idx = cyclic_pos - pos + newpos; - if( idx < 0 ) idx += dictionary_size_; - *ptr0 = pos_array[idx]; - } - } - - -void FLZ_encoder::sequence_optimizer( int reps[num_rep_distances], - State & state ) +void Fmatchfinder::longest_match_len( int n ) { - int match_distance; - const int main_len = fmatchfinder.longest_match_len( &match_distance ); - const int pos_state = fmatchfinder.data_position() & pos_state_mask; - int dis = 0; - int len = 0; - - for( int i = 0; i < num_rep_distances; ++i ) - { - const int tlen = - fmatchfinder.true_match_len( 0, reps[i] + 1, max_match_len ); - if( tlen > len ) { len = tlen; dis = i; } - } - if( len > min_match_len && len + 4 > main_len ) + while( --n >= 0 ) { - crc32.update( crc_, fmatchfinder.ptr_to_current_pos(), len ); - mtf_reps( dis, reps ); - range_encoder.encode_bit( bm_match[state()][pos_state], 1 ); - range_encoder.encode_bit( bm_rep[state()], 1 ); - const bool bit = ( dis == 0 ); - range_encoder.encode_bit( bm_rep0[state()], !bit ); - if( bit ) - range_encoder.encode_bit( bm_len[state()][pos_state], 1 ); - else + int len_limit = match_len_limit_ - 1; // len_limit - 1 + if( len_limit >= available_bytes() ) { - range_encoder.encode_bit( bm_rep1[state()], dis > 1 ); - if( dis > 1 ) - range_encoder.encode_bit( bm_rep2[state()], dis > 2 ); + len_limit = available_bytes() - 1; + if( len_limit < 3 ) { move_pos(); continue; } } - rep_match_len_encoder.encode( range_encoder, len, pos_state ); - state.set_rep(); - move_pos( len ); - return; - } - if( main_len > min_match_len || - ( main_len == min_match_len && match_distance < modeled_distances ) ) - { - crc32.update( crc_, fmatchfinder.ptr_to_current_pos(), main_len ); - dis = match_distance; - mtf_reps( dis + num_rep_distances, reps ); - range_encoder.encode_bit( bm_match[state()][pos_state], 1 ); - range_encoder.encode_bit( bm_rep[state()], 0 ); - encode_pair( dis, main_len, pos_state ); - state.set_match(); - move_pos( main_len ); - return; - } + key4 = ( ( key4 << 4 ) ^ buffer[pos+3] ) & key4_mask; - const uint8_t prev_byte = fmatchfinder[-1]; - const uint8_t cur_byte = fmatchfinder[0]; - const uint8_t match_byte = fmatchfinder[-reps[0]-1]; - crc32.update( crc_, cur_byte ); - fmatchfinder.move_pos(); + const int newpos = prev_positions[key4]; + prev_positions[key4] = pos; - if( match_byte == cur_byte ) - { - int price = price0( bm_match[state()][pos_state] ); - if( state.is_char() ) - price += literal_encoder.price_symbol( prev_byte, cur_byte ); + int32_t * const ptr0 = pos_array + cyclic_pos; + + if( pos - newpos > dictionary_size_ || newpos < 0 ) + *ptr0 = -1; + else if( buffer[len_limit+newpos] != buffer[len_limit+pos] || + std::memcmp( buffer + newpos, buffer + pos, len_limit ) != 0 ) + *ptr0 = newpos; else - price += literal_encoder.price_matched( prev_byte, cur_byte, match_byte ); - const int short_rep_price = price1( bm_match[state()][pos_state] ) + - price1( bm_rep[state()] ) + - price0( bm_rep0[state()] ) + - price0( bm_len[state()][pos_state] ); - if( short_rep_price < price ) { - range_encoder.encode_bit( bm_match[state()][pos_state], 1 ); - range_encoder.encode_bit( bm_rep[state()], 1 ); - range_encoder.encode_bit( bm_rep0[state()], 0 ); - range_encoder.encode_bit( bm_len[state()][pos_state], 0 ); - state.set_short_rep(); - return; + int idx = cyclic_pos - pos + newpos; + if( idx < 0 ) idx += dictionary_size_ + 1; + *ptr0 = pos_array[idx]; } + move_pos(); } - - // literal byte - range_encoder.encode_bit( bm_match[state()][pos_state], 0 ); - if( state.is_char() ) - literal_encoder.encode( range_encoder, prev_byte, cur_byte ); - else - literal_encoder.encode_matched( range_encoder, - prev_byte, cur_byte, match_byte ); - state.set_char(); } -bool FLZ_encoder::encode_member( const long long member_size ) +bool FLZ_encoder::encode_member( const unsigned long long member_size ) { - const long long member_size_limit = + const unsigned long long member_size_limit = member_size - File_trailer::size() - max_marker_size; - int rep_distances[num_rep_distances]; + int dis = 0; + int reps[num_rep_distances]; State state; - for( int i = 0; i < num_rep_distances; ++i ) rep_distances[i] = 0; + for( int i = 0; i < num_rep_distances; ++i ) reps[i] = 0; if( fmatchfinder.data_position() != 0 || range_encoder.member_position() != File_header::size ) @@ -217,15 +132,100 @@ bool FLZ_encoder::encode_member( const long long member_size ) const uint8_t prev_byte = 0; const uint8_t cur_byte = fmatchfinder[0]; range_encoder.encode_bit( bm_match[state()][0], 0 ); - literal_encoder.encode( range_encoder, prev_byte, cur_byte ); + encode_literal( prev_byte, cur_byte ); crc32.update( crc_, cur_byte ); - fmatchfinder.longest_match_len(); - fmatchfinder.move_pos(); + fmatchfinder.longest_match_len( 1 ); } while( !fmatchfinder.finished() && range_encoder.member_position() < member_size_limit ) - sequence_optimizer( rep_distances, state ); + { + int match_distance; + const int main_len = fmatchfinder.longest_match_len( &match_distance ); + const int pos_state = fmatchfinder.data_position() & pos_state_mask; + int len = 0; + + for( int i = 0; i < num_rep_distances; ++i ) + { + const int tlen = + fmatchfinder.true_match_len( 0, reps[i] + 1, max_match_len ); + if( tlen > len ) { len = tlen; dis = i; } + } + if( len > min_match_len && len + 4 > main_len ) + { + crc32.update( crc_, fmatchfinder.ptr_to_current_pos(), len ); + range_encoder.encode_bit( bm_match[state()][pos_state], 1 ); + range_encoder.encode_bit( bm_rep[state()], 1 ); + const bool bit = ( dis == 0 ); + range_encoder.encode_bit( bm_rep0[state()], !bit ); + if( bit ) + range_encoder.encode_bit( bm_len[state()][pos_state], 1 ); + else + { + const int distance = reps[dis]; + for( int i = dis; i > 0; --i ) reps[i] = reps[i-1]; + reps[0] = distance; + range_encoder.encode_bit( bm_rep1[state()], dis > 1 ); + if( dis > 1 ) + range_encoder.encode_bit( bm_rep2[state()], dis > 2 ); + } + rep_match_len_encoder.encode( range_encoder, len, pos_state ); + state.set_rep(); + move_pos( len ); + continue; + } + + if( main_len > min_match_len || + ( main_len == min_match_len && match_distance < modeled_distances ) ) + { + crc32.update( crc_, fmatchfinder.ptr_to_current_pos(), main_len ); + dis = match_distance; + range_encoder.encode_bit( bm_match[state()][pos_state], 1 ); + range_encoder.encode_bit( bm_rep[state()], 0 ); + encode_pair( dis, main_len, pos_state ); + state.set_match(); + move_pos( main_len ); + for( int i = num_rep_distances - 1; i > 0; --i ) reps[i] = reps[i-1]; + reps[0] = dis; + continue; + } + + const uint8_t prev_byte = fmatchfinder[-1]; + const uint8_t cur_byte = fmatchfinder[0]; + const uint8_t match_byte = fmatchfinder[-reps[0]-1]; + crc32.update( crc_, cur_byte ); + fmatchfinder.move_pos(); + + if( match_byte == cur_byte ) + { + int price = price0( bm_match[state()][pos_state] ); + if( state.is_char() ) + price += price_literal( prev_byte, cur_byte ); + else + price += price_matched( prev_byte, cur_byte, match_byte ); + const int short_rep_price = price1( bm_match[state()][pos_state] ) + + price1( bm_rep[state()] ) + + price0( bm_rep0[state()] ) + + price0( bm_len[state()][pos_state] ); + if( short_rep_price < price ) + { + range_encoder.encode_bit( bm_match[state()][pos_state], 1 ); + range_encoder.encode_bit( bm_rep[state()], 1 ); + range_encoder.encode_bit( bm_rep0[state()], 0 ); + range_encoder.encode_bit( bm_len[state()][pos_state], 0 ); + state.set_short_rep(); + continue; + } + } + + // literal byte + range_encoder.encode_bit( bm_match[state()][pos_state], 0 ); + if( state.is_char() ) + encode_literal( prev_byte, cur_byte ); + else + encode_matched( prev_byte, cur_byte, match_byte ); + state.set_char(); + } full_flush( fmatchfinder.data_position(), state ); return true; -- cgit v1.2.3