From baab8c4746bececfa4d7a048ed723a97405dab79 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 6 Nov 2015 12:33:49 +0100 Subject: Merging upstream version 1.3. Signed-off-by: Daniel Baumann --- encoder.h | 277 ++++++++++++++++++++++++++++++++------------------------------ 1 file changed, 144 insertions(+), 133 deletions(-) (limited to 'encoder.h') diff --git a/encoder.h b/encoder.h index e7a6481..13df958 100644 --- a/encoder.h +++ b/encoder.h @@ -1,5 +1,5 @@ /* Clzip - Data compressor based on the LZMA algorithm - Copyright (C) 2010, 2011 Antonio Diaz Diaz. + Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -142,7 +142,7 @@ enum { /* bytes to keep in buffer before dictionary */ num_prev_positions3 = 1 << 18, num_prev_positions2 = 1 << 16, num_prev_positions = num_prev_positions4 + num_prev_positions3 + - num_prev_positions2 }; + num_prev_positions2 }; struct Matchfinder { @@ -150,43 +150,44 @@ struct Matchfinder uint8_t * buffer; /* input buffer */ int32_t * prev_positions; /* last seen position of key */ int32_t * prev_pos_tree; - int dictionary_size_; /* bytes to keep in buffer before pos */ + int dictionary_size; /* bytes to keep in buffer before pos */ int buffer_size; int pos; /* current pos in buffer */ int cyclic_pos; /* current pos in dictionary */ int stream_pos; /* first byte not yet read from file */ int pos_limit; /* when reached, a new block must be read */ - int match_len_limit_; + int match_len_limit; int cycles; int infd; /* input file descriptor */ bool at_stream_end; /* stream_pos shows real end of file */ }; bool Mf_read_block( struct Matchfinder * const mf ); +void Mf_normalize_pos( struct Matchfinder * const mf ); -void Mf_init( struct Matchfinder * const mf, +bool Mf_init( struct Matchfinder * const mf, const int dict_size, const int len_limit, const int ifd ); static inline void Mf_free( struct Matchfinder * const mf ) { free( mf->prev_pos_tree ); mf->prev_pos_tree = 0; - free( mf->prev_positions ); mf->prev_positions = 0; free( mf->buffer ); mf->buffer = 0; + free( mf->prev_positions ); mf->prev_positions = 0; } -static inline uint8_t Mf_peek( struct Matchfinder * const mf, const int i ) +static inline uint8_t Mf_peek( const struct Matchfinder * const mf, const int i ) { return mf->buffer[mf->pos+i]; } -static inline int Mf_available_bytes( struct Matchfinder * const mf ) + +static inline int Mf_available_bytes( const struct Matchfinder * const mf ) { return mf->stream_pos - mf->pos; } -static inline long long Mf_data_position( struct Matchfinder * const mf ) + +static inline long long Mf_data_position( const struct Matchfinder * const mf ) { return mf->partial_data_pos + mf->pos; } -static inline int Mf_dictionary_size( struct Matchfinder * const mf ) - { return mf->dictionary_size_; } -static inline bool Mf_finished( struct Matchfinder * const mf ) + +static inline bool Mf_finished( const struct Matchfinder * const mf ) { return mf->at_stream_end && mf->pos >= mf->stream_pos; } -static inline int Mf_match_len_limit( struct Matchfinder * const mf ) - { return mf->match_len_limit_; } -static inline const uint8_t * Mf_ptr_to_current_pos( struct Matchfinder * const mf ) + +static inline const uint8_t * Mf_ptr_to_current_pos( const struct Matchfinder * const mf ) { return mf->buffer + mf->pos; } static inline bool Mf_dec_pos( struct Matchfinder * const mf, @@ -195,25 +196,30 @@ static inline bool Mf_dec_pos( struct Matchfinder * const mf, if( ahead < 0 || mf->pos < ahead ) return false; mf->pos -= ahead; mf->cyclic_pos -= ahead; - if( mf->cyclic_pos < 0 ) mf->cyclic_pos += mf->dictionary_size_; + if( mf->cyclic_pos < 0 ) mf->cyclic_pos += mf->dictionary_size; return true; } -static inline int Mf_true_match_len( struct Matchfinder * const mf, +static inline int Mf_true_match_len( const struct Matchfinder * const mf, const int index, const int distance, int len_limit ) { - const uint8_t * const data = mf->buffer + mf->pos + index - distance; + const uint8_t * const data = mf->buffer + mf->pos + index; int i = 0; if( index + len_limit > Mf_available_bytes( mf ) ) len_limit = Mf_available_bytes( mf ) - index; - while( i < len_limit && data[i] == data[i+distance] ) ++i; + while( i < len_limit && data[i-distance] == data[i] ) ++i; return i; } +static inline void Mf_move_pos( struct Matchfinder * const mf ) + { + if( ++mf->cyclic_pos >= mf->dictionary_size ) mf->cyclic_pos = 0; + if( ++mf->pos >= mf->pos_limit ) Mf_normalize_pos( mf ); + } + void Mf_reset( struct Matchfinder * const mf ); -void Mf_move_pos( struct Matchfinder * const mf ); int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances ); @@ -231,89 +237,85 @@ struct Range_encoder uint8_t cache; }; -void Re_flush_data( struct Range_encoder * const range_encoder ); +void Re_flush_data( struct Range_encoder * const renc ); -static inline void Re_put_byte( struct Range_encoder * const range_encoder, +static inline void Re_put_byte( struct Range_encoder * const renc, const uint8_t b ) { - range_encoder->buffer[range_encoder->pos] = b; - if( ++range_encoder->pos >= re_buffer_size ) Re_flush_data( range_encoder ); + renc->buffer[renc->pos] = b; + if( ++renc->pos >= re_buffer_size ) Re_flush_data( renc ); } -static inline void Re_shift_low( struct Range_encoder * const range_encoder ) +static inline void Re_shift_low( struct Range_encoder * const renc ) { - const uint32_t carry = range_encoder->low >> 32; - if( range_encoder->low < 0xFF000000U || carry == 1 ) + const bool carry = ( renc->low > 0xFFFFFFFFU ); + if( carry || renc->low < 0xFF000000U ) { - Re_put_byte( range_encoder, range_encoder->cache + carry ); - for( ; range_encoder->ff_count > 0; --range_encoder->ff_count ) - Re_put_byte( range_encoder, 0xFF + carry ); - range_encoder->cache = range_encoder->low >> 24; + Re_put_byte( renc, renc->cache + carry ); + for( ; renc->ff_count > 0; --renc->ff_count ) + Re_put_byte( renc, 0xFF + carry ); + renc->cache = renc->low >> 24; } - else ++range_encoder->ff_count; - range_encoder->low = ( range_encoder->low & 0x00FFFFFFU ) << 8; + else ++renc->ff_count; + renc->low = ( renc->low & 0x00FFFFFFU ) << 8; } -static inline void Re_init( struct Range_encoder * const range_encoder, - const int ofd ) - { - range_encoder->low = 0; - range_encoder->partial_member_pos = 0; - range_encoder->buffer = (uint8_t *)malloc( re_buffer_size ); - if( !range_encoder->buffer ) - { - show_error( "Not enough memory. Try a smaller dictionary size.", 0, false ); - cleanup_and_fail( 1 ); - } - range_encoder->pos = 0; - range_encoder->range = 0xFFFFFFFFU; - range_encoder->ff_count = 0; - range_encoder->outfd = ofd; - range_encoder->cache = 0; +static inline bool Re_init( struct Range_encoder * const renc, const int ofd ) + { + renc->low = 0; + renc->partial_member_pos = 0; + renc->buffer = (uint8_t *)malloc( re_buffer_size ); + if( !renc->buffer ) return false; + renc->pos = 0; + renc->range = 0xFFFFFFFFU; + renc->ff_count = 0; + renc->outfd = ofd; + renc->cache = 0; + return true; } -static inline void Re_free( struct Range_encoder * const range_encoder ) - { free( range_encoder->buffer ); range_encoder->buffer = 0; } +static inline void Re_free( struct Range_encoder * const renc ) + { free( renc->buffer ); } -static inline long long Re_member_position( struct Range_encoder * const range_encoder ) - { return range_encoder->partial_member_pos + range_encoder->pos + range_encoder->ff_count; } +static inline long long Re_member_position( const struct Range_encoder * const renc ) + { return renc->partial_member_pos + renc->pos + renc->ff_count; } -static inline void Re_flush( struct Range_encoder * const range_encoder ) - { int i; for( i = 0; i < 5; ++i ) Re_shift_low( range_encoder ); } +static inline void Re_flush( struct Range_encoder * const renc ) + { int i; for( i = 0; i < 5; ++i ) Re_shift_low( renc ); } -static inline void Re_encode( struct Range_encoder * const range_encoder, +static inline void Re_encode( struct Range_encoder * const renc, const int symbol, const int num_bits ) { int i; for( i = num_bits - 1; i >= 0; --i ) { - range_encoder->range >>= 1; - if( (symbol >> i) & 1 ) range_encoder->low += range_encoder->range; - if( range_encoder->range <= 0x00FFFFFFU ) - { range_encoder->range <<= 8; Re_shift_low( range_encoder ); } + renc->range >>= 1; + if( (symbol >> i) & 1 ) renc->low += renc->range; + if( renc->range <= 0x00FFFFFFU ) + { renc->range <<= 8; Re_shift_low( renc ); } } } -static inline void Re_encode_bit( struct Range_encoder * const range_encoder, +static inline void Re_encode_bit( struct Range_encoder * const renc, Bit_model * const probability, const int bit ) { - const uint32_t bound = ( range_encoder->range >> bit_model_total_bits ) * *probability; + const uint32_t bound = ( renc->range >> bit_model_total_bits ) * *probability; if( !bit ) { - range_encoder->range = bound; + renc->range = bound; *probability += (bit_model_total - *probability) >> bit_model_move_bits; } else { - range_encoder->low += bound; - range_encoder->range -= bound; + renc->low += bound; + renc->range -= bound; *probability -= *probability >> bit_model_move_bits; } - if( range_encoder->range <= 0x00FFFFFFU ) - { range_encoder->range <<= 8; Re_shift_low( range_encoder ); } + if( renc->range <= 0x00FFFFFFU ) + { renc->range <<= 8; Re_shift_low( renc ); } } -static inline void Re_encode_tree( struct Range_encoder * const range_encoder, +static inline void Re_encode_tree( struct Range_encoder * const renc, Bit_model bm[], const int symbol, const int num_bits ) { int mask = ( 1 << ( num_bits - 1 ) ); @@ -322,13 +324,13 @@ static inline void Re_encode_tree( struct Range_encoder * const range_encoder, for( i = num_bits; i > 0; --i, mask >>= 1 ) { const int bit = ( symbol & mask ); - Re_encode_bit( range_encoder, &bm[model], bit ); + Re_encode_bit( renc, &bm[model], bit ); model <<= 1; if( bit ) model |= 1; } } -static inline void Re_encode_tree_reversed( struct Range_encoder * const range_encoder, +static inline void Re_encode_tree_reversed( struct Range_encoder * const renc, Bit_model bm[], int symbol, const int num_bits ) { int model = 1; @@ -336,13 +338,13 @@ static inline void Re_encode_tree_reversed( struct Range_encoder * const range_e for( i = num_bits; i > 0; --i ) { const int bit = symbol & 1; - Re_encode_bit( range_encoder, &bm[model], bit ); + Re_encode_bit( renc, &bm[model], bit ); model = ( model << 1 ) | bit; symbol >>= 1; } } -static inline void Re_encode_matched( struct Range_encoder * const range_encoder, +static inline void Re_encode_matched( struct Range_encoder * const renc, Bit_model bm[], int symbol, int match_byte ) { int model = 1; @@ -351,14 +353,14 @@ static inline void Re_encode_matched( struct Range_encoder * const range_encoder { const int match_bit = ( match_byte >> i ) & 1; int bit = ( symbol >> i ) & 1; - Re_encode_bit( range_encoder, &bm[(match_bit<<8)+model+0x100], bit ); + Re_encode_bit( renc, &bm[(match_bit<<8)+model+0x100], bit ); model = ( model << 1 ) | bit; if( match_bit != bit ) { while( --i >= 0 ) { bit = ( symbol >> i ) & 1; - Re_encode_bit( range_encoder, &bm[model], bit ); + Re_encode_bit( renc, &bm[model], bit ); model = ( model << 1 ) | bit; } break; @@ -393,11 +395,11 @@ static inline void Lee_update_prices( struct Len_encoder * const len_encoder, pps[len] = tmp + price0( len_encoder->choice2 ) + price_symbol( len_encoder->bm_mid[pos_state], len - len_low_symbols, len_mid_bits ); for( ; len < len_encoder->len_symbols; ++len ) - /* using 4 slots per value makes "Lee_price" faster */ - len_encoder->prices[3][len] = len_encoder->prices[2][len] = - len_encoder->prices[1][len] = len_encoder->prices[0][len] = - tmp + price1( len_encoder->choice2 ) + - price_symbol( len_encoder->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); + /* using 4 slots per value makes "Lee_price" faster */ + len_encoder->prices[3][len] = len_encoder->prices[2][len] = + len_encoder->prices[1][len] = len_encoder->prices[0][len] = + tmp + price1( len_encoder->choice2 ) + + price_symbol( len_encoder->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); len_encoder->counters[pos_state] = len_encoder->len_symbols; } @@ -420,10 +422,10 @@ static inline void Lee_init( struct Len_encoder * const len_encoder, } void Lee_encode( struct Len_encoder * const len_encoder, - struct Range_encoder * const range_encoder, int symbol, - const int pos_state ); + struct Range_encoder * const renc, + int symbol, const int pos_state ); -static inline int Lee_price( struct Len_encoder * const len_encoder, +static inline int Lee_price( const struct Len_encoder * const len_encoder, const int symbol, const int pos_state ) { return len_encoder->prices[pos_state][symbol - min_match_len]; } @@ -433,34 +435,38 @@ struct Literal_encoder Bit_model bm_literal[1<> ( 8 - literal_context_bits ) ); } - -static inline void Lie_init( struct Literal_encoder * const literal_encoder ) +static inline void Lie_init( struct Literal_encoder * const lienc ) { int i, j; for( i = 0; i < 1<bm_literal[i][j] ); + Bm_init( &lienc->bm_literal[i][j] ); } -static inline void Lie_encode( struct Literal_encoder * const literal_encoder, - struct Range_encoder * const range_encoder, +static inline int Lie_state( const uint8_t prev_byte ) + { return ( prev_byte >> ( 8 - literal_context_bits ) ); } + +static inline void Lie_encode( struct Literal_encoder * const lienc, + struct Range_encoder * const renc, uint8_t prev_byte, uint8_t symbol ) - { Re_encode_tree( range_encoder, literal_encoder->bm_literal[Lie_state(prev_byte)], symbol, 8 ); } + { Re_encode_tree( renc, lienc->bm_literal[Lie_state(prev_byte)], symbol, 8 ); } -static inline void Lie_encode_matched( struct Literal_encoder * const literal_encoder, - struct Range_encoder * const range_encoder, - uint8_t prev_byte, uint8_t symbol, uint8_t match_byte ) - { Re_encode_matched( range_encoder, literal_encoder->bm_literal[Lie_state(prev_byte)], symbol, match_byte ); } +static inline void Lie_encode_matched( struct Literal_encoder * const lienc, + struct Range_encoder * const renc, + uint8_t prev_byte, uint8_t symbol, + uint8_t match_byte ) + { Re_encode_matched( renc, lienc->bm_literal[Lie_state(prev_byte)], + symbol, match_byte ); } -static inline int Lie_price_symbol( struct Literal_encoder * const literal_encoder, +static inline int Lie_price_symbol( const struct Literal_encoder * const lienc, uint8_t prev_byte, uint8_t symbol ) - { return price_symbol( literal_encoder->bm_literal[Lie_state(prev_byte)], symbol, 8 ); } + { return price_symbol( lienc->bm_literal[Lie_state(prev_byte)], symbol, 8 ); } -static inline int Lie_price_matched( struct Literal_encoder * const literal_encoder, - uint8_t prev_byte, uint8_t symbol, uint8_t match_byte ) - { return price_matched( literal_encoder->bm_literal[Lie_state(prev_byte)], symbol, match_byte ); } +static inline int Lie_price_matched( const struct Literal_encoder * const lienc, + uint8_t prev_byte, uint8_t symbol, + uint8_t match_byte ) + { return price_matched( lienc->bm_literal[Lie_state(prev_byte)], + symbol, match_byte ); } enum { infinite_price = 0x0FFFFFFF, @@ -487,7 +493,7 @@ static inline void Tr_update( struct Trial * const trial, struct LZ_encoder { int longest_match_found; - uint32_t crc_; + uint32_t crc; Bit_model bm_match[states][pos_states]; Bit_model bm_rep[states]; @@ -515,11 +521,20 @@ struct LZ_encoder int align_price_count; }; +void LZe_full_flush( struct LZ_encoder * const encoder, const State state ); + void LZe_fill_align_prices( struct LZ_encoder * const encoder ); void LZe_fill_distance_prices( struct LZ_encoder * const encoder ); -static inline uint32_t LZe_crc( struct LZ_encoder * const encoder ) - { return encoder->crc_ ^ 0xFFFFFFFFU; } +bool LZe_init( struct LZ_encoder * const encoder, + struct Matchfinder * const mf, + const File_header header, const int outfd ); + +static inline void LZe_free( struct LZ_encoder * const encoder ) + { Re_free( &encoder->range_encoder ); } + +static inline uint32_t LZe_crc( const struct LZ_encoder * const encoder ) + { return encoder->crc ^ 0xFFFFFFFFU; } /* move-to-front dis in/into reps */ static inline void LZe_mtf_reps( const int dis, int reps[num_rep_distances] ) @@ -538,13 +553,15 @@ static inline void LZe_mtf_reps( const int dis, int reps[num_rep_distances] ) } } -static inline int LZe_price_rep_len1( struct LZ_encoder * const encoder, +static inline int LZe_price_rep_len1( const struct LZ_encoder * const encoder, const State state, const int pos_state ) { - return price0( encoder->bm_rep0[state] ) + price0( encoder->bm_len[state][pos_state] ); + return price0( encoder->bm_rep0[state] ) + + price0( encoder->bm_len[state][pos_state] ); } -static inline int LZe_price_rep( struct LZ_encoder * const encoder, const int rep, +static inline int LZe_price_rep( const struct LZ_encoder * const encoder, + const int rep, const State state, const int pos_state ) { int price; @@ -561,7 +578,7 @@ static inline int LZe_price_rep( struct LZ_encoder * const encoder, const int re return price; } -static inline int LZe_price_dis( struct LZ_encoder * const encoder, +static inline int LZe_price_dis( const struct LZ_encoder * const encoder, const int dis, const int dis_state ) { if( dis < modeled_distances ) @@ -571,7 +588,7 @@ static inline int LZe_price_dis( struct LZ_encoder * const encoder, encoder->align_prices[dis & (dis_align_size - 1)]; } -static inline int LZe_price_pair( struct LZ_encoder * const encoder, +static inline int LZe_price_pair( const struct LZ_encoder * const encoder, const int dis, const int len, const int pos_state ) { @@ -598,12 +615,15 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder, const uint32_t direct_dis = dis - base; if( dis_slot < end_dis_model ) - Re_encode_tree_reversed( &encoder->range_encoder, encoder->bm_dis + base - dis_slot, - direct_dis, direct_bits ); + Re_encode_tree_reversed( &encoder->range_encoder, + encoder->bm_dis + base - dis_slot, + direct_dis, direct_bits ); else { - Re_encode( &encoder->range_encoder, direct_dis >> dis_align_bits, direct_bits - dis_align_bits ); - Re_encode_tree_reversed( &encoder->range_encoder, encoder->bm_align, direct_dis, dis_align_bits ); + Re_encode( &encoder->range_encoder, direct_dis >> dis_align_bits, + direct_bits - dis_align_bits ); + Re_encode_tree_reversed( &encoder->range_encoder, encoder->bm_align, + direct_dis, dis_align_bits ); if( --encoder->align_price_count <= 0 ) LZe_fill_align_prices( encoder ); } } @@ -611,19 +631,21 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder, static inline int LZe_read_match_distances( struct LZ_encoder * const encoder ) { - int len = Mf_longest_match_len( encoder->matchfinder, encoder->match_distances ); - if( len == Mf_match_len_limit( encoder->matchfinder ) ) - len += Mf_true_match_len( encoder->matchfinder, len, encoder->match_distances[len] + 1, max_match_len - len ); + int len = Mf_longest_match_len( encoder->matchfinder, + encoder->match_distances ); + if( len == encoder->matchfinder->match_len_limit && len < max_match_len ) + len += Mf_true_match_len( encoder->matchfinder, len, + encoder->match_distances[len] + 1, + max_match_len - len ); return len; } -static inline void LZe_move_pos( struct LZ_encoder * const encoder, - int n, bool skip ) +static inline void LZe_move_pos( struct LZ_encoder * const encoder, int n ) { + if( --n >= 0 ) Mf_move_pos( encoder->matchfinder ); while( --n >= 0 ) { - if( skip ) skip = false; - else Mf_longest_match_len( encoder->matchfinder, 0 ); + Mf_longest_match_len( encoder->matchfinder, 0 ); Mf_move_pos( encoder->matchfinder ); } } @@ -642,19 +664,8 @@ static inline void LZe_backward( struct LZ_encoder * const encoder, int cur ) } int LZe_sequence_optimizer( struct LZ_encoder * const encoder, - const int reps[num_rep_distances], const State state ); - -void LZe_full_flush( struct LZ_encoder * const encoder, const State state ); - -void LZe_init( struct LZ_encoder * const encoder, struct Matchfinder * const mf, - const File_header header, const int outfd ); - -static inline void LZe_free( struct LZ_encoder * const encoder ) - { - Re_free( &encoder->range_encoder ); - } - -bool LZe_encode_member( struct LZ_encoder * const encoder, const long long member_size ); + const int reps[num_rep_distances], + const State state ); -static inline long long LZe_member_position( struct LZ_encoder * const encoder ) - { return Re_member_position( &encoder->range_encoder ); } +bool LZe_encode_member( struct LZ_encoder * const encoder, + const long long member_size ); -- cgit v1.2.3