summaryrefslogtreecommitdiffstats
path: root/encoder.cc
diff options
context:
space:
mode:
Diffstat (limited to 'encoder.cc')
-rw-r--r--encoder.cc166
1 files changed, 83 insertions, 83 deletions
diff --git a/encoder.cc b/encoder.cc
index 79dd5cf..67e9d95 100644
--- a/encoder.cc
+++ b/encoder.cc
@@ -1,5 +1,5 @@
/* Lzip - Data compressor based on the LZMA algorithm
- Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz.
+ Copyright (C) 2008, 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -33,7 +33,7 @@ Dis_slots dis_slots;
Prob_prices prob_prices;
-bool Matchfinder::read_block()
+bool Matchfinder_base::read_block()
{
if( !at_stream_end && stream_pos < buffer_size )
{
@@ -41,26 +41,51 @@ bool Matchfinder::read_block()
const int rd = readblock( infd, buffer + stream_pos, size );
stream_pos += rd;
if( rd != size && errno ) throw Error( "Read error" );
- at_stream_end = ( rd < size );
+ if( rd < size ) { at_stream_end = true; pos_limit = buffer_size; }
}
return pos < stream_pos;
}
-Matchfinder::Matchfinder( const int dict_size, const int len_limit,
- const int ifd )
+void Matchfinder_base::normalize_pos()
+ {
+ if( pos > stream_pos )
+ internal_error( "pos > stream_pos in Matchfinder_base::normalize_pos" );
+ if( !at_stream_end )
+ {
+ const int offset = pos - dictionary_size_ - before_size;
+ const int size = stream_pos - offset;
+ std::memmove( buffer, buffer + offset, size );
+ partial_data_pos += offset;
+ pos -= offset;
+ stream_pos -= offset;
+ for( int i = 0; i < num_prev_positions; ++i )
+ if( prev_positions[i] >= 0 ) prev_positions[i] -= offset;
+ for( int i = 0; i < pos_array_size; ++i )
+ if( pos_array[i] >= 0 ) pos_array[i] -= offset;
+ read_block();
+ }
+ }
+
+
+Matchfinder_base::Matchfinder_base( const int before, const int dict_size,
+ const int dict_factor, const int len_limit,
+ const int num_prev_pos, const int ifd,
+ const int pos_array_factor )
:
partial_data_pos( 0 ),
- prev_positions( new int32_t[num_prev_positions] ),
+ prev_positions( new int32_t[num_prev_pos] ),
+ num_prev_positions( num_prev_pos ),
+ before_size( before ),
+ match_len_limit_( len_limit ),
+ infd( ifd ),
pos( 0 ),
cyclic_pos( 0 ),
stream_pos( 0 ),
- match_len_limit_( len_limit ),
- cycles( ( len_limit < max_match_len ) ? 16 + ( len_limit / 2 ) : 256 ),
- infd( ifd ),
at_stream_end( false )
{
- const int buffer_size_limit = ( 2 * dict_size ) + before_size + after_size;
+ const int buffer_size_limit =
+ ( dict_size * dict_factor ) + before_size + after_size;
buffer_size = std::max( 65536, dict_size );
buffer = (uint8_t *)std::malloc( buffer_size );
if( !buffer ) { delete[] prev_positions; throw std::bad_alloc(); }
@@ -78,14 +103,15 @@ Matchfinder::Matchfinder( const int dict_size, const int len_limit,
else dictionary_size_ = dict_size;
pos_limit = buffer_size;
if( !at_stream_end ) pos_limit -= after_size;
- prev_pos_tree = new( std::nothrow ) int32_t[2*dictionary_size_];
- if( !prev_pos_tree )
- { std::free( buffer ); delete[] prev_positions; throw std::bad_alloc(); }
for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1;
+ pos_array_size = pos_array_factor * dictionary_size_;
+ pos_array = new( std::nothrow ) int32_t[pos_array_size];
+ if( !pos_array )
+ { std::free( buffer ); delete[] prev_positions; throw std::bad_alloc(); }
}
-void Matchfinder::reset()
+void Matchfinder_base::reset()
{
const int size = stream_pos - pos;
if( size > 0 ) std::memmove( buffer, buffer + pos, size );
@@ -98,28 +124,13 @@ void Matchfinder::reset()
}
-void Matchfinder::move_pos()
+bool Matchfinder::dec_pos( const int ahead ) throw()
{
- if( ++cyclic_pos >= dictionary_size_ ) cyclic_pos = 0;
- if( ++pos >= pos_limit )
- {
- if( pos > stream_pos )
- internal_error( "pos > stream_pos in Matchfinder::move_pos" );
- if( !at_stream_end )
- {
- const int offset = pos - dictionary_size_ - before_size;
- const int size = stream_pos - offset;
- std::memmove( buffer, buffer + offset, size );
- partial_data_pos += offset;
- pos -= offset;
- stream_pos -= offset;
- for( int i = 0; i < num_prev_positions; ++i )
- if( prev_positions[i] >= 0 ) prev_positions[i] -= offset;
- for( int i = 0; i < 2 * dictionary_size_; ++i )
- if( prev_pos_tree[i] >= 0 ) prev_pos_tree[i] -= offset;
- read_block();
- }
- }
+ if( ahead < 0 || pos < ahead ) return false;
+ pos -= ahead;
+ cyclic_pos -= ahead;
+ if( cyclic_pos < 0 ) cyclic_pos += dictionary_size_;
+ return true;
}
@@ -162,7 +173,7 @@ int Matchfinder::longest_match_len( int * const distances ) throw()
int newpos = prev_positions[key4];
prev_positions[key4] = pos;
- int32_t * ptr0 = prev_pos_tree + ( cyclic_pos << 1 );
+ int32_t * ptr0 = pos_array + ( cyclic_pos << 1 );
int32_t * ptr1 = ptr0 + 1;
int len = 0, len0 = 0, len1 = 0;
@@ -175,7 +186,7 @@ int Matchfinder::longest_match_len( int * const distances ) throw()
const int delta = pos - newpos;
if( distances ) while( maxlen < len ) distances[++maxlen] = delta - 1;
- int32_t * const newptr = prev_pos_tree +
+ int32_t * const newptr = pos_array +
( ( cyclic_pos - delta +
( ( cyclic_pos >= delta ) ? 0 : dictionary_size_ ) ) << 1 );
@@ -251,6 +262,25 @@ void Len_encoder::encode( Range_encoder & range_encoder, int symbol,
}
+ // End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len)
+void LZ_encoder_base::full_flush( const long long data_position,
+ const State & state )
+ {
+ const int pos_state = data_position & pos_state_mask;
+ range_encoder.encode_bit( bm_match[state()][pos_state], 1 );
+ range_encoder.encode_bit( bm_rep[state()], 0 );
+ encode_pair( 0xFFFFFFFFU, min_match_len, pos_state );
+ range_encoder.flush();
+ File_trailer trailer;
+ trailer.data_crc( crc() );
+ trailer.data_size( data_position );
+ trailer.member_size( range_encoder.member_position() + File_trailer::size() );
+ for( int i = 0; i < File_trailer::size(); ++i )
+ range_encoder.put_byte( trailer.data[i] );
+ range_encoder.flush_data();
+ }
+
+
void LZ_encoder::fill_align_prices() throw()
{
for( int i = 0; i < dis_align_size; ++i )
@@ -300,7 +330,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
const State & state )
{
int main_len;
- if( longest_match_found > 0 ) // from previous call
+ if( longest_match_found > 0 ) // from previous call
{
main_len = longest_match_found;
longest_match_found = 0;
@@ -318,7 +348,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
{
trials[0].dis = rep_index;
trials[0].price = replens[rep_index];
- move_pos( replens[rep_index], true );
+ move_pos( replens[rep_index] );
return replens[rep_index];
}
@@ -327,7 +357,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
trials[0].dis = match_distances[matchfinder.match_len_limit()] +
num_rep_distances;
trials[0].price = main_len;
- move_pos( main_len, true );
+ move_pos( main_len );
return main_len;
}
@@ -415,6 +445,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
for( int i = 0; i < num_rep_distances; ++i )
cur_trial.reps[i] = trials[prev_index].reps[i];
+
if( prev_index == cur - 1 )
{
if( cur_trial.dis == 0 ) cur_trial.state.set_short_rep();
@@ -507,42 +538,6 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
}
- // End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len)
-void LZ_encoder::full_flush( const State & state )
- {
- const int pos_state = matchfinder.data_position() & pos_state_mask;
- range_encoder.encode_bit( bm_match[state()][pos_state], 1 );
- range_encoder.encode_bit( bm_rep[state()], 0 );
- encode_pair( 0xFFFFFFFFU, min_match_len, pos_state );
- range_encoder.flush();
- File_trailer trailer;
- trailer.data_crc( crc() );
- trailer.data_size( matchfinder.data_position() );
- trailer.member_size( range_encoder.member_position() + File_trailer::size() );
- for( int i = 0; i < File_trailer::size(); ++i )
- range_encoder.put_byte( trailer.data[i] );
- range_encoder.flush_data();
- }
-
-
-LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header,
- const int outfd )
- :
- longest_match_found( 0 ),
- crc_( 0xFFFFFFFFU ),
- matchfinder( mf ),
- range_encoder( outfd ),
- len_encoder( matchfinder.match_len_limit() ),
- rep_match_len_encoder( matchfinder.match_len_limit() ),
- num_dis_slots( 2 * real_bits( matchfinder.dictionary_size() - 1 ) )
- {
- fill_align_prices();
-
- for( int i = 0; i < File_header::size; ++i )
- range_encoder.put_byte( header.data[i] );
- }
-
-
bool LZ_encoder::encode_member( const long long member_size )
{
const long long member_size_limit =
@@ -564,17 +559,17 @@ bool LZ_encoder::encode_member( const long long member_size )
range_encoder.encode_bit( bm_match[state()][0], 0 );
literal_encoder.encode( range_encoder, prev_byte, cur_byte );
crc32.update( crc_, cur_byte );
- move_pos( 1 );
+ matchfinder.longest_match_len();
+ matchfinder.move_pos();
}
- while( true )
+ while( !matchfinder.finished() )
{
- if( matchfinder.finished() ) { full_flush( state ); return true; }
if( fill_counter <= 0 )
{ fill_distance_prices(); fill_counter = fill_count; }
int ahead = sequence_optimizer( rep_distances, state );
- if( ahead <= 0 ) return false;
+ if( ahead <= 0 ) return false; // can't happen
fill_counter -= ahead;
for( int i = 0; ; )
@@ -586,7 +581,7 @@ bool LZ_encoder::encode_member( const long long member_size )
bool bit = ( dis < 0 && len == 1 );
range_encoder.encode_bit( bm_match[state()][pos_state], !bit );
- if( bit ) // literal byte
+ if( bit ) // literal byte
{
const uint8_t prev_byte = matchfinder[-ahead-1];
const uint8_t cur_byte = matchfinder[-ahead];
@@ -601,7 +596,7 @@ bool LZ_encoder::encode_member( const long long member_size )
}
state.set_char();
}
- else // match or repeated match
+ else // match or repeated match
{
crc32.update( crc_, matchfinder.ptr_to_current_pos() - ahead, len );
mtf_reps( dis, rep_distances );
@@ -629,6 +624,9 @@ bool LZ_encoder::encode_member( const long long member_size )
else
{
encode_pair( dis - num_rep_distances, len, pos_state );
+ if( dis_slots[dis - num_rep_distances] >= end_dis_model &&
+ --align_price_count <= 0 )
+ fill_align_prices();
state.set_match();
}
}
@@ -636,10 +634,12 @@ bool LZ_encoder::encode_member( const long long member_size )
if( range_encoder.member_position() >= member_size_limit )
{
if( !matchfinder.dec_pos( ahead ) ) return false;
- full_flush( state );
+ full_flush( matchfinder.data_position(), state );
return true;
}
if( ahead <= 0 ) break;
}
}
+ full_flush( matchfinder.data_position(), state );
+ return true;
}