From 418b05fcd64815e6928cb9751285fa2d29a34361 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <mail@daniel-baumann.ch>
Date: Sat, 7 Nov 2015 10:31:21 +0100
Subject: Adding upstream version 1.13~rc2.

Signed-off-by: Daniel Baumann <mail@daniel-baumann.ch>
---
 encoder.h | 249 +++++++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 150 insertions(+), 99 deletions(-)

(limited to 'encoder.h')

diff --git a/encoder.h b/encoder.h
index 86c23c2..21c683d 100644
--- a/encoder.h
+++ b/encoder.h
@@ -1,5 +1,5 @@
 /*  Lzip - Data compressor based on the LZMA algorithm
-    Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz.
+    Copyright (C) 2008, 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -140,41 +140,43 @@ inline int price_matched( const Bit_model bm[], const int symbol,
   }
 
 
-class Matchfinder
+class Matchfinder_base
   {
-  enum { // bytes to keep in buffer before dictionary
-         before_size = max_num_trials + 1,
-         // bytes to keep in buffer after pos
-         after_size = max_match_len,
-         num_prev_positions4 = 1 << 20,
-         num_prev_positions3 = 1 << 18,
-         num_prev_positions2 = 1 << 16,
-         num_prev_positions = num_prev_positions4 + num_prev_positions3 +
-                              num_prev_positions2 };
+  Matchfinder_base( const Matchfinder_base & );	// declared as private
+  void operator=( const Matchfinder_base & );	// declared as private
+
+  bool read_block();
+  void normalize_pos();
+
+protected:
+  enum { after_size = max_match_len };	// bytes to keep in buffer after pos
 
   long long partial_data_pos;
-  uint8_t * buffer;		// input buffer
   int32_t * const prev_positions;	// last seen position of key
-  int32_t * prev_pos_tree;
-  int dictionary_size_;		// bytes to keep in buffer before pos
+  uint8_t * buffer;			// input buffer
+  int32_t * pos_array;			// may be tree or chain
+  const int num_prev_positions;
+  const int before_size;	// bytes to keep in buffer before dictionary
+  const int match_len_limit_;
+  const int infd;		// input file descriptor
   int buffer_size;
+  int dictionary_size_;		// bytes to keep in buffer before pos
   int pos;			// current pos in buffer
   int cyclic_pos;		// current pos in dictionary
-  int stream_pos;		// first byte not yet read from file
   int pos_limit;		// when reached, a new block must be read
-  const int match_len_limit_;
-  const int cycles;
-  const int infd;		// input file descriptor
+  int stream_pos;		// first byte not yet read from file
+  int pos_array_size;
   bool at_stream_end;		// stream_pos shows real end of file
 
-  bool read_block();
-
-public:
-  Matchfinder( const int dict_size, const int len_limit, const int ifd );
+  Matchfinder_base( const int before, const int dict_size,
+                    const int dict_factor, const int len_limit,
+                    const int num_prev_pos, const int ifd,
+                    const int pos_array_factor );
 
-  ~Matchfinder()
-    { delete[] prev_pos_tree; std::free( buffer ); delete[] prev_positions; }
+  ~Matchfinder_base()
+    { delete[] pos_array; std::free( buffer ); delete[] prev_positions; }
 
+public:
   uint8_t operator[]( const int i ) const throw() { return buffer[pos+i]; }
   int available_bytes() const throw() { return stream_pos - pos; }
   long long data_position() const throw() { return partial_data_pos + pos; }
@@ -183,27 +185,47 @@ public:
   int match_len_limit() const throw() { return match_len_limit_; }
   const uint8_t * ptr_to_current_pos() const throw() { return buffer + pos; }
 
-  bool dec_pos( const int ahead ) throw()
-    {
-    if( ahead < 0 || pos < ahead ) return false;
-    pos -= ahead;
-    cyclic_pos -= ahead;
-    if( cyclic_pos < 0 ) cyclic_pos += dictionary_size_;
-    return true;
-    }
-
   int true_match_len( const int index, const int distance, int len_limit ) const throw()
     {
     if( index + len_limit > available_bytes() )
       len_limit = available_bytes() - index;
-    const uint8_t * const data = buffer + pos + index - distance;
+    const uint8_t * const data = buffer + pos + index;
     int i = 0;
-    while( i < len_limit && data[i] == data[i+distance] ) ++i;
+    while( i < len_limit && data[i-distance] == data[i] ) ++i;
     return i;
     }
 
   void reset();
-  void move_pos();
+  void move_pos()
+    {
+    if( ++cyclic_pos >= dictionary_size_ ) cyclic_pos = 0;
+    if( ++pos >= pos_limit ) normalize_pos();
+    }
+  };
+
+
+class Matchfinder : public Matchfinder_base
+  {
+  enum { before = max_num_trials + 1,
+         dict_factor = 2,
+         num_prev_positions4 = 1 << 20,
+         num_prev_positions3 = 1 << 18,
+         num_prev_positions2 = 1 << 16,
+         num_prev_pos = num_prev_positions4 + num_prev_positions3 +
+                        num_prev_positions2,
+         pos_array_factor = 2 };
+
+  const int cycles;
+
+public:
+  Matchfinder( const int dict_size, const int len_limit, const int ifd )
+    :
+    Matchfinder_base( before, dict_size, dict_factor, len_limit,
+                      num_prev_pos, ifd, pos_array_factor ),
+    cycles( ( len_limit < max_match_len ) ? 16 + ( len_limit / 2 ) : 256 )
+    {}
+
+  bool dec_pos( const int ahead ) throw();
   int longest_match_len( int * const distances = 0 ) throw();
   };
 
@@ -233,8 +255,11 @@ class Range_encoder
     low = ( low & 0x00FFFFFFU ) << 8;
     }
 
+  Range_encoder( const Range_encoder & );
+  void operator=( const Range_encoder & );
+
 public:
-  Range_encoder( const int ofd )
+  explicit Range_encoder( const int ofd )
     :
     low( 0 ),
     partial_member_pos( 0 ),
@@ -367,7 +392,7 @@ class Len_encoder
     }
 
 public:
-  Len_encoder( const int len_limit )
+  explicit Len_encoder( const int len_limit )
     : len_symbols( len_limit + 1 - min_match_len )
     {
     for( int i = 0; i < pos_states; ++i ) update_prices( i );
@@ -408,24 +433,12 @@ public:
   };
 
 
-class LZ_encoder
+class LZ_encoder_base
   {
-  enum { infinite_price = 0x0FFFFFFF,
-         max_marker_size = 16,
+protected:
+  enum { max_marker_size = 16,
          num_rep_distances = 4 };	// must be 4
 
-  struct Trial
-    {
-    State state;
-    int dis;
-    int prev_index;	// index of prev trial in trials[]
-    int price;		// dual use var; cumulative price, match length
-    int reps[num_rep_distances];
-    void update( const int d, const int p_i, const int pr ) throw()
-      { if( pr < price ) { dis = d; prev_index = p_i; price = pr; } }
-    };
-
-  int longest_match_found;
   uint32_t crc_;
 
   Bit_model bm_match[State::states][pos_states];
@@ -438,26 +451,28 @@ class LZ_encoder
   Bit_model bm_dis[modeled_distances-end_dis_model+1];
   Bit_model bm_align[dis_align_size];
 
-  Matchfinder & matchfinder;
   Range_encoder range_encoder;
   Len_encoder len_encoder;
   Len_encoder rep_match_len_encoder;
   Literal_encoder literal_encoder;
 
   const int num_dis_slots;
-  int match_distances[max_match_len+1];
-  Trial trials[max_num_trials];
-
-  int dis_slot_prices[max_dis_states][2*max_dictionary_bits];
-  int dis_prices[max_dis_states][modeled_distances];
-  int align_prices[dis_align_size];
-  int align_price_count;
-
-  void fill_align_prices() throw();
-  void fill_distance_prices() throw();
 
   uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFFU; }
 
+  LZ_encoder_base( const File_header & header, const int dictionary_size,
+                   const int match_len_limit, const int outfd )
+    :
+    crc_( 0xFFFFFFFFU ),
+    range_encoder( outfd ),
+    len_encoder( match_len_limit ),
+    rep_match_len_encoder( match_len_limit ),
+    num_dis_slots( 2 * real_bits( dictionary_size - 1 ) )
+    {
+    for( int i = 0; i < File_header::size; ++i )
+      range_encoder.put_byte( header.data[i] );
+    }
+
        // move-to-front dis in/into reps
   void mtf_reps( const int dis, int reps[num_rep_distances] ) throw()
     {
@@ -474,6 +489,65 @@ class LZ_encoder
       }
     }
 
+  void encode_pair( const uint32_t dis, const int len, const int pos_state ) throw()
+    {
+    len_encoder.encode( range_encoder, len, pos_state );
+    const int dis_slot = dis_slots[dis];
+    range_encoder.encode_tree( bm_dis_slot[get_dis_state(len)], dis_slot, dis_slot_bits );
+
+    if( dis_slot >= start_dis_model )
+      {
+      const int direct_bits = ( dis_slot >> 1 ) - 1;
+      const uint32_t base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
+      const uint32_t direct_dis = dis - base;
+
+      if( dis_slot < end_dis_model )
+        range_encoder.encode_tree_reversed( bm_dis + base - dis_slot,
+                                            direct_dis, direct_bits );
+      else
+        {
+        range_encoder.encode( direct_dis >> dis_align_bits, direct_bits - dis_align_bits );
+        range_encoder.encode_tree_reversed( bm_align, direct_dis, dis_align_bits );
+        }
+      }
+    }
+
+  void full_flush( const long long data_position, const State & state );
+
+public:
+  long long member_position() const throw()
+    { return range_encoder.member_position(); }
+  };
+
+
+class LZ_encoder : public LZ_encoder_base
+  {
+  enum { infinite_price = 0x0FFFFFFF };
+
+  struct Trial
+    {
+    State state;
+    int dis;
+    int prev_index;	// index of prev trial in trials[]
+    int price;		// dual use var; cumulative price, match length
+    int reps[num_rep_distances];
+    void update( const int d, const int p_i, const int pr ) throw()
+      { if( pr < price ) { dis = d; prev_index = p_i; price = pr; } }
+    };
+
+  Matchfinder & matchfinder;
+  int longest_match_found;
+  int match_distances[max_match_len+1];
+  Trial trials[max_num_trials];
+
+  int dis_slot_prices[max_dis_states][2*max_dictionary_bits];
+  int dis_prices[max_dis_states][modeled_distances];
+  int align_prices[dis_align_size];
+  int align_price_count;
+
+  void fill_align_prices() throw();
+  void fill_distance_prices() throw();
+
   int price_rep_len1( const State & state, const int pos_state ) const throw()
     {
     return price0( bm_rep0[state()] ) + price0( bm_len[state()][pos_state] );
@@ -512,44 +586,21 @@ class LZ_encoder
            price_dis( dis, get_dis_state( len ) );
     }
 
-  void encode_pair( const uint32_t dis, const int len, const int pos_state ) throw()
-    {
-    len_encoder.encode( range_encoder, len, pos_state );
-    const int dis_slot = dis_slots[dis];
-    range_encoder.encode_tree( bm_dis_slot[get_dis_state(len)], dis_slot, dis_slot_bits );
-
-    if( dis_slot >= start_dis_model )
-      {
-      const int direct_bits = ( dis_slot >> 1 ) - 1;
-      const uint32_t base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
-      const uint32_t direct_dis = dis - base;
-
-      if( dis_slot < end_dis_model )
-        range_encoder.encode_tree_reversed( bm_dis + base - dis_slot,
-                                            direct_dis, direct_bits );
-      else
-        {
-        range_encoder.encode( direct_dis >> dis_align_bits, direct_bits - dis_align_bits );
-        range_encoder.encode_tree_reversed( bm_align, direct_dis, dis_align_bits );
-        if( --align_price_count <= 0 ) fill_align_prices();
-        }
-      }
-    }
-
   int read_match_distances() throw()
     {
     int len = matchfinder.longest_match_len( match_distances );
-    if( len == matchfinder.match_len_limit() )
-      len += matchfinder.true_match_len( len, match_distances[len] + 1, max_match_len - len );
+    if( len == matchfinder.match_len_limit() && len < max_match_len )
+      len += matchfinder.true_match_len( len, match_distances[len] + 1,
+                                         max_match_len - len );
     return len;
     }
 
-  void move_pos( int n, bool skip = false )
+  void move_pos( int n )
     {
+    if( --n >= 0 ) matchfinder.move_pos();
     while( --n >= 0 )
       {
-      if( skip ) skip = false;
-      else matchfinder.longest_match_len();
+      matchfinder.longest_match_len();
       matchfinder.move_pos();
       }
     }
@@ -570,13 +621,13 @@ class LZ_encoder
   int sequence_optimizer( const int reps[num_rep_distances],
                           const State & state );
 
-  void full_flush( const State & state );
-
 public:
-  LZ_encoder( Matchfinder & mf, const File_header & header, const int outfd );
+  LZ_encoder( Matchfinder & mf, const File_header & header, const int outfd )
+    :
+    LZ_encoder_base( header, mf.dictionary_size(), mf.match_len_limit(), outfd ),
+    matchfinder( mf ),
+    longest_match_found( 0 )
+    { fill_align_prices(); }
 
   bool encode_member( const long long member_size );
-
-  long long member_position() const throw()
-    { return range_encoder.member_position(); }
   };
-- 
cgit v1.2.3