summaryrefslogtreecommitdiffstats
path: root/decoder.h
diff options
context:
space:
mode:
Diffstat (limited to 'decoder.h')
-rw-r--r--decoder.h463
1 files changed, 463 insertions, 0 deletions
diff --git a/decoder.h b/decoder.h
new file mode 100644
index 0000000..4b91fec
--- /dev/null
+++ b/decoder.h
@@ -0,0 +1,463 @@
+/* Lzlib - Compression library for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This library is free software. Redistribution and use in source and
+ binary forms, with or without modification, are permitted provided
+ that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions, and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+enum { rd_min_available_bytes = 10 };
+
+struct Range_decoder
+ {
+ struct Circular_buffer cb; /* input buffer */
+ unsigned long long member_position;
+ uint32_t code;
+ uint32_t range;
+ bool at_stream_end;
+ bool reload_pending;
+ };
+
+static inline bool Rd_init( struct Range_decoder * const rdec )
+ {
+ if( !Cb_init( &rdec->cb, 65536 + rd_min_available_bytes ) ) return false;
+ rdec->member_position = 0;
+ rdec->code = 0;
+ rdec->range = 0xFFFFFFFFU;
+ rdec->at_stream_end = false;
+ rdec->reload_pending = false;
+ return true;
+ }
+
+static inline void Rd_free( struct Range_decoder * const rdec )
+ { Cb_free( &rdec->cb ); }
+
+static inline bool Rd_finished( const struct Range_decoder * const rdec )
+ { return rdec->at_stream_end && Cb_empty( &rdec->cb ); }
+
+static inline void Rd_finish( struct Range_decoder * const rdec )
+ { rdec->at_stream_end = true; }
+
+static inline bool Rd_enough_available_bytes( const struct Range_decoder * const rdec )
+ { return Cb_used_bytes( &rdec->cb ) >= rd_min_available_bytes; }
+
+static inline unsigned Rd_available_bytes( const struct Range_decoder * const rdec )
+ { return Cb_used_bytes( &rdec->cb ); }
+
+static inline unsigned Rd_free_bytes( const struct Range_decoder * const rdec )
+ { return rdec->at_stream_end ? 0 : Cb_free_bytes( &rdec->cb ); }
+
+static inline unsigned long long Rd_purge( struct Range_decoder * const rdec )
+ {
+ const unsigned long long size =
+ rdec->member_position + Cb_used_bytes( &rdec->cb );
+ Cb_reset( &rdec->cb );
+ rdec->member_position = 0; rdec->at_stream_end = true;
+ return size;
+ }
+
+static inline void Rd_reset( struct Range_decoder * const rdec )
+ { Cb_reset( &rdec->cb );
+ rdec->member_position = 0; rdec->at_stream_end = false; }
+
+
+/* Seek for a member header and update 'get'. Set '*skippedp' to the number
+ of bytes skipped. Return true if a valid header is found.
+*/
+static bool Rd_find_header( struct Range_decoder * const rdec,
+ unsigned * const skippedp )
+ {
+ *skippedp = 0;
+ while( rdec->cb.get != rdec->cb.put )
+ {
+ if( rdec->cb.buffer[rdec->cb.get] == lzip_magic[0] )
+ {
+ unsigned get = rdec->cb.get;
+ int i;
+ Lzip_header header;
+ for( i = 0; i < Lh_size; ++i )
+ {
+ if( get == rdec->cb.put ) return false; /* not enough data */
+ header[i] = rdec->cb.buffer[get];
+ if( ++get >= rdec->cb.buffer_size ) get = 0;
+ }
+ if( Lh_check( header ) ) return true;
+ }
+ if( ++rdec->cb.get >= rdec->cb.buffer_size ) rdec->cb.get = 0;
+ ++*skippedp;
+ }
+ return false;
+ }
+
+
+static inline int Rd_write_data( struct Range_decoder * const rdec,
+ const uint8_t * const inbuf, const int size )
+ {
+ if( rdec->at_stream_end || size <= 0 ) return 0;
+ return Cb_write_data( &rdec->cb, inbuf, size );
+ }
+
+static inline uint8_t Rd_get_byte( struct Range_decoder * const rdec )
+ {
+ /* 0xFF avoids decoder error if member is truncated at EOS marker */
+ if( Rd_finished( rdec ) ) return 0xFF;
+ ++rdec->member_position;
+ return Cb_get_byte( &rdec->cb );
+ }
+
+static inline int Rd_read_data( struct Range_decoder * const rdec,
+ uint8_t * const outbuf, const int size )
+ {
+ const int sz = Cb_read_data( &rdec->cb, outbuf, size );
+ if( sz > 0 ) rdec->member_position += sz;
+ return sz;
+ }
+
+static inline bool Rd_unread_data( struct Range_decoder * const rdec,
+ const unsigned size )
+ {
+ if( size > rdec->member_position || !Cb_unread_data( &rdec->cb, size ) )
+ return false;
+ rdec->member_position -= size;
+ return true;
+ }
+
+static bool Rd_try_reload( struct Range_decoder * const rdec )
+ {
+ if( rdec->reload_pending && Rd_available_bytes( rdec ) >= 5 )
+ {
+ rdec->reload_pending = false;
+ rdec->code = 0;
+ rdec->range = 0xFFFFFFFFU;
+ Rd_get_byte( rdec ); /* discard first byte of the LZMA stream */
+ int i; for( i = 0; i < 4; ++i )
+ rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
+ }
+ return !rdec->reload_pending;
+ }
+
+static inline void Rd_normalize( struct Range_decoder * const rdec )
+ {
+ if( rdec->range <= 0x00FFFFFFU )
+ { rdec->range <<= 8; rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); }
+ }
+
+static inline unsigned Rd_decode( struct Range_decoder * const rdec,
+ const int num_bits )
+ {
+ unsigned symbol = 0;
+ int i;
+ for( i = num_bits; i > 0; --i )
+ {
+ Rd_normalize( rdec );
+ rdec->range >>= 1;
+/* symbol <<= 1; */
+/* if( rdec->code >= rdec->range ) { rdec->code -= rdec->range; symbol |= 1; } */
+ const bool bit = ( rdec->code >= rdec->range );
+ symbol <<= 1; symbol += bit;
+ rdec->code -= rdec->range & ( 0U - bit );
+ }
+ return symbol;
+ }
+
+static inline unsigned Rd_decode_bit( struct Range_decoder * const rdec,
+ Bit_model * const probability )
+ {
+ Rd_normalize( rdec );
+ const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability;
+ if( rdec->code < bound )
+ {
+ rdec->range = bound;
+ *probability += ( bit_model_total - *probability ) >> bit_model_move_bits;
+ return 0;
+ }
+ else
+ {
+ rdec->code -= bound;
+ rdec->range -= bound;
+ *probability -= *probability >> bit_model_move_bits;
+ return 1;
+ }
+ }
+
+static inline void Rd_decode_symbol_bit( struct Range_decoder * const rdec,
+ Bit_model * const probability, unsigned * symbol )
+ {
+ Rd_normalize( rdec );
+ *symbol <<= 1;
+ const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability;
+ if( rdec->code < bound )
+ {
+ rdec->range = bound;
+ *probability += ( bit_model_total - *probability ) >> bit_model_move_bits;
+ }
+ else
+ {
+ rdec->code -= bound;
+ rdec->range -= bound;
+ *probability -= *probability >> bit_model_move_bits;
+ *symbol |= 1;
+ }
+ }
+
+static inline void Rd_decode_symbol_bit_reversed( struct Range_decoder * const rdec,
+ Bit_model * const probability, unsigned * model,
+ unsigned * symbol, const int i )
+ {
+ Rd_normalize( rdec );
+ *model <<= 1;
+ const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability;
+ if( rdec->code < bound )
+ {
+ rdec->range = bound;
+ *probability += ( bit_model_total - *probability ) >> bit_model_move_bits;
+ }
+ else
+ {
+ rdec->code -= bound;
+ rdec->range -= bound;
+ *probability -= *probability >> bit_model_move_bits;
+ *model |= 1;
+ *symbol |= 1 << i;
+ }
+ }
+
+static inline unsigned Rd_decode_tree6( struct Range_decoder * const rdec,
+ Bit_model bm[] )
+ {
+ unsigned symbol = 1;
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ return symbol & 0x3F;
+ }
+
+static inline unsigned Rd_decode_tree8( struct Range_decoder * const rdec,
+ Bit_model bm[] )
+ {
+ unsigned symbol = 1;
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ return symbol & 0xFF;
+ }
+
+static inline unsigned
+Rd_decode_tree_reversed( struct Range_decoder * const rdec,
+ Bit_model bm[], const int num_bits )
+ {
+ unsigned model = 1;
+ unsigned symbol = 0;
+ int i;
+ for( i = 0; i < num_bits; ++i )
+ Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, i );
+ return symbol;
+ }
+
+static inline unsigned
+Rd_decode_tree_reversed4( struct Range_decoder * const rdec, Bit_model bm[] )
+ {
+ unsigned model = 1;
+ unsigned symbol = 0;
+ Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 0 );
+ Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 1 );
+ Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 2 );
+ Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 3 );
+ return symbol;
+ }
+
+static inline unsigned Rd_decode_matched( struct Range_decoder * const rdec,
+ Bit_model bm[], unsigned match_byte )
+ {
+ unsigned symbol = 1;
+ unsigned mask = 0x100;
+ while( true )
+ {
+ const unsigned match_bit = ( match_byte <<= 1 ) & mask;
+ const unsigned bit = Rd_decode_bit( rdec, &bm[symbol+match_bit+mask] );
+ symbol <<= 1; symbol += bit;
+ if( symbol > 0xFF ) return symbol & 0xFF;
+ mask &= ~(match_bit ^ (bit << 8)); /* if( match_bit != bit ) mask = 0; */
+ }
+ }
+
+static inline unsigned Rd_decode_len( struct Range_decoder * const rdec,
+ struct Len_model * const lm,
+ const int pos_state )
+ {
+ Bit_model * bm;
+ unsigned mask, offset, symbol = 1;
+
+ if( Rd_decode_bit( rdec, &lm->choice1 ) == 0 )
+ { bm = lm->bm_low[pos_state]; mask = 7; offset = 0; goto len3; }
+ if( Rd_decode_bit( rdec, &lm->choice2 ) == 0 )
+ { bm = lm->bm_mid[pos_state]; mask = 7; offset = len_low_symbols; goto len3; }
+ bm = lm->bm_high; mask = 0xFF; offset = len_low_symbols + len_mid_symbols;
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+len3:
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ return ( symbol & mask ) + min_match_len + offset;
+ }
+
+
+enum { lzd_min_free_bytes = max_match_len };
+
+struct LZ_decoder
+ {
+ struct Circular_buffer cb;
+ unsigned long long partial_data_pos;
+ struct Range_decoder * rdec;
+ unsigned dictionary_size;
+ uint32_t crc;
+ bool check_trailer_pending;
+ bool member_finished;
+ bool pos_wrapped;
+ unsigned rep0; /* rep[0-3] latest four distances */
+ unsigned rep1; /* used for efficient coding of */
+ unsigned rep2; /* repeated distances */
+ unsigned rep3;
+ State state;
+
+ Bit_model bm_literal[1<<literal_context_bits][0x300];
+ Bit_model bm_match[states][pos_states];
+ Bit_model bm_rep[states];
+ Bit_model bm_rep0[states];
+ Bit_model bm_rep1[states];
+ Bit_model bm_rep2[states];
+ Bit_model bm_len[states][pos_states];
+ Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
+ Bit_model bm_dis[modeled_distances-end_dis_model+1];
+ Bit_model bm_align[dis_align_size];
+
+ struct Len_model match_len_model;
+ struct Len_model rep_len_model;
+ };
+
+static inline bool LZd_enough_free_bytes( const struct LZ_decoder * const d )
+ { return Cb_free_bytes( &d->cb ) >= lzd_min_free_bytes; }
+
+static inline uint8_t LZd_peek_prev( const struct LZ_decoder * const d )
+ { return d->cb.buffer[((d->cb.put > 0) ? d->cb.put : d->cb.buffer_size)-1]; }
+
+static inline uint8_t LZd_peek( const struct LZ_decoder * const d,
+ const unsigned distance )
+ {
+ const unsigned i = ( ( d->cb.put > distance ) ? 0 : d->cb.buffer_size ) +
+ d->cb.put - distance - 1;
+ return d->cb.buffer[i];
+ }
+
+static inline void LZd_put_byte( struct LZ_decoder * const d, const uint8_t b )
+ {
+ CRC32_update_byte( &d->crc, b );
+ d->cb.buffer[d->cb.put] = b;
+ if( ++d->cb.put >= d->cb.buffer_size )
+ { d->partial_data_pos += d->cb.put; d->cb.put = 0; d->pos_wrapped = true; }
+ }
+
+static inline void LZd_copy_block( struct LZ_decoder * const d,
+ const unsigned distance, unsigned len )
+ {
+ unsigned lpos = d->cb.put, i = lpos - distance - 1;
+ bool fast, fast2;
+ if( lpos > distance )
+ {
+ fast = ( len < d->cb.buffer_size - lpos );
+ fast2 = ( fast && len <= lpos - i );
+ }
+ else
+ {
+ i += d->cb.buffer_size;
+ fast = ( len < d->cb.buffer_size - i ); /* (i == pos) may happen */
+ fast2 = ( fast && len <= i - lpos );
+ }
+ if( fast ) /* no wrap */
+ {
+ const unsigned tlen = len;
+ if( fast2 ) /* no wrap, no overlap */
+ memcpy( d->cb.buffer + lpos, d->cb.buffer + i, len );
+ else
+ for( ; len > 0; --len ) d->cb.buffer[lpos++] = d->cb.buffer[i++];
+ CRC32_update_buf( &d->crc, d->cb.buffer + d->cb.put, tlen );
+ d->cb.put += tlen;
+ }
+ else for( ; len > 0; --len )
+ {
+ LZd_put_byte( d, d->cb.buffer[i] );
+ if( ++i >= d->cb.buffer_size ) i = 0;
+ }
+ }
+
+static inline bool LZd_init( struct LZ_decoder * const d,
+ struct Range_decoder * const rde,
+ const unsigned dict_size )
+ {
+ if( !Cb_init( &d->cb, max( 65536, dict_size ) + lzd_min_free_bytes ) )
+ return false;
+ d->partial_data_pos = 0;
+ d->rdec = rde;
+ d->dictionary_size = dict_size;
+ d->crc = 0xFFFFFFFFU;
+ d->check_trailer_pending = false;
+ d->member_finished = false;
+ d->pos_wrapped = false;
+ /* prev_byte of first byte; also for LZd_peek( 0 ) on corrupt file */
+ d->cb.buffer[d->cb.buffer_size-1] = 0;
+ d->rep0 = 0;
+ d->rep1 = 0;
+ d->rep2 = 0;
+ d->rep3 = 0;
+ d->state = 0;
+
+ Bm_array_init( d->bm_literal[0], (1 << literal_context_bits) * 0x300 );
+ Bm_array_init( d->bm_match[0], states * pos_states );
+ Bm_array_init( d->bm_rep, states );
+ Bm_array_init( d->bm_rep0, states );
+ Bm_array_init( d->bm_rep1, states );
+ Bm_array_init( d->bm_rep2, states );
+ Bm_array_init( d->bm_len[0], states * pos_states );
+ Bm_array_init( d->bm_dis_slot[0], len_states * (1 << dis_slot_bits) );
+ Bm_array_init( d->bm_dis, modeled_distances - end_dis_model + 1 );
+ Bm_array_init( d->bm_align, dis_align_size );
+ Lm_init( &d->match_len_model );
+ Lm_init( &d->rep_len_model );
+ return true;
+ }
+
+static inline void LZd_free( struct LZ_decoder * const d )
+ { Cb_free( &d->cb ); }
+
+static inline bool LZd_member_finished( const struct LZ_decoder * const d )
+ { return d->member_finished && Cb_empty( &d->cb ); }
+
+static inline unsigned LZd_crc( const struct LZ_decoder * const d )
+ { return d->crc ^ 0xFFFFFFFFU; }
+
+static inline unsigned long long
+LZd_data_position( const struct LZ_decoder * const d )
+ { return d->partial_data_pos + d->cb.put; }