diff options
Diffstat (limited to '')
-rw-r--r-- | lzd.cc (renamed from decoder.cc) | 182 |
1 files changed, 139 insertions, 43 deletions
@@ -8,6 +8,24 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ +/* + Exit status: 0 for a normal exit, 1 for environmental problems + (file not found, invalid flags, I/O errors, etc), 2 to indicate a + corrupt or invalid input file. +*/ + +#include <algorithm> +#include <cerrno> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <stdint.h> +#include <unistd.h> +#if defined(__MSVCRT__) || defined(__OS2__) +#include <fcntl.h> +#include <io.h> +#endif + class State { @@ -24,20 +42,20 @@ public: static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; st = next[st]; } - - void set_match() { st = ( ( st < 7 ) ? 7 : 10 ); } - void set_rep() { st = ( ( st < 7 ) ? 8 : 11 ); } - void set_short_rep() { st = ( ( st < 7 ) ? 9 : 11 ); } + void set_match() { st = ( st < 7 ) ? 7 : 10; } + void set_rep() { st = ( st < 7 ) ? 8 : 11; } + void set_short_rep() { st = ( st < 7 ) ? 9 : 11; } }; enum { + min_dictionary_size = 1 << 12, + max_dictionary_size = 1 << 29, literal_context_bits = 3, pos_state_bits = 2, pos_states = 1 << pos_state_bits, pos_state_mask = pos_states - 1, - max_dis_states = 4, dis_slot_bits = 6, start_dis_model = 4, end_dis_model = 14, @@ -52,13 +70,14 @@ enum { len_mid_symbols = 1 << len_mid_bits, len_high_symbols = 1 << len_high_bits, max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols, + min_match_len = 2, // must be 2 + max_dis_states = 4, bit_model_move_bits = 5, bit_model_total_bits = 11, bit_model_total = 1 << bit_model_total_bits }; - struct Bit_model { int probability; @@ -75,6 +94,39 @@ struct Len_model }; +class CRC32 + { + uint32_t data[256]; // Table of CRCs of all 8-bit messages. + +public: + CRC32() + { + for( unsigned n = 0; n < 256; ++n ) + { + unsigned c = n; + for( int k = 0; k < 8; ++k ) + { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; } + data[n] = c; + } + } + + void update( uint32_t & crc, const uint8_t * const buffer, const int size ) const + { + for( int i = 0; i < size; ++i ) + crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 ); + } + }; + +const CRC32 crc32; + + +typedef uint8_t File_header[6]; // 0-3 magic, 4 version, 5 coded_dict_size + +typedef uint8_t File_trailer[20]; + // 0-3 CRC32 of the uncompressed data + // 4-11 size of the uncompressed data + // 12-19 member size including header and trailer + class Range_decoder { uint32_t code; @@ -83,9 +135,11 @@ class Range_decoder public: Range_decoder() : code( 0 ), range( 0xFFFFFFFFU ) { - for( int i = 0; i < 5; ++i ) code = (code << 8) | std::getc( stdin ); + for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); } + uint8_t get_byte() { return std::getc( stdin ); } + int decode( const int num_bits ) { int symbol = 0; @@ -95,7 +149,7 @@ public: symbol <<= 1; if( code >= range ) { code -= range; symbol |= 1; } if( range <= 0x00FFFFFFU ) // normalize - { range <<= 8; code = (code << 8) | std::getc( stdin ); } + { range <<= 8; code = (code << 8) | get_byte(); } } return symbol; } @@ -118,7 +172,7 @@ public: symbol = 1; } if( range <= 0x00FFFFFFU ) // normalize - { range <<= 8; code = (code << 8) | std::getc( stdin ); } + { range <<= 8; code = (code << 8) | get_byte(); } return symbol; } @@ -164,12 +218,11 @@ public: int decode_len( Len_model & lm, const int pos_state ) { if( decode_bit( lm.choice1 ) == 0 ) - return min_match_len + - decode_tree( lm.bm_low[pos_state], len_low_bits ); + return decode_tree( lm.bm_low[pos_state], len_low_bits ); if( decode_bit( lm.choice2 ) == 0 ) - return min_match_len + len_low_symbols + + return len_low_symbols + decode_tree( lm.bm_mid[pos_state], len_mid_bits ); - return min_match_len + len_low_symbols + len_mid_symbols + + return len_low_symbols + len_mid_symbols + decode_tree( lm.bm_high, len_high_bits ); } }; @@ -189,8 +242,8 @@ class LZ_decoder uint8_t get_byte( const unsigned distance ) const { - int i = pos - distance - 1; - if( i < 0 ) i += dictionary_size; + unsigned i = pos - distance - 1; + if( pos <= distance ) i += dictionary_size; return buffer[i]; } @@ -220,32 +273,6 @@ public: }; -class CRC32 - { - uint32_t data[256]; // Table of CRCs of all 8-bit messages. - -public: - CRC32() - { - for( unsigned n = 0; n < 256; ++n ) - { - unsigned c = n; - for( int k = 0; k < 8; ++k ) - { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; } - data[n] = c; - } - } - - void update( uint32_t & crc, const uint8_t * buffer, const int size ) const - { - for( int i = 0; i < size; ++i ) - crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 ); - } - }; - -const CRC32 crc32; - - void LZ_decoder::flush_data() { if( pos > stream_pos ) @@ -322,13 +349,13 @@ bool LZ_decoder::decode_member() // Returns false if error rep1 = rep0; rep0 = distance; } - len = rdec.decode_len( rep_len_model, pos_state ); + len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); state.set_rep(); } else { rep3 = rep2; rep2 = rep1; rep1 = rep0; - len = rdec.decode_len( match_len_model, pos_state ); + len = min_match_len + rdec.decode_len( match_len_model, pos_state ); const int dis_state = std::min( len - min_match_len, max_dis_states - 1 ); const int dis_slot = rdec.decode_tree( bm_dis_slot[dis_state], dis_slot_bits ); @@ -361,3 +388,72 @@ bool LZ_decoder::decode_member() // Returns false if error } return false; } + + +int main( const int argc, const char * const argv[] ) + { + if( argc > 1 ) + { + std::printf( "Lzd %s - Educational decompressor for lzip files.\n", + PROGVERSION ); + std::printf( "Study the source to learn how a lzip decompressor works.\n" + "See the lzip manual for an explanation of the code.\n" + "It is not safe to use lzd for any real work.\n" + "\nUsage: %s < file.lz > file\n", argv[0] ); + std::printf( "Lzd decompresses from standard input to standard output.\n" + "\nCopyright (C) 2013 Antonio Diaz Diaz.\n" + "This is free software: you are free to change and redistribute it.\n" + "There is NO WARRANTY, to the extent permitted by law.\n" + "Report bugs to lzip-bug@nongnu.org\n" + "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" ); + return 0; + } + +#if defined(__MSVCRT__) || defined(__OS2__) + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + for( bool first_member = true; ; first_member = false ) + { + File_header header; + for( int i = 0; i < 6; ++i ) + header[i] = std::getc( stdin ); + if( std::feof( stdin ) || std::memcmp( header, "LZIP", 4 ) != 0 ) + { + if( first_member ) + { std::fprintf( stderr, "Bad magic number (file not in lzip format)\n" ); + return 2; } + break; + } + if( header[4] != 1 ) + { + std::fprintf( stderr, "Version %d member format not supported.\n", + header[4] ); + return 2; + } + unsigned dict_size = 1 << ( header[5] & 0x1F ); + dict_size -= ( dict_size / 16 ) * ( ( header[5] >> 5 ) & 7 ); + if( dict_size < min_dictionary_size || dict_size > max_dictionary_size ) + { std::fprintf( stderr, "Invalid dictionary size in member header\n" ); + return 2; } + + LZ_decoder decoder( dict_size ); + if( !decoder.decode_member() ) + { std::fprintf( stderr, "Data error\n" ); return 2; } + + File_trailer trailer; + for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin ); + unsigned crc = 0; + for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; } + unsigned long long data_size = 0; + for( int i = 11; i >= 4; --i ) { data_size <<= 8; data_size += trailer[i]; } + if( crc != decoder.crc() || data_size != decoder.data_position() ) + { std::fprintf( stderr, "CRC error\n" ); return 2; } + } + + if( std::fclose( stdout ) != 0 ) + { std::fprintf( stderr, "Can't close stdout: %s\n", std::strerror( errno ) ); + return 1; } + return 0; + } |