diff options
Diffstat (limited to 'lzd.cc')
-rw-r--r-- | lzd.cc | 171 |
1 files changed, 98 insertions, 73 deletions
@@ -1,25 +1,25 @@ -/* Lzd - Educational decompressor for the lzip format - Copyright (C) 2013-2019 Antonio Diaz Diaz. +/* Lzd - Educational decompressor for the lzip format + Copyright (C) 2013-2024 Antonio Diaz Diaz. - This program is free software. Redistribution and use in source and - binary forms, with or without modification, are permitted provided - that the following conditions are met: + This program is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ /* - Exit status: 0 for a normal exit, 1 for environmental problems - (file not found, invalid flags, I/O errors, etc), 2 to indicate a - corrupt or invalid input file. + Exit status: 0 for a normal exit, 1 for environmental problems + (file not found, invalid command-line options, I/O errors, etc), 2 to + indicate a corrupt or invalid input file. */ #include <algorithm> @@ -29,7 +29,7 @@ #include <cstring> #include <stdint.h> #include <unistd.h> -#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) +#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ #include <fcntl.h> #include <io.h> #endif @@ -47,7 +47,7 @@ public: void set_char() { - static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; + const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; st = next[st]; } void set_match() { st = ( st < 7 ) ? 7 : 10; } @@ -69,7 +69,7 @@ enum { dis_slot_bits = 6, start_dis_model = 4, end_dis_model = 14, - modeled_distances = 1 << (end_dis_model / 2), // 128 + modeled_distances = 1 << ( end_dis_model / 2 ), // 128 dis_align_bits = 4, dis_align_size = 1 << dis_align_bits, @@ -130,25 +130,31 @@ public: const CRC32 crc32; -typedef uint8_t Lzip_header[6]; // 0-3 magic, 4 version, 5 coded_dict_size - -typedef uint8_t Lzip_trailer[20]; +enum { header_size = 6, trailer_size = 20 }; +typedef uint8_t Lzip_header[header_size]; // 0-3 magic bytes + // 4 version + // 5 coded dictionary size +typedef uint8_t Lzip_trailer[trailer_size]; // 0-3 CRC32 of the uncompressed data // 4-11 size of the uncompressed data // 12-19 member size including header and trailer class Range_decoder { + unsigned long long member_pos; uint32_t code; uint32_t range; public: - Range_decoder() : code( 0 ), range( 0xFFFFFFFFU ) + Range_decoder() + : member_pos( header_size ), code( 0 ), range( 0xFFFFFFFFU ) { - for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); + get_byte(); // discard first byte of the LZMA stream + for( int i = 0; i < 4; ++i ) code = ( code << 8 ) | get_byte(); } - uint8_t get_byte() { return std::getc( stdin ); } + uint8_t get_byte() { ++member_pos; return std::getc( stdin ); } + unsigned long long member_position() const { return member_pos; } unsigned decode( const int num_bits ) { @@ -159,30 +165,31 @@ public: symbol <<= 1; if( code >= range ) { code -= range; symbol |= 1; } if( range <= 0x00FFFFFFU ) // normalize - { range <<= 8; code = (code << 8) | get_byte(); } + { range <<= 8; code = ( code << 8 ) | get_byte(); } } return symbol; } - unsigned decode_bit( Bit_model & bm ) + bool decode_bit( Bit_model & bm ) { - unsigned symbol; + bool symbol; const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; if( code < bound ) { range = bound; - bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits; + bm.probability += + ( bit_model_total - bm.probability ) >> bit_model_move_bits; symbol = 0; } else { - range -= bound; code -= bound; + range -= bound; bm.probability -= bm.probability >> bit_model_move_bits; symbol = 1; } if( range <= 0x00FFFFFFU ) // normalize - { range <<= 8; code = (code << 8) | get_byte(); } + { range <<= 8; code = ( code << 8 ) | get_byte(); } return symbol; } @@ -191,7 +198,7 @@ public: unsigned symbol = 1; for( int i = 0; i < num_bits; ++i ) symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); - return symbol - (1 << num_bits); + return symbol - ( 1 << num_bits ); } unsigned decode_tree_reversed( Bit_model bm[], const int num_bits ) @@ -211,8 +218,8 @@ public: unsigned symbol = 1; for( int i = 7; i >= 0; --i ) { - const unsigned match_bit = ( match_byte >> i ) & 1; - const unsigned bit = decode_bit( bm[symbol+(match_bit<<8)+0x100] ); + const bool match_bit = ( match_byte >> i ) & 1; + const bool bit = decode_bit( bm[symbol+(match_bit<<8)+0x100] ); symbol = ( symbol << 1 ) | bit; if( match_bit != bit ) { @@ -227,11 +234,12 @@ public: unsigned decode_len( Len_model & lm, const int pos_state ) { if( decode_bit( lm.choice1 ) == 0 ) - return decode_tree( lm.bm_low[pos_state], len_low_bits ); + return min_match_len + + decode_tree( lm.bm_low[pos_state], len_low_bits ); if( decode_bit( lm.choice2 ) == 0 ) - return len_low_symbols + + return min_match_len + len_low_symbols + decode_tree( lm.bm_mid[pos_state], len_mid_bits ); - return len_low_symbols + len_mid_symbols + + return min_match_len + len_low_symbols + len_mid_symbols + decode_tree( lm.bm_high, len_high_bits ); } }; @@ -278,7 +286,11 @@ public: ~LZ_decoder() { delete[] buffer; } unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; } - unsigned long long data_position() const { return partial_data_pos + pos; } + unsigned long long data_position() const + { return partial_data_pos + pos; } + uint8_t get_byte() { return rdec.get_byte(); } + unsigned long long member_position() const + { return rdec.member_position(); } bool decode_member(); }; @@ -290,7 +302,6 @@ void LZ_decoder::flush_data() { const unsigned size = pos - stream_pos; crc32.update_buf( crc_, buffer + stream_pos, size ); - errno = 0; if( std::fwrite( buffer + stream_pos, 1, size, stdout ) != size ) { std::fprintf( stderr, "Write error: %s\n", std::strerror( errno ) ); std::exit( 1 ); } @@ -301,7 +312,7 @@ void LZ_decoder::flush_data() } -bool LZ_decoder::decode_member() // Returns false if error +bool LZ_decoder::decode_member() // Return false if error { Bit_model bm_literal[1<<literal_context_bits][0x300]; Bit_model bm_match[State::states][pos_states]; @@ -363,12 +374,12 @@ bool LZ_decoder::decode_member() // Returns false if error rep0 = distance; } state.set_rep(); - len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); + len = rdec.decode_len( rep_len_model, pos_state ); } else // match { rep3 = rep2; rep2 = rep1; rep1 = rep0; - len = min_match_len + rdec.decode_len( match_len_model, pos_state ); + len = rdec.decode_len( match_len_model, pos_state ); const int len_state = std::min( len - min_match_len, len_states - 1 ); rep0 = rdec.decode_tree( bm_dis_slot[len_state], dis_slot_bits ); if( rep0 >= start_dis_model ) @@ -381,12 +392,13 @@ bool LZ_decoder::decode_member() // Returns false if error direct_bits ); else { - rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; + rep0 += + rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; rep0 += rdec.decode_tree_reversed( bm_align, dis_align_bits ); if( rep0 == 0xFFFFFFFFU ) // marker found { flush_data(); - return ( len == min_match_len ); // End Of Stream marker + return len == min_match_len; // End Of Stream marker } } } @@ -403,61 +415,74 @@ bool LZ_decoder::decode_member() // Returns false if error int main( const int argc, const char * const argv[] ) { - if( argc > 1 ) + if( argc > 2 || ( argc == 2 && std::strcmp( argv[1], "-d" ) != 0 ) ) { - std::printf( "Lzd %s - Educational decompressor for the lzip format.\n", - PROGVERSION ); - std::printf( "Study the source to learn how a lzip decompressor works.\n" - "See the lzip manual for an explanation of the code.\n" - "It is not safe to use lzd for any real work.\n" - "\nUsage: %s < file.lz > file\n", argv[0] ); - std::printf( "Lzd decompresses from standard input to standard output.\n" - "\nCopyright (C) 2019 Antonio Diaz Diaz.\n" - "This is free software: you are free to change and redistribute it.\n" - "There is NO WARRANTY, to the extent permitted by law.\n" - "Report bugs to lzip-bug@nongnu.org\n" - "Lzd home page: http://www.nongnu.org/lzip/lzd.html\n" ); + std::printf( + "Lzd %s - Educational decompressor for the lzip format.\n" + "Study the source code to learn how a lzip decompressor works.\n" + "See the lzip manual for an explanation of the code.\n" + "\nUsage: %s [-d] < file.lz > file\n" + "Lzd decompresses from standard input to standard output.\n" + "\nCopyright (C) 2024 Antonio Diaz Diaz.\n" + "License 2-clause BSD.\n" + "This is free software: you are free to change and redistribute " + "it.\nThere is NO WARRANTY, to the extent permitted by law.\n" + "Report bugs to lzip-bug@nongnu.org\n" + "Lzd home page: http://www.nongnu.org/lzip/lzd.html\n", + PROGVERSION, argv[0] ); return 0; } -#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) +#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ setmode( STDIN_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY ); #endif for( bool first_member = true; ; first_member = false ) { - Lzip_header header; // verify header - for( int i = 0; i < 6; ++i ) header[i] = std::getc( stdin ); + Lzip_header header; // check header + for( int i = 0; i < header_size; ++i ) header[i] = std::getc( stdin ); if( std::feof( stdin ) || std::memcmp( header, "LZIP\x01", 5 ) != 0 ) { if( first_member ) - { std::fputs( "Bad magic number (file not in lzip format).\n", stderr ); - return 2; } - break; + { std::fputs( "Bad magic number (file not in lzip format).\n", + stderr ); return 2; } + break; // ignore trailing data } unsigned dict_size = 1 << ( header[5] & 0x1F ); dict_size -= ( dict_size / 16 ) * ( ( header[5] >> 5 ) & 7 ); if( dict_size < min_dictionary_size || dict_size > max_dictionary_size ) - { std::fputs( "Invalid dictionary size in member header.\n", stderr ); - return 2; } + { std::fputs( "Invalid dictionary size in member header.\n", + stderr ); return 2; } LZ_decoder decoder( dict_size ); // decode LZMA stream if( !decoder.decode_member() ) { std::fputs( "Data error\n", stderr ); return 2; } - Lzip_trailer trailer; // verify trailer - for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin ); + Lzip_trailer trailer; // check trailer + for( int i = 0; i < trailer_size; ++i ) trailer[i] = decoder.get_byte(); + int retval = 0; unsigned crc = 0; - for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; } + for( int i = 3; i >= 0; --i ) crc = ( crc << 8 ) + trailer[i]; + if( crc != decoder.crc() ) + { std::fputs( "CRC mismatch\n", stderr ); retval = 2; } + unsigned long long data_size = 0; - for( int i = 11; i >= 4; --i ) { data_size <<= 8; data_size += trailer[i]; } - if( crc != decoder.crc() || data_size != decoder.data_position() ) - { std::fputs( "CRC error\n", stderr ); return 2; } + for( int i = 11; i >= 4; --i ) + data_size = ( data_size << 8 ) + trailer[i]; + if( data_size != decoder.data_position() ) + { std::fputs( "Data size mismatch\n", stderr ); retval = 2; } + + unsigned long long member_size = 0; + for( int i = 19; i >= 12; --i ) + member_size = ( member_size << 8 ) + trailer[i]; + if( member_size != decoder.member_position() ) + { std::fputs( "Member size mismatch\n", stderr ); retval = 2; } + if( retval ) return retval; } if( std::fclose( stdout ) != 0 ) - { std::fprintf( stderr, "Error closing stdout: %s\n", std::strerror( errno ) ); - return 1; } + { std::fprintf( stderr, "Error closing stdout: %s\n", + std::strerror( errno ) ); return 1; } return 0; } |