diff options
Diffstat (limited to 'mtester.cc')
-rw-r--r-- | mtester.cc | 342 |
1 files changed, 152 insertions, 190 deletions
@@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. + Copyright (C) 2009-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -50,32 +50,6 @@ const char * format_byte( const uint8_t byte ) } // end namespace -void LZ_mtester::flush_data() - { - if( pos > stream_pos ) - { - const int size = pos - stream_pos; - crc32.update_buf( crc_, buffer + stream_pos, size ); - if( outfd >= 0 && writeblock( outfd, buffer + stream_pos, size ) != size ) - throw Error( "Write error" ); - if( pos >= dictionary_size ) - { partial_data_pos += pos; pos = 0; pos_wrapped = true; } - stream_pos = pos; - } - } - - -bool LZ_mtester::verify_trailer() - { - const File_trailer * const trailer = rdec.get_trailer(); - if( !trailer ) return false; - - return ( trailer->data_crc() == crc() && - trailer->data_size() == data_position() && - trailer->member_size() == member_position() ); - } - - void LZ_mtester::print_block( const int len ) { std::fputs( " \"", stdout ); @@ -100,91 +74,117 @@ void LZ_mtester::duplicate_buffer() } +void LZ_mtester::flush_data() + { + if( pos > stream_pos ) + { + const int size = pos - stream_pos; + crc32.update_buf( crc_, buffer + stream_pos, size ); + if( outfd >= 0 && writeblock( outfd, buffer + stream_pos, size ) != size ) + throw Error( "Write error" ); + if( pos >= dictionary_size ) + { partial_data_pos += pos; pos = 0; pos_wrapped = true; } + stream_pos = pos; + } + } + + +bool LZ_mtester::verify_trailer() + { + const Lzip_trailer * const trailer = rdec.get_trailer(); + + return ( trailer && + trailer->data_crc() == crc() && + trailer->data_size() == data_position() && + trailer->member_size() == member_position() ); + } + + /* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF, 3 = trailer error, 4 = unknown marker found, -1 = pos_limit reached. */ int LZ_mtester::test_member( const unsigned long pos_limit ) { - if( pos_limit < File_header::size + 5 ) return -1; - if( member_position() == File_header::size ) rdec.load(); + if( pos_limit < Lzip_header::size + 5 ) return -1; + if( member_position() == Lzip_header::size ) rdec.load(); while( !rdec.finished() ) { if( member_position() >= pos_limit ) { flush_data(); return -1; } const int pos_state = data_position() & pos_state_mask; if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit { + // literal byte Bit_model * const bm = bm_literal[get_lit_state(peek_prev())]; if( state.is_char_set_char() ) put_byte( rdec.decode_tree8( bm ) ); else put_byte( rdec.decode_matched( bm, peek( rep0 ) ) ); + continue; } - else // match or repeated match + // match or repeated match + int len; + if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit { - int len; - if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit + if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit { - if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit - { - if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit - { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; } - } + if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit + { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; } + } + else + { + unsigned distance; + if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit + distance = rep1; else { - unsigned distance; - if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit - distance = rep1; + if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit + distance = rep2; else - { - if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit - distance = rep2; - else - { distance = rep3; rep3 = rep2; } - rep2 = rep1; - } - rep1 = rep0; - rep0 = distance; + { distance = rep3; rep3 = rep2; } + rep2 = rep1; } - state.set_rep(); - len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); + rep1 = rep0; + rep0 = distance; } - else // match + state.set_rep(); + len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); + } + else // match + { + len = min_match_len + rdec.decode_len( match_len_model, pos_state ); + unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); + if( distance >= start_dis_model ) { - len = min_match_len + rdec.decode_len( match_len_model, pos_state ); - unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); - if( distance >= start_dis_model ) + const unsigned dis_slot = distance; + const int direct_bits = ( dis_slot >> 1 ) - 1; + distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + if( dis_slot < end_dis_model ) + distance += rdec.decode_tree_reversed( + bm_dis + ( distance - dis_slot ), direct_bits ); + else { - const unsigned dis_slot = distance; - const int direct_bits = ( dis_slot >> 1 ) - 1; - distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits; - if( dis_slot < end_dis_model ) - distance += rdec.decode_tree_reversed( - bm_dis + ( distance - dis_slot ), direct_bits ); - else + distance += + rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; + distance += rdec.decode_tree_reversed4( bm_align ); + if( distance == 0xFFFFFFFFU ) // marker found { - distance += - rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; - distance += rdec.decode_tree_reversed4( bm_align ); - if( distance == 0xFFFFFFFFU ) // marker found + rdec.normalize(); + flush_data(); + if( len == min_match_len ) // End Of Stream marker { - rdec.normalize(); - flush_data(); - if( len == min_match_len ) // End Of Stream marker - { - if( verify_trailer() ) return 0; else return 3; - } - return 4; + if( verify_trailer() ) return 0; else return 3; } - if( distance > max_rep0 ) max_rep0 = distance; + return 4; } + if( distance > max_rep0 ) max_rep0 = distance; } - rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance; - state.set_match(); - if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) ) - { flush_data(); return 1; } } - copy_block( rep0, len ); + rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance; + state.set_match(); + if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) ) + { flush_data(); return 1; } } + copy_block( rep0, len ); } flush_data(); return 2; @@ -204,6 +204,7 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos, const int pos_state = data_position() & pos_state_mask; if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit { + // literal byte Bit_model * const bm = bm_literal[get_lit_state(peek_prev())]; if( state.is_char_set_char() ) { @@ -223,138 +224,99 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos, mp, dp, format_byte( cur_byte ), dp - rep0 - 1, format_byte( match_byte ) ); } + continue; } - else // match or repeated match + // match or repeated match + int len; + if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit { - int len; - if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit + int rep = 0; + if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit { - int rep = 0; - if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit + if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit { - if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit - { - if( show_packets ) - std::printf( "%6llu %6llu shortrep %s %6u (%6llu)\n", - mp, dp, format_byte( peek( rep0 ) ), - rep0 + 1, dp - rep0 - 1 ); - state.set_short_rep(); put_byte( peek( rep0 ) ); continue; - } + if( show_packets ) + std::printf( "%6llu %6llu shortrep %s %6u (%6llu)\n", + mp, dp, format_byte( peek( rep0 ) ), + rep0 + 1, dp - rep0 - 1 ); + state.set_short_rep(); put_byte( peek( rep0 ) ); continue; } + } + else + { + unsigned distance; + if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit + { distance = rep1; rep = 1; } else { - unsigned distance; - if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit - { distance = rep1; rep = 1; } + if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit + { distance = rep2; rep = 2; } else - { - if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit - { distance = rep2; rep = 2; } - else - { distance = rep3; rep3 = rep2; rep = 3; } - rep2 = rep1; - } - rep1 = rep0; - rep0 = distance; + { distance = rep3; rep3 = rep2; rep = 3; } + rep2 = rep1; } - state.set_rep(); - len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); - if( show_packets ) - std::printf( "%6llu %6llu rep%c %6u,%3d (%6llu)", - mp, dp, rep + '0', rep0 + 1, len, dp - rep0 - 1 ); + rep1 = rep0; + rep0 = distance; } - else // match + state.set_rep(); + len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); + if( show_packets ) + std::printf( "%6llu %6llu rep%c %6u,%3d (%6llu)", + mp, dp, rep + '0', rep0 + 1, len, dp - rep0 - 1 ); + } + else // match + { + len = min_match_len + rdec.decode_len( match_len_model, pos_state ); + unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); + if( distance >= start_dis_model ) { - len = min_match_len + rdec.decode_len( match_len_model, pos_state ); - unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); - if( distance >= start_dis_model ) + const unsigned dis_slot = distance; + const int direct_bits = ( dis_slot >> 1 ) - 1; + distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + if( dis_slot < end_dis_model ) + distance += rdec.decode_tree_reversed( + bm_dis + ( distance - dis_slot ), direct_bits ); + else { - const unsigned dis_slot = distance; - const int direct_bits = ( dis_slot >> 1 ) - 1; - distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits; - if( dis_slot < end_dis_model ) - distance += rdec.decode_tree_reversed( - bm_dis + ( distance - dis_slot ), direct_bits ); - else + distance += + rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; + distance += rdec.decode_tree_reversed4( bm_align ); + if( distance == 0xFFFFFFFFU ) // marker found { - distance += - rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; - distance += rdec.decode_tree_reversed4( bm_align ); - if( distance == 0xFFFFFFFFU ) // marker found + rdec.normalize(); + flush_data(); + if( show_packets ) + std::printf( "%6llu %6llu marker code '%d'\n", mp, dp, len ); + if( len == min_match_len ) // End Of Stream marker { - rdec.normalize(); - flush_data(); if( show_packets ) - std::printf( "%6llu %6llu marker code '%d'\n", mp, dp, len ); - if( len == min_match_len ) // End Of Stream marker - { - if( show_packets ) - std::printf( "%6llu %6llu member trailer\n", - mpos + member_position(), dpos + data_position() ); - if( verify_trailer() ) return 0; - if( show_packets ) std::fputs( "trailer error\n", stdout ); - return 3; - } - if( len == min_match_len + 1 ) // Sync Flush marker - { - rdec.load(); continue; - } - return 4; + std::printf( "%6llu %6llu member trailer\n", + mpos + member_position(), dpos + data_position() ); + if( verify_trailer() ) return 0; + if( show_packets ) std::fputs( "trailer error\n", stdout ); + return 3; } - if( distance > max_rep0 ) max_rep0 = distance; + if( len == min_match_len + 1 ) // Sync Flush marker + { + rdec.load(); continue; + } + return 4; } + if( distance > max_rep0 ) max_rep0 = distance; } - rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance; - state.set_match(); - if( show_packets ) - std::printf( "%6llu %6llu match %6u,%3d (%6lld)", - mp, dp, rep0 + 1, len, dp - rep0 - 1 ); - if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) ) - { flush_data(); if( show_packets ) std::fputc( '\n', stdout ); - return 1; } } - copy_block( rep0, len ); - if( show_packets ) print_block( len ); + rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance; + state.set_match(); + if( show_packets ) + std::printf( "%6llu %6llu match %6u,%3d (%6lld)", + mp, dp, rep0 + 1, len, dp - rep0 - 1 ); + if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) ) + { flush_data(); if( show_packets ) std::fputc( '\n', stdout ); + return 1; } } + copy_block( rep0, len ); + if( show_packets ) print_block( len ); } flush_data(); return 2; } - - -uint8_t * read_member( const int infd, const long long mpos, - const long long msize ) - { - if( msize <= 0 || msize > LONG_MAX ) - { show_error( "Member is larger than LONG_MAX." ); return 0; } - if( !safe_seek( infd, mpos ) ) return 0; - uint8_t * const buffer = new uint8_t[msize]; - - if( readblock( infd, buffer, msize ) != msize ) - { show_error( "Error reading input file", errno ); - delete[] buffer; return 0; } - return buffer; - } - - -const LZ_mtester * prepare_master( const uint8_t * const buffer, - const long buffer_size, - const unsigned long pos_limit, - const unsigned dictionary_size ) - { - LZ_mtester * const master = - new LZ_mtester( buffer, buffer_size, dictionary_size ); - if( master->test_member( pos_limit ) == -1 ) return master; - delete master; - return 0; - } - - -bool test_member_rest( const LZ_mtester & master, long * const failure_posp ) - { - LZ_mtester mtester( master ); - mtester.duplicate_buffer(); - if( mtester.test_member() == 0 && mtester.finished() ) return true; - if( failure_posp ) *failure_posp = mtester.member_position(); - return false; - } |