From 43dcb3aa74b5cbea293b296f161006f739b5a754 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 25 Jan 2021 14:57:10 +0100 Subject: Merging upstream version 1.12. Signed-off-by: Daniel Baumann --- lzcheck.c | 382 ++++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 247 insertions(+), 135 deletions(-) (limited to 'lzcheck.c') diff --git a/lzcheck.c b/lzcheck.c index deb4f2a..7e00e6c 100644 --- a/lzcheck.c +++ b/lzcheck.c @@ -1,15 +1,14 @@ -/* Lzcheck - Test program for the lzlib library - Copyright (C) 2009-2019 Antonio Diaz Diaz. +/* Lzcheck - Test program for the library lzlib + Copyright (C) 2009-2021 Antonio Diaz Diaz. - This program is free software: you have unlimited permission - to copy, distribute and modify it. + This program is free software: you have unlimited permission + to copy, distribute, and modify it. - Usage is: - lzcheck filename.txt... + Usage: lzcheck [-m|-s] filename.txt... - This program reads each specified text file and then compresses it, - line by line, to test the flushing mechanism and the member - restart/reset/sync functions. + This program reads each text file specified and then compresses it, + line by line, to test the flushing mechanism and the member + restart/reset/sync functions. */ #define _FILE_OFFSET_BITS 64 @@ -21,21 +20,19 @@ #include #include #include +#include #include "lzlib.h" -#ifndef min - #define min(x,y) ((x) <= (y) ? (x) : (y)) -#endif - +const unsigned long long member_size = INT64_MAX; enum { buffer_size = 32768 }; uint8_t in_buffer[buffer_size]; uint8_t mid_buffer[buffer_size]; uint8_t out_buffer[buffer_size]; -void show_line( const uint8_t * const buffer, const int size ) +static void show_line( const uint8_t * const buffer, const int size ) { int i; for( i = 0; i < size; ++i ) @@ -44,177 +41,281 @@ void show_line( const uint8_t * const buffer, const int size ) } -int lzcheck( FILE * const file, const int dictionary_size ) +static struct LZ_Encoder * xopen_encoder( const int dictionary_size ) { const int match_len_limit = 16; - const unsigned long long member_size = 0x7FFFFFFFFFFFFFFFULL; /* INT64_MAX */ - struct LZ_Encoder * encoder; - struct LZ_Decoder * decoder; - int retval = 0; - - encoder = LZ_compress_open( dictionary_size, match_len_limit, member_size ); + struct LZ_Encoder * const encoder = + LZ_compress_open( dictionary_size, match_len_limit, member_size ); if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) { - const bool mem_error = ( LZ_compress_errno( encoder ) == LZ_mem_error ); + const bool bad_arg = + encoder && ( LZ_compress_errno( encoder ) == LZ_bad_argument ); LZ_compress_close( encoder ); - if( mem_error ) + if( bad_arg ) { - fputs( "lzcheck: Not enough memory.\n", stderr ); - return 1; + fputs( "lzcheck: internal error: Invalid argument to encoder.\n", stderr ); + exit( 3 ); } - fputs( "lzcheck: internal error: Invalid argument to encoder.\n", stderr ); - return 3; + fputs( "lzcheck: Not enough memory.\n", stderr ); + exit( 1 ); } + return encoder; + } - decoder = LZ_decompress_open(); +static struct LZ_Decoder * xopen_decoder( void ) + { + struct LZ_Decoder * const decoder = LZ_decompress_open(); if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) { LZ_decompress_close( decoder ); fputs( "lzcheck: Not enough memory.\n", stderr ); - return 1; + exit( 1 ); } + return decoder; + } - while( retval <= 1 ) - { - int l, r; - const int read_size = fread( in_buffer, 1, buffer_size, file ); - if( read_size <= 0 ) break; /* end of file */ - for( l = 0, r = 1; r <= read_size; l = r, ++r ) +static void xclose_encoder( struct LZ_Encoder * const encoder, + const bool finish ) + { + if( finish ) + { + unsigned long long size = 0; + LZ_compress_finish( encoder ); + while( true ) { - int in_size, mid_size, out_size; - while( r < read_size && in_buffer[r-1] != '\n' ) ++r; - in_size = LZ_compress_write( encoder, in_buffer + l, r - l ); - if( in_size < r - l ) r = l + in_size; - LZ_compress_sync_flush( encoder ); - mid_size = LZ_compress_read( encoder, mid_buffer, buffer_size ); - if( mid_size < 0 ) + const int rd = LZ_compress_read( encoder, mid_buffer, buffer_size ); + if( rd < 0 ) { - fprintf( stderr, "lzcheck: LZ_compress_read error: %s\n", + fprintf( stderr, "lzcheck: xclose: LZ_compress_read error: %s\n", LZ_strerror( LZ_compress_errno( encoder ) ) ); - retval = 3; break; + exit( 3 ); } - LZ_decompress_write( decoder, mid_buffer, mid_size ); - out_size = LZ_decompress_read( decoder, out_buffer, buffer_size ); - if( out_size < 0 ) + size += rd; + if( LZ_compress_finished( encoder ) == 1 ) break; + } + if( size > 0 ) + { + fprintf( stderr, "lzcheck: %lld bytes remain in encoder.\n", size ); + exit( 3 ); + } + } + if( LZ_compress_close( encoder ) < 0 ) exit( 1 ); + } + + +static void xclose_decoder( struct LZ_Decoder * const decoder, + const bool finish ) + { + if( finish ) + { + unsigned long long size = 0; + LZ_decompress_finish( decoder ); + while( true ) + { + const int rd = LZ_decompress_read( decoder, out_buffer, buffer_size ); + if( rd < 0 ) { - fprintf( stderr, "lzcheck: LZ_decompress_read error: %s\n", + fprintf( stderr, "lzcheck: xclose: LZ_decompress_read error: %s\n", LZ_strerror( LZ_decompress_errno( decoder ) ) ); - retval = 3; break; + exit( 3 ); } + size += rd; + if( LZ_decompress_finished( decoder ) == 1 ) break; + } + if( size > 0 ) + { + fprintf( stderr, "lzcheck: %lld bytes remain in decoder.\n", size ); + exit( 3 ); + } + } + if( LZ_decompress_close( decoder ) < 0 ) exit( 1 ); + } - if( out_size != in_size || memcmp( in_buffer + l, out_buffer, out_size ) ) - { - fprintf( stderr, "lzcheck: Sync error at pos %d in_size = %d, " - "out_size = %d\n", - l, in_size, out_size ); - show_line( in_buffer + l, in_size ); - show_line( out_buffer, out_size ); - retval = 1; - } + +/* Returns the next (usually newline-terminated) chunk of data from file. + The size returned in *sizep is always <= buffer_size. + If sizep is a null pointer, rewinds the file, resets state, and returns. + If file is at EOF, returns an empty line. */ +static const uint8_t * next_line( FILE * const file, int * const sizep ) + { + static int l = 0; + static int read_size = 0; + int r; + + if( !sizep ) { rewind( file ); l = read_size = 0; return in_buffer; } + if( l >= read_size ) + { + l = 0; read_size = fread( in_buffer, 1, buffer_size, file ); + if( l >= read_size ) { *sizep = 0; return in_buffer; } /* end of file */ + } + + for( r = l + 1; r < read_size && in_buffer[r-1] != '\n'; ++r ); + *sizep = r - l; l = r; + return in_buffer + l - *sizep; + } + + +static int check_sync_flush( FILE * const file, const int dictionary_size ) + { + struct LZ_Encoder * const encoder = xopen_encoder( dictionary_size ); + struct LZ_Decoder * const decoder = xopen_decoder(); + int retval = 0; + + while( retval <= 1 ) /* test LZ_compress_sync_flush */ + { + int in_size, mid_size, out_size; + int line_size; + const uint8_t * const line_buf = next_line( file, &line_size ); + if( line_size <= 0 ) break; /* end of file */ + + in_size = LZ_compress_write( encoder, line_buf, line_size ); + if( in_size < line_size ) + fprintf( stderr, "lzcheck: sync: LZ_compress_write only accepted %d of %d bytes\n", + in_size, line_size ); + LZ_compress_sync_flush( encoder ); + if( line_buf[0] & 1 ) /* read all data at once or byte by byte */ + mid_size = LZ_compress_read( encoder, mid_buffer, buffer_size ); + else for( mid_size = 0; mid_size < buffer_size; ) + { + const int rd = LZ_compress_read( encoder, mid_buffer + mid_size, 1 ); + if( rd > 0 ) mid_size += rd; + else { if( rd < 0 ) { mid_size = -1; } break; } + } + if( mid_size < 0 ) + { + fprintf( stderr, "lzcheck: LZ_compress_read error: %s\n", + LZ_strerror( LZ_compress_errno( encoder ) ) ); + retval = 3; break; + } + LZ_decompress_write( decoder, mid_buffer, mid_size ); + out_size = LZ_decompress_read( decoder, out_buffer, buffer_size ); + if( out_size < 0 ) + { + fprintf( stderr, "lzcheck: LZ_decompress_read error: %s\n", + LZ_strerror( LZ_decompress_errno( decoder ) ) ); + retval = 3; break; + } + + if( out_size != in_size || memcmp( line_buf, out_buffer, out_size ) ) + { + fprintf( stderr, "lzcheck: LZ_compress_sync_flush error: " + "in_size = %d, out_size = %d\n", in_size, out_size ); + show_line( line_buf, in_size ); + show_line( out_buffer, out_size ); + retval = 1; } } if( retval <= 1 ) { - rewind( file ); + int rd = 0; if( LZ_compress_finish( encoder ) < 0 || - LZ_compress_finish( encoder ) < 0 || - LZ_decompress_write( decoder, mid_buffer, - LZ_compress_read( encoder, mid_buffer, buffer_size ) ) < 0 || - LZ_decompress_read( decoder, out_buffer, buffer_size ) != 0 || - LZ_compress_finish( encoder ) < 0 || - LZ_compress_restart_member( encoder, member_size ) < 0 ) + ( rd = LZ_compress_read( encoder, mid_buffer, buffer_size ) ) < 0 ) { - fprintf( stderr, "lzcheck: Can't finish member: %s\n", - LZ_strerror( LZ_decompress_errno( decoder ) ) ); + fprintf( stderr, "lzcheck: Can't drain encoder: %s\n", + LZ_strerror( LZ_compress_errno( encoder ) ) ); retval = 3; } + LZ_decompress_write( decoder, mid_buffer, rd ); } - while( retval <= 1 ) + xclose_decoder( decoder, retval == 0 ); + xclose_encoder( encoder, retval == 0 ); + return retval; + } + + +/* Test member by member decompression without calling LZ_decompress_finish, + inserting leading garbage before some members, and resetting the + decompressor sometimes. Test that the increase in total_in_size when + syncing to member is equal to the size of the leading garbage skipped. +*/ +static int check_members( FILE * const file, const int dictionary_size ) + { + struct LZ_Encoder * const encoder = xopen_encoder( dictionary_size ); + struct LZ_Decoder * const decoder = xopen_decoder(); + int retval = 0; + + while( retval <= 1 ) /* test LZ_compress_restart_member */ { - int l, r, size; - const int read_size = fread( in_buffer, 1, buffer_size / 2, file ); - if( read_size <= 0 ) break; /* end of file */ + unsigned long long garbage_begin = 0; /* avoid warning from gcc 3.3.6 */ + int leading_garbage, in_size, mid_size, out_size; + int line_size; + const uint8_t * const line_buf = next_line( file, &line_size ); + if( line_size <= 0 && /* end of file, write at least 1 member */ + LZ_decompress_total_in_size( decoder ) != 0 ) break; - for( l = 0, r = 1; r <= read_size; l = r, ++r ) + if( LZ_compress_finished( encoder ) == 1 ) { - int leading_garbage, in_size, mid_size, out_size; - while( r < read_size && in_buffer[r-1] != '\n' ) ++r; - leading_garbage = (l == 0) ? min( r, read_size ) / 2 : 0; - in_size = LZ_compress_write( encoder, in_buffer + l, r - l ); - if( in_size < r - l ) r = l + in_size; - LZ_compress_sync_flush( encoder ); - if( leading_garbage ) - memset( mid_buffer, in_buffer[0], leading_garbage ); - mid_size = LZ_compress_read( encoder, mid_buffer + leading_garbage, - buffer_size - leading_garbage ); - if( mid_size < 0 ) + if( LZ_compress_restart_member( encoder, member_size ) < 0 ) { - fprintf( stderr, "lzcheck: LZ_compress_read error: %s\n", + fprintf( stderr, "lzcheck: Can't restart member: %s\n", LZ_strerror( LZ_compress_errno( encoder ) ) ); retval = 3; break; } - LZ_decompress_write( decoder, mid_buffer, mid_size + leading_garbage ); - out_size = LZ_decompress_read( decoder, out_buffer, buffer_size ); - if( out_size < 0 ) + if( line_size >= 2 && line_buf[1] == 'h' ) + LZ_decompress_reset( decoder ); + } + in_size = LZ_compress_write( encoder, line_buf, line_size ); + if( in_size < line_size ) + fprintf( stderr, "lzcheck: member: LZ_compress_write only accepted %d of %d bytes\n", + in_size, line_size ); + LZ_compress_finish( encoder ); + if( line_size * 3 < buffer_size && line_buf[0] == 't' ) + { leading_garbage = line_size; + memset( mid_buffer, in_buffer[0], leading_garbage ); + garbage_begin = LZ_decompress_total_in_size( decoder ); } + else leading_garbage = 0; + mid_size = LZ_compress_read( encoder, mid_buffer + leading_garbage, + buffer_size - leading_garbage ); + if( mid_size < 0 ) + { + fprintf( stderr, "lzcheck: member: LZ_compress_read error: %s\n", + LZ_strerror( LZ_compress_errno( encoder ) ) ); + retval = 3; break; + } + LZ_decompress_write( decoder, mid_buffer, leading_garbage + mid_size ); + out_size = LZ_decompress_read( decoder, out_buffer, buffer_size ); + if( out_size < 0 ) + { + if( leading_garbage && + ( LZ_decompress_errno( decoder ) == LZ_header_error || + LZ_decompress_errno( decoder ) == LZ_data_error ) ) { - if( LZ_decompress_errno( decoder ) == LZ_header_error || - LZ_decompress_errno( decoder ) == LZ_data_error ) + LZ_decompress_sync_to_member( decoder ); /* skip leading garbage */ + const unsigned long long garbage_end = + LZ_decompress_total_in_size( decoder ); + if( garbage_end - garbage_begin != (unsigned)leading_garbage ) { - LZ_decompress_sync_to_member( decoder ); /* remove leading garbage */ - out_size = LZ_decompress_read( decoder, out_buffer, buffer_size ); - } - if( out_size < 0 ) - { - fprintf( stderr, "lzcheck: LZ_decompress_read error: %s\n", - LZ_strerror( LZ_decompress_errno( decoder ) ) ); + fprintf( stderr, "lzcheck: member: LZ_decompress_sync_to_member error:\n" + " garbage_begin = %llu garbage_end = %llu " + "difference = %llu expected = %d\n", garbage_begin, + garbage_end, garbage_end - garbage_begin, leading_garbage ); retval = 3; break; } + out_size = LZ_decompress_read( decoder, out_buffer, buffer_size ); } - - if( out_size != in_size || memcmp( in_buffer + l, out_buffer, out_size ) ) + if( out_size < 0 ) { - fprintf( stderr, "lzcheck: Sync error at pos %d in_size = %d, " - "out_size = %d, leading garbage = %d\n", - l, in_size, out_size, leading_garbage ); - show_line( in_buffer + l, in_size ); - show_line( out_buffer, out_size ); - retval = 1; + fprintf( stderr, "lzcheck: member: LZ_decompress_read error: %s\n", + LZ_strerror( LZ_decompress_errno( decoder ) ) ); + retval = 3; break; } } - if( retval >= 3 ) break; - if( LZ_compress_finish( encoder ) < 0 || - LZ_decompress_write( decoder, mid_buffer, - LZ_compress_read( encoder, mid_buffer, buffer_size ) ) < 0 || - LZ_decompress_read( decoder, out_buffer, buffer_size ) != 0 || - LZ_decompress_reset( decoder ) < 0 || - LZ_compress_restart_member( encoder, member_size ) < 0 ) + if( out_size != in_size || memcmp( line_buf, out_buffer, out_size ) ) { - fprintf( stderr, "lzcheck: Can't restart member: %s\n", - LZ_strerror( LZ_decompress_errno( decoder ) ) ); - retval = 3; break; - } - - size = min( 100, read_size ); - if( LZ_compress_write( encoder, in_buffer, size ) != size || - LZ_compress_finish( encoder ) < 0 || - LZ_decompress_write( decoder, mid_buffer, - LZ_compress_read( encoder, mid_buffer, buffer_size ) ) < 0 || - LZ_decompress_read( decoder, out_buffer, 0 ) != 0 || - LZ_decompress_sync_to_member( decoder ) < 0 || - LZ_compress_restart_member( encoder, member_size ) < 0 ) - { - fprintf( stderr, "lzcheck: Can't seek to next member: %s\n", - LZ_strerror( LZ_decompress_errno( decoder ) ) ); - retval = 3; break; + fprintf( stderr, "lzcheck: LZ_compress_restart_member error: " + "in_size = %d, out_size = %d\n", in_size, out_size ); + show_line( line_buf, in_size ); + show_line( out_buffer, out_size ); + retval = 1; } } - LZ_decompress_close( decoder ); - LZ_compress_close( encoder ); + xclose_decoder( decoder, retval == 0 ); + xclose_encoder( encoder, retval == 0 ); return retval; } @@ -223,7 +324,11 @@ int main( const int argc, const char * const argv[] ) { int retval = 0, i; int open_failures = 0; - const bool verbose = ( argc > 2 ); + const char opt = ( argc > 2 && + ( strcmp( argv[1], "-m" ) == 0 || strcmp( argv[1], "-s" ) == 0 ) ) ? + argv[1][1] : 0; + const int first = opt ? 2 : 1; + const bool verbose = ( opt != 0 || argc > first + 1 ); if( argc < 2 ) { @@ -231,8 +336,10 @@ int main( const int argc, const char * const argv[] ) return 1; } - for( i = 1; i < argc && retval == 0; ++ i ) + for( i = first; i < argc && retval == 0; ++i ) { + struct stat st; + if( stat( argv[i], &st ) != 0 || !S_ISREG( st.st_mode ) ) continue; FILE * file = fopen( argv[i], "rb" ); if( !file ) { @@ -241,9 +348,14 @@ int main( const int argc, const char * const argv[] ) } if( verbose ) fprintf( stderr, " Testing file '%s'\n", argv[i] ); - retval = lzcheck( file, 65535 ); /* 65535,16 chooses fast encoder */ - if( retval == 0 ) - { rewind( file ); retval = lzcheck( file, 1 << 20 ); } + /* 65535,16 chooses fast encoder */ + if( opt != 'm' ) retval = check_sync_flush( file, 65535 ); + if( retval == 0 && opt != 'm' ) + { next_line( file, 0 ); retval = check_sync_flush( file, 1 << 20 ); } + if( retval == 0 && opt != 's' ) + { next_line( file, 0 ); retval = check_members( file, 65535 ); } + if( retval == 0 && opt != 's' ) + { next_line( file, 0 ); retval = check_members( file, 1 << 20 ); } fclose( file ); } if( open_failures > 0 && verbose ) -- cgit v1.2.3