/* Lzcheck - Test program for the library lzlib Copyright (C) 2009-2024 Antonio Diaz Diaz. This program is free software: you have unlimited permission to copy, distribute, and modify it. Usage: lzcheck [-m|-s] filename.txt... This program reads each text file specified and then compresses it, line by line, to test the flushing mechanism and the member restart/reset/sync functions. */ #define _FILE_OFFSET_BITS 64 #include #include #include #include #include #include #include #include #include "lzlib.h" const unsigned long long member_size = INT64_MAX; enum { buffer_size = 32749 }; /* largest prime < 32768 */ uint8_t in_buffer[buffer_size]; uint8_t mid_buffer[buffer_size]; uint8_t out_buffer[buffer_size]; static void show_line( const uint8_t * const buffer, const int size ) { int i; for( i = 0; i < size; ++i ) fputc( isprint( buffer[i] ) ? buffer[i] : '.', stderr ); fputc( '\n', stderr ); } static struct LZ_Encoder * xopen_encoder( const int dictionary_size ) { const int match_len_limit = 16; struct LZ_Encoder * const encoder = LZ_compress_open( dictionary_size, match_len_limit, member_size ); if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) { const bool bad_arg = encoder && ( LZ_compress_errno( encoder ) == LZ_bad_argument ); LZ_compress_close( encoder ); if( bad_arg ) { fputs( "lzcheck: internal error: Invalid argument to encoder.\n", stderr ); exit( 3 ); } fputs( "lzcheck: Not enough memory.\n", stderr ); exit( 1 ); } return encoder; } static struct LZ_Decoder * xopen_decoder( void ) { struct LZ_Decoder * const decoder = LZ_decompress_open(); if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) { LZ_decompress_close( decoder ); fputs( "lzcheck: Not enough memory.\n", stderr ); exit( 1 ); } return decoder; } static void xclose_encoder( struct LZ_Encoder * const encoder, const bool finish ) { if( finish ) { unsigned long long size = 0; LZ_compress_finish( encoder ); while( true ) { const int rd = LZ_compress_read( encoder, mid_buffer, buffer_size ); if( rd < 0 ) { fprintf( stderr, "lzcheck: xclose: LZ_compress_read error: %s\n", LZ_strerror( LZ_compress_errno( encoder ) ) ); exit( 3 ); } size += rd; if( LZ_compress_finished( encoder ) == 1 ) break; } if( size > 0 ) { fprintf( stderr, "lzcheck: %lld bytes remain in encoder.\n", size ); exit( 3 ); } } if( LZ_compress_close( encoder ) < 0 ) exit( 1 ); } static void xclose_decoder( struct LZ_Decoder * const decoder, const bool finish ) { if( finish ) { unsigned long long size = 0; LZ_decompress_finish( decoder ); while( true ) { const int rd = LZ_decompress_read( decoder, out_buffer, buffer_size ); if( rd < 0 ) { fprintf( stderr, "lzcheck: xclose: LZ_decompress_read error: %s\n", LZ_strerror( LZ_decompress_errno( decoder ) ) ); exit( 3 ); } size += rd; if( LZ_decompress_finished( decoder ) == 1 ) break; } if( size > 0 ) { fprintf( stderr, "lzcheck: %lld bytes remain in decoder.\n", size ); exit( 3 ); } } if( LZ_decompress_close( decoder ) < 0 ) exit( 1 ); } /* Return the next (usually newline-terminated) chunk of data from file. The size returned in *sizep is always <= buffer_size. If sizep is a null pointer, rewind the file, reset state, and return. If file is at EOF, return an empty line. */ static const uint8_t * next_line( FILE * const file, int * const sizep ) { static int l = 0; static int read_size = 0; int r; if( !sizep ) { rewind( file ); l = read_size = 0; return in_buffer; } if( l >= read_size ) { l = 0; read_size = fread( in_buffer, 1, buffer_size, file ); if( l >= read_size ) { *sizep = 0; return in_buffer; } /* end of file */ } for( r = l + 1; r < read_size && in_buffer[r-1] != '\n'; ++r ); *sizep = r - l; l = r; return in_buffer + l - *sizep; } static int check_sync_flush( FILE * const file, const int dictionary_size ) { struct LZ_Encoder * const encoder = xopen_encoder( dictionary_size ); struct LZ_Decoder * const decoder = xopen_decoder(); int retval = 0; while( retval <= 1 ) /* test LZ_compress_sync_flush */ { int in_size, mid_size, out_size; int line_size; const uint8_t * const line_buf = next_line( file, &line_size ); if( line_size <= 0 ) break; /* end of file */ in_size = LZ_compress_write( encoder, line_buf, line_size ); if( in_size < 0 ) { fprintf( stderr, "lzcheck: LZ_compress_write error: %s\n", LZ_strerror( LZ_compress_errno( encoder ) ) ); retval = 3; break; } if( in_size < line_size ) { fprintf( stderr, "lzcheck: sync: LZ_compress_write only accepted %d " "of %d bytes\n", in_size, line_size ); mid_size = LZ_compress_read( encoder, mid_buffer, buffer_size ); const int wr = LZ_compress_write( encoder, line_buf + in_size, line_size - in_size ); if( wr < 0 ) { fprintf( stderr, "lzcheck: LZ_compress_write error: %s\n", LZ_strerror( LZ_compress_errno( encoder ) ) ); retval = 3; break; } if( wr + in_size != line_size ) { fprintf( stderr, "lzcheck: sync: LZ_compress_write only accepted %d " "of %d remaining bytes\n", wr, line_size - in_size ); retval = 3; break; } in_size += wr; LZ_compress_sync_flush( encoder ); const int rd = LZ_compress_read( encoder, mid_buffer + mid_size, buffer_size - mid_size ); if( rd > 0 ) mid_size += rd; else if( rd < 0 ) mid_size = -1; } else { LZ_compress_sync_flush( encoder ); if( line_buf[0] & 1 ) /* read all data at once or byte by byte */ mid_size = LZ_compress_read( encoder, mid_buffer, buffer_size ); else for( mid_size = 0; mid_size < buffer_size; ) { const int rd = LZ_compress_read( encoder, mid_buffer + mid_size, 1 ); if( rd > 0 ) mid_size += rd; else { if( rd < 0 ) { mid_size = -1; } break; } } } if( mid_size < 0 ) { fprintf( stderr, "lzcheck: LZ_compress_read error: %s\n", LZ_strerror( LZ_compress_errno( encoder ) ) ); retval = 3; break; } LZ_decompress_write( decoder, mid_buffer, mid_size ); out_size = LZ_decompress_read( decoder, out_buffer, buffer_size ); if( out_size < 0 ) { fprintf( stderr, "lzcheck: LZ_decompress_read error: %s\n", LZ_strerror( LZ_decompress_errno( decoder ) ) ); retval = 3; break; } if( out_size != in_size || memcmp( line_buf, out_buffer, out_size ) ) { fprintf( stderr, "lzcheck: LZ_compress_sync_flush error: " "in_size = %d, out_size = %d\n", in_size, out_size ); show_line( line_buf, in_size ); show_line( out_buffer, out_size ); retval = 1; } } if( retval <= 1 ) { int rd = 0; if( LZ_compress_finish( encoder ) < 0 || ( rd = LZ_compress_read( encoder, mid_buffer, buffer_size ) ) < 0 ) { fprintf( stderr, "lzcheck: Can't drain encoder: %s\n", LZ_strerror( LZ_compress_errno( encoder ) ) ); retval = 3; } LZ_decompress_write( decoder, mid_buffer, rd ); } xclose_decoder( decoder, retval == 0 ); xclose_encoder( encoder, retval == 0 ); return retval; } /* Test member by member decompression without calling LZ_decompress_finish, inserting leading garbage before some members, and resetting the decompressor sometimes. Test that the increase in total_in_size when syncing to member is equal to the size of the leading garbage skipped. */ static int check_members( FILE * const file, const int dictionary_size ) { struct LZ_Encoder * const encoder = xopen_encoder( dictionary_size ); struct LZ_Decoder * const decoder = xopen_decoder(); int retval = 0; while( retval <= 1 ) /* test LZ_compress_restart_member */ { unsigned long long garbage_begin = 0; /* avoid warning from gcc 3.3.6 */ int leading_garbage, in_size, mid_size, out_size; int line_size; const uint8_t * const line_buf = next_line( file, &line_size ); if( line_size <= 0 && /* end of file, write at least 1 member */ LZ_decompress_total_in_size( decoder ) != 0 ) break; if( LZ_compress_finished( encoder ) == 1 ) { if( LZ_compress_restart_member( encoder, member_size ) < 0 ) { fprintf( stderr, "lzcheck: Can't restart member: %s\n", LZ_strerror( LZ_compress_errno( encoder ) ) ); retval = 3; break; } if( line_size >= 2 && line_buf[1] == 'h' ) LZ_decompress_reset( decoder ); } in_size = LZ_compress_write( encoder, line_buf, line_size ); if( in_size < line_size ) fprintf( stderr, "lzcheck: member: LZ_compress_write only accepted %d of %d bytes\n", in_size, line_size ); LZ_compress_finish( encoder ); if( line_size * 3 < buffer_size && line_buf[0] == 't' ) { leading_garbage = line_size; memset( mid_buffer, in_buffer[0], leading_garbage ); garbage_begin = LZ_decompress_total_in_size( decoder ); } else leading_garbage = 0; mid_size = LZ_compress_read( encoder, mid_buffer + leading_garbage, buffer_size - leading_garbage ); if( mid_size < 0 ) { fprintf( stderr, "lzcheck: member: LZ_compress_read error: %s\n", LZ_strerror( LZ_compress_errno( encoder ) ) ); retval = 3; break; } LZ_decompress_write( decoder, mid_buffer, leading_garbage + mid_size ); out_size = LZ_decompress_read( decoder, out_buffer, buffer_size ); if( out_size < 0 ) { if( leading_garbage && ( LZ_decompress_errno( decoder ) == LZ_header_error || LZ_decompress_errno( decoder ) == LZ_data_error ) ) { LZ_decompress_sync_to_member( decoder ); /* skip leading garbage */ const unsigned long long garbage_end = LZ_decompress_total_in_size( decoder ); if( garbage_end - garbage_begin != (unsigned)leading_garbage ) { fprintf( stderr, "lzcheck: member: LZ_decompress_sync_to_member error:\n" " garbage_begin = %llu garbage_end = %llu " "difference = %llu expected = %d\n", garbage_begin, garbage_end, garbage_end - garbage_begin, leading_garbage ); retval = 3; break; } out_size = LZ_decompress_read( decoder, out_buffer, buffer_size ); } if( out_size < 0 ) { fprintf( stderr, "lzcheck: member: LZ_decompress_read error: %s\n", LZ_strerror( LZ_decompress_errno( decoder ) ) ); retval = 3; break; } } if( out_size != in_size || memcmp( line_buf, out_buffer, out_size ) ) { fprintf( stderr, "lzcheck: LZ_compress_restart_member error: " "in_size = %d, out_size = %d\n", in_size, out_size ); show_line( line_buf, in_size ); show_line( out_buffer, out_size ); retval = 1; } } xclose_decoder( decoder, retval == 0 ); xclose_encoder( encoder, retval == 0 ); return retval; } int main( const int argc, const char * const argv[] ) { int retval = 0, i; int open_failures = 0; const char opt = ( argc > 2 && ( strcmp( argv[1], "-m" ) == 0 || strcmp( argv[1], "-s" ) == 0 ) ) ? argv[1][1] : 0; const int first = opt ? 2 : 1; const bool verbose = ( opt != 0 || argc > first + 1 ); if( argc < 2 ) { fputs( "Usage: lzcheck [-m|-s] filename.txt...\n", stderr ); return 1; } for( i = first; i < argc && retval == 0; ++i ) { struct stat st; if( stat( argv[i], &st ) != 0 || !S_ISREG( st.st_mode ) ) continue; FILE * file = fopen( argv[i], "rb" ); if( !file ) { fprintf( stderr, "lzcheck: %s: Can't open file for reading.\n", argv[i] ); ++open_failures; continue; } if( verbose ) fprintf( stderr, " Testing file '%s'\n", argv[i] ); /* 65535,16 chooses fast encoder */ if( opt != 'm' ) retval = check_sync_flush( file, 65535 ); if( retval == 0 && opt != 'm' ) { next_line( file, 0 ); retval = check_sync_flush( file, 1 << 20 ); } if( retval == 0 && opt != 's' ) { next_line( file, 0 ); retval = check_members( file, 65535 ); } if( retval == 0 && opt != 's' ) { next_line( file, 0 ); retval = check_members( file, 1 << 20 ); } fclose( file ); } if( open_failures > 0 && verbose ) fprintf( stderr, "lzcheck: warning: %d %s failed to open.\n", open_failures, ( open_failures == 1 ) ? "file" : "files" ); if( retval == 0 && open_failures ) retval = 1; return retval; }