diff options
-rw-r--r-- | ChangeLog | 13 | ||||
-rw-r--r-- | INSTALL | 2 | ||||
-rw-r--r-- | Makefile.in | 13 | ||||
-rw-r--r-- | NEWS | 28 | ||||
-rw-r--r-- | README | 12 | ||||
-rw-r--r-- | carg_parser.c | 9 | ||||
-rw-r--r-- | carg_parser.h | 2 | ||||
-rwxr-xr-x | configure | 21 | ||||
-rw-r--r-- | decoder.c | 108 | ||||
-rw-r--r-- | decoder.h | 245 | ||||
-rw-r--r-- | doc/lunzip.1 | 9 | ||||
-rw-r--r-- | file_index.c | 268 | ||||
-rw-r--r-- | file_index.h | 90 | ||||
-rw-r--r-- | list.c | 123 | ||||
-rw-r--r-- | lzip.h | 33 | ||||
-rw-r--r-- | main.c | 213 | ||||
-rwxr-xr-x | testsuite/check.sh | 201 | ||||
-rw-r--r-- | testsuite/test.txt.lz | bin | 7376 -> 7376 bytes |
18 files changed, 1006 insertions, 384 deletions
@@ -1,3 +1,14 @@ +2017-04-13 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.9 released. + * The option '-l, --list' has been ported from lziprecover. + * Don't allow mixing different operations (-d, -l or -t). + * Decompression time has been reduced by 7%. + * main.c: Continue testing if any input file is a terminal. + * main.c: Show trailing data in both hexadecimal and ASCII. + * file_index.c: Improve detection of bad dict and trailing data. + * lzip.h: Unified messages for bad magic, trailing data, etc. + 2016-05-12 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.8 released. @@ -69,7 +80,7 @@ * Created from the decompression code of clzip 1.1. -Copyright (C) 2010-2016 Antonio Diaz Diaz. +Copyright (C) 2010-2017 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and @@ -61,7 +61,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2010-2016 Antonio Diaz Diaz. +Copyright (C) 2010-2017 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/Makefile.in b/Makefile.in index e4ff366..4c0d3a5 100644 --- a/Makefile.in +++ b/Makefile.in @@ -7,13 +7,14 @@ INSTALL_DIR = $(INSTALL) -d -m 755 SHELL = /bin/sh CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 -objs = carg_parser.o decoder.o main.o +objs = carg_parser.o file_index.o list.o decoder.o main.o .PHONY : all install install-bin install-info install-man \ install-strip install-compress install-strip-compress \ install-bin-strip install-info-compress install-man-compress \ - install-as-lzip uninstall uninstall-bin uninstall-info uninstall-man \ + install-as-lzip \ + uninstall uninstall-bin uninstall-info uninstall-man \ doc info man check dist clean distclean all : $(progname) @@ -30,6 +31,8 @@ main.o : main.c $(objs) : Makefile carg_parser.o : carg_parser.h decoder.o : lzip.h decoder.h +file_index.o : lzip.h file_index.h +list.o : lzip.h file_index.h main.o : carg_parser.h lzip.h decoder.h @@ -112,11 +115,11 @@ dist : doc $(DISTNAME)/README \ $(DISTNAME)/configure \ $(DISTNAME)/doc/$(progname).1 \ + $(DISTNAME)/*.h \ + $(DISTNAME)/*.c \ $(DISTNAME)/testsuite/check.sh \ $(DISTNAME)/testsuite/test.txt \ - $(DISTNAME)/testsuite/test.txt.lz \ - $(DISTNAME)/*.h \ - $(DISTNAME)/*.c + $(DISTNAME)/testsuite/test.txt.lz rm -f $(DISTNAME) lzip -v -9 $(DISTNAME).tar @@ -1,24 +1,14 @@ -Changes in version 1.8: +Changes in version 1.9: -The option "-a, --trailing-error", which makes lunzip exit with error -status 2 if any remaining input is detected after decompressing the last -member, has been added. +The option '-l, --list' has been ported from lziprecover. -Lunzip now verifies that the output file is regular when "low memory" -mode is requested. +It is now an error to specify two or more different operations in the +command line (--decompress, --list or --test). -Up to 6 bytes of trailing data are printed if "-vvvv" is specified. +Decompression time has been reduced by 7%. -The test of the value remaining in the range decoder has been removed. -(After extensive testing it has been found useless to detect corruption -in the decompressed data. Eliminating it reduces the number of false -positives for corruption and makes error detection more accurate). +In test mode, lunzip now continues checking the rest of the files if any +input file is a terminal. -When decompressing, the file specified with the '--output' option is now -deleted if the input is a terminal. - -Some error messages have been adjusted to be identical to those of -lzip-1.18. - -A harmless check failure on Windows, caused by the failed comparison of -a message in text mode, has been fixed. +Trailing data are now shown both in hexadecimal and as a string of +printable ASCII characters. @@ -16,11 +16,11 @@ availability: merging of damaged copies of a file. * The lzip format is as simple as possible (but not simpler). The - lzip manual provides the code of a simple decompressor along with a - detailed explanation of how it works, so that with the only help of - the lzip manual it would be possible for a digital archaeologist to - extract the data from a lzip file long after quantum computers - eventually render LZMA obsolete. + lzip manual provides the source code of a simple decompressor along + with a detailed explanation of how it works, so that with the only + help of the lzip manual it would be possible for a digital + archaeologist to extract the data from a lzip file long after + quantum computers eventually render LZMA obsolete. * Additionally the lzip reference implementation is copylefted, which guarantees that it will remain free forever. @@ -83,7 +83,7 @@ range encoding), Igor Pavlov (for putting all the above together in LZMA), and Julian Seward (for bzip2's CLI). -Copyright (C) 2010-2016 Antonio Diaz Diaz. +Copyright (C) 2010-2017 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/carg_parser.c b/carg_parser.c index 3d4e89f..6850643 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2016 Antonio Diaz Diaz. + Copyright (C) 2006-2017 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -94,7 +94,7 @@ static char parse_long_option( struct Arg_parser * const ap, else if( index < 0 ) index = i; /* First nonexact match found */ else if( options[index].code != options[i].code || options[index].has_arg != options[i].has_arg ) - ambig = 1; /* Second or later nonexact match found */ + ambig = 1; /* Second or later nonexact match found */ } if( ambig && !exact ) @@ -230,7 +230,9 @@ char ap_init( struct Arg_parser * const ap, } else { - if( !in_order ) + if( in_order ) + { if( !push_back_record( ap, 0, argv[argind++] ) ) return 0; } + else { void * tmp = ap_resize_buffer( non_options, ( non_options_size + 1 ) * sizeof *non_options ); @@ -238,7 +240,6 @@ char ap_init( struct Arg_parser * const ap, non_options = (const char **)tmp; non_options[non_options_size++] = argv[argind++]; } - else if( !push_back_record( ap, 0, argv[argind++] ) ) return 0; } } if( ap->error ) free_data( ap ); diff --git a/carg_parser.h b/carg_parser.h index e918942..c4ce31d 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2016 Antonio Diaz Diaz. + Copyright (C) 2006-2017 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Lunzip - Decompressor for the lzip format -# Copyright (C) 2010-2016 Antonio Diaz Diaz. +# Copyright (C) 2010-2017 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. pkgname=lunzip -pkgversion=1.8 +pkgversion=1.9 progname=lunzip srctrigger=doc/${progname}.1 @@ -26,11 +26,11 @@ CFLAGS='-Wall -W -O2' LDFLAGS= # checking whether we are using GNU C. -if /bin/sh -c "${CC} --version" > /dev/null 2>&1 ; then true -else +/bin/sh -c "${CC} --version" > /dev/null 2>&1 || + { CC=cc - CFLAGS='-W -O2' -fi + CFLAGS=-O2 + } # Loop over all args args= @@ -52,9 +52,12 @@ while [ $# != 0 ] ; do # Process the options case ${option} in --help | -h) - echo "Usage: configure [options]" + echo "Usage: $0 [OPTION]... [VAR=VALUE]..." + echo + echo "To assign makefile variables (e.g., CC, CFLAGS...), specify them as" + echo "arguments to configure in the form VAR=VALUE." echo - echo "Options: [defaults in brackets]" + echo "Options and variables: [defaults in brackets]" echo " -h, --help display this help and exit" echo " -V, --version output version information and exit" echo " --srcdir=DIR find the sources in DIR [. or ..]" @@ -165,7 +168,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lunzip - Decompressor for the lzip format -# Copyright (C) 2010-2016 Antonio Diaz Diaz. +# Copyright (C) 2010-2017 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2016 Antonio Diaz Diaz. + Copyright (C) 2010-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -53,7 +53,7 @@ void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ) /* Returns the number of bytes really read. If (returned value < size) and (errno == 0), means EOF was reached. */ -static int readblock( const int fd, uint8_t * const buf, const int size ) +int readblock( const int fd, uint8_t * const buf, const int size ) { int sz = 0; errno = 0; @@ -87,10 +87,10 @@ static int writeblock( const int fd, const uint8_t * const buf, const int size ) } -int seek_read( const int fd, uint8_t * const buf, const int size, - const int offset ) +unsigned seek_read_back( const int fd, uint8_t * const buf, const int size, + const int offset ) { - if( lseek( fd, offset, SEEK_END ) >= 0 ) + if( lseek( fd, -offset, SEEK_END ) >= 0 ) return readblock( fd, buf, size ); return 0; } @@ -121,8 +121,9 @@ void LZd_flush_data( struct LZ_decoder * const d ) writeblock( d->outfd, d->buffer + d->stream_pos, size ) != size ) { show_error( "Write error", errno, false ); cleanup_and_fail( 1 ); } if( d->pos >= d->buffer_size ) - { d->partial_data_pos += d->pos; d->pos = 0; - if( d->partial_data_pos >= d->dictionary_size ) d->pos_wrapped = true; } + { d->partial_data_pos += d->pos; d->pos = 0; d->pos_wrapped = true; + if( d->partial_data_pos >= d->dictionary_size ) + d->pos_wrapped_dic = true; } d->stream_pos = d->pos; } } @@ -185,7 +186,7 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d, ( 8.0 * member_size ) / data_size, 100.0 * ( 1.0 - ( (double)member_size / data_size ) ) ); if( !error && verbosity >= 4 ) - fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ", + fprintf( stderr, "CRC %08X, decompressed %9llu, compressed %8llu. ", LZd_crc( d ), data_size, member_size ); return !error; } @@ -198,49 +199,77 @@ int LZd_decode_member( struct LZ_decoder * const d, { struct Range_decoder * const rdec = d->rdec; void (* const copy_block) - ( struct LZ_decoder * const d, const int distance, int len ) = - ( (unsigned)d->buffer_size >= d->dictionary_size ) ? + ( struct LZ_decoder * const d, const unsigned distance, unsigned len ) = + ( d->buffer_size >= d->dictionary_size ) ? &LZd_copy_block : &LZd_copy_block2; + Bit_model bm_literal[1<<literal_context_bits][0x300]; + Bit_model bm_match[states][pos_states]; + Bit_model bm_rep[states]; + Bit_model bm_rep0[states]; + Bit_model bm_rep1[states]; + Bit_model bm_rep2[states]; + Bit_model bm_len[states][pos_states]; + Bit_model bm_dis_slot[len_states][1<<dis_slot_bits]; + Bit_model bm_dis[modeled_distances-end_dis_model+1]; + Bit_model bm_align[dis_align_size]; + struct Len_model match_len_model; + struct Len_model rep_len_model; unsigned rep0 = 0; /* rep[0-3] latest four distances */ unsigned rep1 = 0; /* used for efficient coding of */ unsigned rep2 = 0; /* repeated distances */ unsigned rep3 = 0; State state = 0; + Bm_array_init( bm_literal[0], (1 << literal_context_bits) * 0x300 ); + Bm_array_init( bm_match[0], states * pos_states ); + Bm_array_init( bm_rep, states ); + Bm_array_init( bm_rep0, states ); + Bm_array_init( bm_rep1, states ); + Bm_array_init( bm_rep2, states ); + Bm_array_init( bm_len[0], states * pos_states ); + Bm_array_init( bm_dis_slot[0], len_states * (1 << dis_slot_bits) ); + Bm_array_init( bm_dis, modeled_distances - end_dis_model + 1 ); + Bm_array_init( bm_align, dis_align_size ); + Lm_init( &match_len_model ); + Lm_init( &rep_len_model ); + Rd_load( rdec ); while( !Rd_finished( rdec ) ) { const int pos_state = LZd_data_position( d ) & pos_state_mask; - if( Rd_decode_bit( rdec, &d->bm_match[state][pos_state] ) == 0 ) /* 1st bit */ + if( Rd_decode_bit( rdec, &bm_match[state][pos_state] ) == 0 ) /* 1st bit */ { - const uint8_t prev_byte = LZd_peek_prev( d ); + Bit_model * const bm = bm_literal[get_lit_state(LZd_peek_prev( d ))]; if( St_is_char( state ) ) { state -= ( state < 4 ) ? state : 3; - LZd_put_byte( d, Rd_decode_tree( rdec, - d->bm_literal[get_lit_state(prev_byte)], 8 ) ); + LZd_put_byte( d, Rd_decode_tree8( rdec, bm ) ); } else { state -= ( state < 10 ) ? 3 : 6; - LZd_put_byte( d, Rd_decode_matched( rdec, - d->bm_literal[get_lit_state(prev_byte)], - LZd_peek( d, rep0 ) ) ); + LZd_put_byte( d, Rd_decode_matched( rdec, bm, LZd_peek( d, rep0 ) ) ); } } else /* match or repeated match */ { int len; - if( Rd_decode_bit( rdec, &d->bm_rep[state] ) != 0 ) /* 2nd bit */ + if( Rd_decode_bit( rdec, &bm_rep[state] ) != 0 ) /* 2nd bit */ { - if( Rd_decode_bit( rdec, &d->bm_rep0[state] ) != 0 ) /* 3rd bit */ + if( Rd_decode_bit( rdec, &bm_rep0[state] ) == 0 ) /* 3rd bit */ + { + if( Rd_decode_bit( rdec, &bm_len[state][pos_state] ) == 0 ) /* 4th bit */ + { state = St_set_short_rep( state ); + LZd_put_byte( d, LZd_peek( d, rep0 ) ); continue; } + } + else { unsigned distance; - if( Rd_decode_bit( rdec, &d->bm_rep1[state] ) == 0 ) /* 4th bit */ + if( Rd_decode_bit( rdec, &bm_rep1[state] ) == 0 ) /* 4th bit */ distance = rep1; else { - if( Rd_decode_bit( rdec, &d->bm_rep2[state] ) == 0 ) /* 5th bit */ + if( Rd_decode_bit( rdec, &bm_rep2[state] ) == 0 ) /* 5th bit */ distance = rep2; else { distance = rep3; rep3 = rep2; } @@ -249,36 +278,29 @@ int LZd_decode_member( struct LZ_decoder * const d, rep1 = rep0; rep0 = distance; } - else - { - if( Rd_decode_bit( rdec, &d->bm_len[state][pos_state] ) == 0 ) /* 4th bit */ - { state = St_set_short_rep( state ); - LZd_put_byte( d, LZd_peek( d, rep0 ) ); continue; } - } state = St_set_rep( state ); - len = min_match_len + Rd_decode_len( rdec, &d->rep_len_model, pos_state ); + len = min_match_len + Rd_decode_len( rdec, &rep_len_model, pos_state ); } else /* match */ { - int dis_slot; - const unsigned rep0_saved = rep0; - len = min_match_len + Rd_decode_len( rdec, &d->match_len_model, pos_state ); - dis_slot = Rd_decode_tree6( rdec, d->bm_dis_slot[get_len_state(len)] ); - if( dis_slot < start_dis_model ) rep0 = dis_slot; - else + unsigned distance; + len = min_match_len + Rd_decode_len( rdec, &match_len_model, pos_state ); + distance = Rd_decode_tree6( rdec, bm_dis_slot[get_len_state(len)] ); + if( distance >= start_dis_model ) { + const unsigned dis_slot = distance; const int direct_bits = ( dis_slot >> 1 ) - 1; - rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits; if( dis_slot < end_dis_model ) - rep0 += Rd_decode_tree_reversed( rdec, - d->bm_dis + rep0 - dis_slot - 1, direct_bits ); + distance += Rd_decode_tree_reversed( rdec, + bm_dis + ( distance - dis_slot ), direct_bits ); else { - rep0 += Rd_decode( rdec, direct_bits - dis_align_bits ) << dis_align_bits; - rep0 += Rd_decode_tree_reversed4( rdec, d->bm_align ); - if( rep0 == 0xFFFFFFFFU ) /* marker found */ + distance += + Rd_decode( rdec, direct_bits - dis_align_bits ) << dis_align_bits; + distance += Rd_decode_tree_reversed4( rdec, bm_align ); + if( distance == 0xFFFFFFFFU ) /* marker found */ { - rep0 = rep0_saved; Rd_normalize( rdec ); LZd_flush_data( d ); if( len == min_match_len ) /* End Of Stream marker */ @@ -298,10 +320,10 @@ int LZd_decode_member( struct LZ_decoder * const d, } } } - rep3 = rep2; rep2 = rep1; rep1 = rep0_saved; + rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance; state = St_set_match( state ); if( rep0 >= d->dictionary_size || - ( rep0 >= LZd_data_position( d ) && !d->pos_wrapped ) ) + ( rep0 >= LZd_data_position( d ) && !d->pos_wrapped_dic ) ) { LZd_flush_data( d ); return 1; } } copy_block( d, rep0, len ); @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2016 Antonio Diaz Diaz. + Copyright (C) 2010-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -56,7 +56,7 @@ Rd_member_position( const struct Range_decoder * const rdec ) { return rdec->partial_member_pos + rdec->pos; } static inline void Rd_reset_member_position( struct Range_decoder * const rdec ) - { rdec->partial_member_pos = -rdec->pos; } + { rdec->partial_member_pos = 0; rdec->partial_member_pos -= rdec->pos; } static inline uint8_t Rd_get_byte( struct Range_decoder * const rdec ) { @@ -68,23 +68,22 @@ static inline uint8_t Rd_get_byte( struct Range_decoder * const rdec ) static inline int Rd_read_data( struct Range_decoder * const rdec, uint8_t * const outbuf, const int size ) { - int rest = size; - while( rest > 0 && !Rd_finished( rdec ) ) + int sz = 0; + while( sz < size && !Rd_finished( rdec ) ) { - const int rd = min( rest, rdec->stream_pos - rdec->pos ); - memcpy( outbuf + size - rest, rdec->buffer + rdec->pos, rd ); + const int rd = min( size - sz, rdec->stream_pos - rdec->pos ); + memcpy( outbuf + sz, rdec->buffer + rdec->pos, rd ); rdec->pos += rd; - rest -= rd; + sz += rd; } - return size - rest; + return sz; } static inline void Rd_load( struct Range_decoder * const rdec ) { int i; rdec->code = 0; - for( i = 0; i < 5; ++i ) - rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); + for( i = 0; i < 5; ++i ) rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); rdec->range = 0xFFFFFFFFU; rdec->code &= rdec->range; /* make sure that first byte is discarded */ } @@ -92,34 +91,30 @@ static inline void Rd_load( struct Range_decoder * const rdec ) static inline void Rd_normalize( struct Range_decoder * const rdec ) { if( rdec->range <= 0x00FFFFFFU ) - { - rdec->range <<= 8; - rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); - } + { rdec->range <<= 8; rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); } } -static inline int Rd_decode( struct Range_decoder * const rdec, - const int num_bits ) +static inline unsigned Rd_decode( struct Range_decoder * const rdec, + const int num_bits ) { - int symbol = 0; + unsigned symbol = 0; int i; for( i = num_bits; i > 0; --i ) { - uint32_t mask; + bool bit; Rd_normalize( rdec ); rdec->range >>= 1; /* symbol <<= 1; */ /* if( rdec->code >= rdec->range ) { rdec->code -= rdec->range; symbol |= 1; } */ - mask = 0U - (rdec->code < rdec->range); - rdec->code -= rdec->range; - rdec->code += rdec->range & mask; - symbol = (symbol << 1) + (mask + 1); + bit = ( rdec->code >= rdec->range ); + symbol = ( symbol << 1 ) + bit; + rdec->code -= rdec->range & ( 0U - bit ); } return symbol; } -static inline int Rd_decode_bit( struct Range_decoder * const rdec, - Bit_model * const probability ) +static inline unsigned Rd_decode_bit( struct Range_decoder * const rdec, + Bit_model * const probability ) { uint32_t bound; Rd_normalize( rdec ); @@ -139,20 +134,20 @@ static inline int Rd_decode_bit( struct Range_decoder * const rdec, } } -static inline int Rd_decode_tree( struct Range_decoder * const rdec, - Bit_model bm[], const int num_bits ) +static inline unsigned Rd_decode_tree3( struct Range_decoder * const rdec, + Bit_model bm[] ) { - int symbol = 1; - int i; - for( i = num_bits; i > 0; --i ) - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - return symbol - (1 << num_bits); + unsigned symbol = 1; + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + return symbol & 7; } -static inline int Rd_decode_tree6( struct Range_decoder * const rdec, - Bit_model bm[] ) +static inline unsigned Rd_decode_tree6( struct Range_decoder * const rdec, + Bit_model bm[] ) { - int symbol = 1; + unsigned symbol = 1; symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); @@ -162,69 +157,69 @@ static inline int Rd_decode_tree6( struct Range_decoder * const rdec, return symbol & 0x3F; } -static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec, - Bit_model bm[], const int num_bits ) +static inline unsigned Rd_decode_tree8( struct Range_decoder * const rdec, + Bit_model bm[] ) { - int model = 1; - int symbol = 0; + unsigned symbol = 1; + int i; + for( i = 0; i < 8; ++i ) + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + return symbol & 0xFF; + } + +static inline unsigned +Rd_decode_tree_reversed( struct Range_decoder * const rdec, + Bit_model bm[], const int num_bits ) + { + unsigned model = 1; + unsigned symbol = 0; int i; for( i = 0; i < num_bits; ++i ) { - const bool bit = Rd_decode_bit( rdec, &bm[model] ); - model <<= 1; - if( bit ) { ++model; symbol |= (1 << i); } + const unsigned bit = Rd_decode_bit( rdec, &bm[model] ); + model = ( model << 1 ) + bit; + symbol |= ( bit << i ); } return symbol; } -static inline int Rd_decode_tree_reversed4( struct Range_decoder * const rdec, - Bit_model bm[] ) +static inline unsigned +Rd_decode_tree_reversed4( struct Range_decoder * const rdec, Bit_model bm[] ) { - int model = 1; - int symbol = Rd_decode_bit( rdec, &bm[model] ); - int bit; - model = (model << 1) + symbol; - bit = Rd_decode_bit( rdec, &bm[model] ); - model = (model << 1) + bit; symbol |= (bit << 1); + unsigned symbol = Rd_decode_bit( rdec, &bm[1] ); + unsigned model = 2 + symbol; + unsigned bit = Rd_decode_bit( rdec, &bm[model] ); + model = ( model << 1 ) + bit; symbol |= ( bit << 1 ); bit = Rd_decode_bit( rdec, &bm[model] ); - model = (model << 1) + bit; symbol |= (bit << 2); - if( Rd_decode_bit( rdec, &bm[model] ) ) symbol |= 8; + model = ( model << 1 ) + bit; symbol |= ( bit << 2 ); + symbol |= ( Rd_decode_bit( rdec, &bm[model] ) << 3 ); return symbol; } -static inline int Rd_decode_matched( struct Range_decoder * const rdec, - Bit_model bm[], int match_byte ) +static inline unsigned Rd_decode_matched( struct Range_decoder * const rdec, + Bit_model bm[], unsigned match_byte ) { - Bit_model * const bm1 = bm + 0x100; - int symbol = 1; - while( symbol < 0x100 ) + unsigned symbol = 1; + unsigned mask = 0x100; + while( true ) { - int match_bit, bit; - match_byte <<= 1; - match_bit = match_byte & 0x100; - bit = Rd_decode_bit( rdec, &bm1[match_bit+symbol] ); - symbol = ( symbol << 1 ) | bit; - if( match_bit != bit << 8 ) - { - while( symbol < 0x100 ) - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - break; - } + const unsigned match_bit = ( match_byte <<= 1 ) & mask; + const unsigned bit = Rd_decode_bit( rdec, &bm[symbol+match_bit+mask] ); + symbol = ( symbol << 1 ) + bit; + if( symbol > 0xFF ) return symbol & 0xFF; + mask &= ~(match_bit ^ (bit << 8)); /* if( match_bit != bit ) mask = 0; */ } - return symbol & 0xFF; } -static inline int Rd_decode_len( struct Range_decoder * const rdec, - struct Len_model * const lm, - const int pos_state ) +static inline unsigned Rd_decode_len( struct Range_decoder * const rdec, + struct Len_model * const lm, + const int pos_state ) { if( Rd_decode_bit( rdec, &lm->choice1 ) == 0 ) - return Rd_decode_tree( rdec, lm->bm_low[pos_state], len_low_bits ); + return Rd_decode_tree3( rdec, lm->bm_low[pos_state] ); if( Rd_decode_bit( rdec, &lm->choice2 ) == 0 ) - return len_low_symbols + - Rd_decode_tree( rdec, lm->bm_mid[pos_state], len_mid_bits ); - return len_low_symbols + len_mid_symbols + - Rd_decode_tree( rdec, lm->bm_high, len_high_bits ); + return len_low_symbols + Rd_decode_tree3( rdec, lm->bm_mid[pos_state] ); + return len_low_symbols + len_mid_symbols + Rd_decode_tree8( rdec, lm->bm_high ); } @@ -233,49 +228,37 @@ struct LZ_decoder unsigned long long partial_data_pos; struct Range_decoder * rdec; unsigned dictionary_size; - int buffer_size; + unsigned buffer_size; uint8_t * buffer; /* output buffer */ - int pos; /* current pos in buffer */ - int stream_pos; /* first byte not yet written to file */ + unsigned pos; /* current pos in buffer */ + unsigned stream_pos; /* first byte not yet written to file */ uint32_t crc; int outfd; /* output file descriptor */ bool pos_wrapped; - - Bit_model bm_literal[1<<literal_context_bits][0x300]; - Bit_model bm_match[states][pos_states]; - Bit_model bm_rep[states]; - Bit_model bm_rep0[states]; - Bit_model bm_rep1[states]; - Bit_model bm_rep2[states]; - Bit_model bm_len[states][pos_states]; - Bit_model bm_dis_slot[len_states][1<<dis_slot_bits]; - Bit_model bm_dis[modeled_distances-end_dis_model]; - Bit_model bm_align[dis_align_size]; - - struct Len_model match_len_model; - struct Len_model rep_len_model; + bool pos_wrapped_dic; }; void LZd_flush_data( struct LZ_decoder * const d ); -int seek_read( const int fd, uint8_t * const buf, const int size, - const int offset ); +unsigned seek_read_back( const int fd, uint8_t * const buf, const int size, + const int offset ); static inline uint8_t LZd_peek_prev( const struct LZ_decoder * const d ) { - const int i = ( ( d->pos > 0 ) ? d->pos : d->buffer_size ) - 1; - return d->buffer[i]; + if( d->pos > 0 ) return d->buffer[d->pos-1]; + if( d->pos_wrapped ) return d->buffer[d->buffer_size-1]; + return 0; /* prev_byte of first byte */ } static inline uint8_t LZd_peek( const struct LZ_decoder * const d, - const int distance ) + const unsigned distance ) { uint8_t b; - const int i = d->pos - distance - 1; - if( i >= 0 ) b = d->buffer[i]; - else if( i + d->buffer_size >= d->pos ) - b = d->buffer[i+d->buffer_size]; - else if( seek_read( d->outfd, &b, 1, i - d->stream_pos ) != 1 ) + if( d->pos > distance ) b = d->buffer[d->pos-distance-1]; + else if( d->buffer_size > distance ) + b = d->buffer[d->buffer_size+d->pos-distance-1]; + else if( seek_read_back( d->outfd, &b, 1, + distance + 1 + d->stream_pos - d->pos ) != 1 ) { show_error( "Seek error", errno, false ); cleanup_and_fail( 1 ); } return b; } @@ -287,19 +270,28 @@ static inline void LZd_put_byte( struct LZ_decoder * const d, const uint8_t b ) } static inline void LZd_copy_block( struct LZ_decoder * const d, - const int distance, int len ) + const unsigned distance, unsigned len ) { - int i = d->pos - distance - 1; - bool fast; - if( i < 0 ) - { i += d->buffer_size; - fast = ( len <= d->buffer_size - i && len <= i - d->pos ); } + unsigned lpos = d->pos, i = lpos - distance - 1; + bool fast, fast2; + if( lpos > distance ) + { + fast = ( len < d->buffer_size - lpos ); + fast2 = ( fast && len <= lpos - i ); + } else - fast = ( len < d->buffer_size - d->pos && len <= d->pos - i ); - if( fast ) /* no wrap, no overlap */ { - memcpy( d->buffer + d->pos, d->buffer + i, len ); + i += d->buffer_size; + fast = ( len < d->buffer_size - i ); /* (i == pos) may happen */ + fast2 = ( fast && len <= i - lpos ); + } + if( fast ) /* no wrap */ + { d->pos += len; + if( fast2 ) /* no wrap, no overlap */ + memcpy( d->buffer + lpos, d->buffer + i, len ); + else + for( ; len > 0; --len ) d->buffer[lpos++] = d->buffer[i++]; } else for( ; len > 0; --len ) { @@ -310,16 +302,16 @@ static inline void LZd_copy_block( struct LZ_decoder * const d, } static inline void LZd_copy_block2( struct LZ_decoder * const d, - const int distance, int len ) + const unsigned distance, unsigned len ) { - if( distance < d->buffer_size ) /* block is in buffer */ + if( d->buffer_size > distance ) /* block is in buffer */ { LZd_copy_block( d, distance, len ); return; } if( len < d->buffer_size - d->pos ) /* no wrap */ { - const int offset = d->pos - d->stream_pos - distance - 1; - if( len <= -offset ) /* block is in file */ + const unsigned offset = distance + 1 + d->stream_pos - d->pos; + if( len <= offset ) /* block is in file */ { - if( seek_read( d->outfd, d->buffer + d->pos, len, offset ) != len ) + if( seek_read_back( d->outfd, d->buffer + d->pos, len, offset ) != len ) { show_error( "Seek error", errno, false ); cleanup_and_fail( 1 ); } d->pos += len; return; @@ -331,8 +323,8 @@ static inline void LZd_copy_block2( struct LZ_decoder * const d, static inline bool LZd_init( struct LZ_decoder * const d, struct Range_decoder * const rde, - const int buffer_size, - const int dict_size, const int ofd ) + const unsigned buffer_size, + const unsigned dict_size, const int ofd ) { d->partial_data_pos = 0; d->rdec = rde; @@ -345,20 +337,7 @@ static inline bool LZd_init( struct LZ_decoder * const d, d->crc = 0xFFFFFFFFU; d->outfd = ofd; d->pos_wrapped = false; - - Bm_array_init( d->bm_literal[0], (1 << literal_context_bits) * 0x300 ); - Bm_array_init( d->bm_match[0], states * pos_states ); - Bm_array_init( d->bm_rep, states ); - Bm_array_init( d->bm_rep0, states ); - Bm_array_init( d->bm_rep1, states ); - Bm_array_init( d->bm_rep2, states ); - Bm_array_init( d->bm_len[0], states * pos_states ); - Bm_array_init( d->bm_dis_slot[0], len_states * (1 << dis_slot_bits) ); - Bm_array_init( d->bm_dis, modeled_distances - end_dis_model ); - Bm_array_init( d->bm_align, dis_align_size ); - Lm_init( &d->match_len_model ); - Lm_init( &d->rep_len_model ); - d->buffer[d->buffer_size-1] = 0; /* prev_byte of first byte */ + d->pos_wrapped_dic = false; return true; } diff --git a/doc/lunzip.1 b/doc/lunzip.1 index 53b3faf..c83f901 100644 --- a/doc/lunzip.1 +++ b/doc/lunzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH LUNZIP "1" "May 2016" "lunzip 1.8" "User Commands" +.TH LUNZIP "1" "April 2017" "lunzip 1.9" "User Commands" .SH NAME lunzip \- decompressor for the lzip format .SH SYNOPSIS @@ -44,6 +44,9 @@ overwrite existing output files \fB\-k\fR, \fB\-\-keep\fR keep (don't delete) input files .TP +\fB\-l\fR, \fB\-\-list\fR +print (un)compressed file sizes +.TP \fB\-o\fR, \fB\-\-output=\fR<file> if reading standard input, write to <file> .TP @@ -63,6 +66,8 @@ If no file names are given, or if a file is '\-', lunzip decompresses from standard input to standard output. Numbers may be followed by a multiplier: k = kB = 10^3 = 1000, Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... +Buffer sizes 12 to 29 are interpreted as powers of two, meaning 2^12 +to 2^29 bytes. .PP Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or @@ -73,7 +78,7 @@ Report bugs to lzip\-bug@nongnu.org .br Lunzip home page: http://www.nongnu.org/lzip/lunzip.html .SH COPYRIGHT -Copyright \(co 2016 Antonio Diaz Diaz. +Copyright \(co 2017 Antonio Diaz Diaz. License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. diff --git a/file_index.c b/file_index.c new file mode 100644 index 0000000..e737608 --- /dev/null +++ b/file_index.c @@ -0,0 +1,268 @@ +/* Lunzip - Decompressor for the lzip format + Copyright (C) 2010-2017 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <stdlib.h> +#include <unistd.h> + +#include "lzip.h" +#include "file_index.h" + + +static int seek_read( const int fd, uint8_t * const buf, const int size, + const long long pos ) + { + if( lseek( fd, pos, SEEK_SET ) == pos ) + return readblock( fd, buf, size ); + return 0; + } + + +static bool add_error( struct File_index * const fi, const char * const msg ) + { + const int len = strlen( msg ); + void * tmp = resize_buffer( fi->error, fi->error_size + len + 1 ); + if( !tmp ) return false; + fi->error = (char *)tmp; + strncpy( fi->error + fi->error_size, msg, len + 1 ); + fi->error_size += len; + return true; + } + + +static bool push_back_member( struct File_index * const fi, + const long long dp, const long long ds, + const long long mp, const long long ms, + const unsigned dict_size ) + { + struct Member * p; + void * tmp = resize_buffer( fi->member_vector, + ( fi->members + 1 ) * sizeof fi->member_vector[0] ); + if( !tmp ) + { add_error( fi, "Not enough memory." ); fi->retval = 1; return false; } + fi->member_vector = (struct Member *)tmp; + p = &(fi->member_vector[fi->members]); + init_member( p, dp, ds, mp, ms, dict_size ); + ++fi->members; + return true; + } + + +static void Fi_free_member_vector( struct File_index * const fi ) + { + if( fi->member_vector ) + { free( fi->member_vector ); fi->member_vector = 0; } + fi->members = 0; + } + + +static void Fi_reverse_member_vector( struct File_index * const fi ) + { + struct Member tmp; + long i; + for( i = 0; i < fi->members / 2; ++i ) + { + tmp = fi->member_vector[i]; + fi->member_vector[i] = fi->member_vector[fi->members-i-1]; + fi->member_vector[fi->members-i-1] = tmp; + } + } + + +static void Fi_set_errno_error( struct File_index * const fi, + const char * const msg ) + { + add_error( fi, msg ); add_error( fi, strerror( errno ) ); + fi->retval = 1; + } + +static void Fi_set_num_error( struct File_index * const fi, + const char * const msg, unsigned long long num ) + { + char buf[80]; + snprintf( buf, sizeof buf, "%s%llu", msg, num ); + add_error( fi, buf ); + fi->retval = 2; + } + + +/* If successful, push last member and set pos to member header. */ +static bool Fi_skip_trailing_data( struct File_index * const fi, + const int fd, long long * const pos ) + { + enum { block_size = 16384, + buffer_size = block_size + Ft_size - 1 + Fh_size }; + uint8_t buffer[buffer_size]; + int bsize = *pos % block_size; /* total bytes in buffer */ + int search_size, rd_size; + unsigned long long ipos; + int i; + if( bsize <= buffer_size - block_size ) bsize += block_size; + search_size = bsize; /* bytes to search for trailer */ + rd_size = bsize; /* bytes to read from file */ + ipos = *pos - rd_size; /* aligned to block_size */ + if( *pos < min_member_size ) return false; + + while( true ) + { + const uint8_t max_msb = ( ipos + search_size ) >> 56; + if( seek_read( fd, buffer, rd_size, ipos ) != rd_size ) + { Fi_set_errno_error( fi, "Error seeking member trailer: " ); + return false; } + for( i = search_size; i >= Ft_size; --i ) + if( buffer[i-1] <= max_msb ) /* most significant byte of member_size */ + { + File_header header; + File_trailer * trailer = (File_trailer *)( buffer + i - Ft_size ); + const unsigned long long member_size = Ft_get_member_size( *trailer ); + unsigned dictionary_size; + if( member_size == 0 ) + { while( i > Ft_size && buffer[i-9] == 0 ) --i; continue; } + if( member_size < min_member_size || member_size > ipos + i ) + continue; + if( seek_read( fd, header, Fh_size, + ipos + i - member_size ) != Fh_size ) + { Fi_set_errno_error( fi, "Error reading member header: " ); + return false; } + dictionary_size = Fh_get_dictionary_size( header ); + if( !Fh_verify_magic( header ) || !Fh_verify_version( header ) || + !isvalid_ds( dictionary_size ) ) continue; + if( Fh_verify_prefix( buffer + i, bsize - i ) ) + { + add_error( fi, "Last member in input file is truncated or corrupt." ); + fi->retval = 2; return false; + } + *pos = ipos + i - member_size; + return push_back_member( fi, 0, Ft_get_data_size( *trailer ), *pos, + member_size, dictionary_size ); + } + if( ipos <= 0 ) + { Fi_set_num_error( fi, "Member size in trailer is corrupt at pos ", + *pos - 8 ); + return false; } + bsize = buffer_size; + search_size = bsize - Fh_size; + rd_size = block_size; + ipos -= rd_size; + memcpy( buffer + rd_size, buffer, buffer_size - rd_size ); + } + } + + +bool Fi_init( struct File_index * const fi, const int infd, + const bool ignore_trailing ) + { + File_header header; + long long pos; + long i; + fi->member_vector = 0; + fi->error = 0; + fi->isize = lseek( infd, 0, SEEK_END ); + fi->members = 0; + fi->error_size = 0; + fi->retval = 0; + if( fi->isize < 0 ) + { Fi_set_errno_error( fi, "Input file is not seekable: " ); return false; } + if( fi->isize < min_member_size ) + { add_error( fi, "Input file is too short." ); fi->retval = 2; + return false; } + if( fi->isize > INT64_MAX ) + { add_error( fi, "Input file is too long (2^63 bytes or more)." ); + fi->retval = 2; return false; } + + if( seek_read( infd, header, Fh_size, 0 ) != Fh_size ) + { Fi_set_errno_error( fi, "Error reading member header: " ); return false; } + if( !Fh_verify_magic( header ) ) + { add_error( fi, bad_magic_msg ); fi->retval = 2; return false; } + if( !Fh_verify_version( header ) ) + { add_error( fi, bad_version( Fh_version( header ) ) ); fi->retval = 2; + return false; } + if( !isvalid_ds( Fh_get_dictionary_size( header ) ) ) + { add_error( fi, bad_dict_msg ); fi->retval = 2; return false; } + + pos = fi->isize; /* always points to a header or to EOF */ + while( pos >= min_member_size ) + { + File_trailer trailer; + unsigned long long member_size; + unsigned dictionary_size; + if( seek_read( infd, trailer, Ft_size, pos - Ft_size ) != Ft_size ) + { Fi_set_errno_error( fi, "Error reading member trailer: " ); break; } + member_size = Ft_get_member_size( trailer ); + if( member_size < min_member_size || member_size > (unsigned long long)pos ) + { + if( fi->members > 0 ) + Fi_set_num_error( fi, "Member size in trailer is corrupt at pos ", + pos - 8 ); + else if( Fi_skip_trailing_data( fi, infd, &pos ) ) + { if( ignore_trailing ) continue; + add_error( fi, trailing_msg ); fi->retval = 2; return false; } + break; + } + if( seek_read( infd, header, Fh_size, pos - member_size ) != Fh_size ) + { Fi_set_errno_error( fi, "Error reading member header: " ); break; } + dictionary_size = Fh_get_dictionary_size( header ); + if( !Fh_verify_magic( header ) || !Fh_verify_version( header ) || + !isvalid_ds( dictionary_size ) ) + { + if( fi->members > 0 ) + Fi_set_num_error( fi, "Bad header at pos ", pos - member_size ); + else if( Fi_skip_trailing_data( fi, infd, &pos ) ) + { if( ignore_trailing ) continue; + add_error( fi, trailing_msg ); fi->retval = 2; return false; } + break; + } + pos -= member_size; + if( !push_back_member( fi, 0, Ft_get_data_size( trailer ), pos, + member_size, dictionary_size ) ) + return false; + } + if( pos != 0 || fi->members <= 0 ) + { + Fi_free_member_vector( fi ); + if( fi->retval == 0 ) + { add_error( fi, "Can't create file index." ); fi->retval = 2; } + return false; + } + Fi_reverse_member_vector( fi ); + for( i = 0; i < fi->members - 1; ++i ) + { + const long long end = block_end( fi->member_vector[i].dblock ); + if( end < 0 || end > INT64_MAX ) + { + Fi_free_member_vector( fi ); + add_error( fi, "Data in input file is too long (2^63 bytes or more)." ); + fi->retval = 2; return false; + } + fi->member_vector[i+1].dblock.pos = end; + } + return true; + } + + +void Fi_free( struct File_index * const fi ) + { + Fi_free_member_vector( fi ); + if( fi->error ) { free( fi->error ); fi->error = 0; } + fi->error_size = 0; + } diff --git a/file_index.h b/file_index.h new file mode 100644 index 0000000..38f0246 --- /dev/null +++ b/file_index.h @@ -0,0 +1,90 @@ +/* Lunzip - Decompressor for the lzip format + Copyright (C) 2010-2017 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef INT64_MAX +#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL +#endif + + +struct Block + { + long long pos, size; /* pos + size <= INT64_MAX */ + }; + +static inline void init_block( struct Block * const b, + const long long p, const long long s ) + { b->pos = p; b->size = s; } + +static inline long long block_end( const struct Block b ) + { return b.pos + b.size; } + + +struct Member + { + struct Block dblock, mblock; /* data block, member block */ + unsigned dictionary_size; + }; + +static inline void init_member( struct Member * const m, + const long long dp, const long long ds, + const long long mp, const long long ms, + const unsigned dict_size ) + { init_block( &m->dblock, dp, ds ); init_block( &m->mblock, mp, ms ); + m->dictionary_size = dict_size; } + +struct File_index + { + struct Member * member_vector; + char * error; + long long isize; + long members; + int error_size; + int retval; + }; + +bool Fi_init( struct File_index * const fi, const int infd, + const bool ignore_trailing ); + +void Fi_free( struct File_index * const fi ); + +static inline long long Fi_udata_size( const struct File_index * const fi ) + { + if( fi->members <= 0 ) return 0; + return block_end( fi->member_vector[fi->members-1].dblock ); + } + +static inline long long Fi_cdata_size( const struct File_index * const fi ) + { + if( fi->members <= 0 ) return 0; + return block_end( fi->member_vector[fi->members-1].mblock ); + } + + /* total size including trailing data (if any) */ +static inline long long Fi_file_size( const struct File_index * const fi ) + { if( fi->isize >= 0 ) return fi->isize; else return 0; } + +static inline const struct Block * Fi_dblock( const struct File_index * const fi, + const long i ) + { return &fi->member_vector[i].dblock; } + +static inline const struct Block * Fi_mblock( const struct File_index * const fi, + const long i ) + { return &fi->member_vector[i].mblock; } + +static inline unsigned Fi_dictionary_size( const struct File_index * const fi, + const long i ) + { return fi->member_vector[i].dictionary_size; } @@ -0,0 +1,123 @@ +/* Lunzip - Decompressor for the lzip format + Copyright (C) 2010-2017 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <stdbool.h> +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <unistd.h> +#include <sys/stat.h> + +#include "lzip.h" +#include "file_index.h" + + +static void list_line( const unsigned long long uncomp_size, + const unsigned long long comp_size, + const char * const input_filename ) + { + if( uncomp_size > 0 ) + printf( "%15llu %15llu %6.2f%% %s\n", uncomp_size, comp_size, + 100.0 * ( 1.0 - ( (double)comp_size / uncomp_size ) ), + input_filename ); + else + printf( "%15llu %15llu -INF%% %s\n", uncomp_size, comp_size, + input_filename ); + } + + +int list_files( const char * const filenames[], const int num_filenames, + const bool ignore_trailing ) + { + unsigned long long total_comp = 0, total_uncomp = 0; + int files = 0, retval = 0; + int i; + bool first_post = true; + bool stdin_used = false; + for( i = 0; i < num_filenames; ++i ) + { + const char * input_filename; + struct File_index file_index; + struct stat in_stats; /* not used */ + int infd; + const bool from_stdin = ( strcmp( filenames[i], "-" ) == 0 ); + if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; } + input_filename = from_stdin ? "(stdin)" : filenames[i]; + infd = from_stdin ? STDIN_FILENO : + open_instream( input_filename, &in_stats, true, true ); + if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } + + Fi_init( &file_index, infd, ignore_trailing ); + close( infd ); + if( file_index.retval != 0 ) + { + show_file_error( input_filename, file_index.error, 0 ); + if( retval < file_index.retval ) retval = file_index.retval; + Fi_free( &file_index ); continue; + } + if( verbosity >= 0 ) + { + const unsigned long long udata_size = Fi_udata_size( &file_index ); + const unsigned long long cdata_size = Fi_cdata_size( &file_index ); + total_comp += cdata_size; total_uncomp += udata_size; ++files; + if( first_post ) + { + first_post = false; + if( verbosity >= 1 ) fputs( " dict memb trail ", stdout ); + fputs( " uncompressed compressed saved name\n", stdout ); + } + if( verbosity >= 1 ) + { + long long trailing_size; + unsigned dictionary_size = 0; + long i; + for( i = 0; i < file_index.members; ++i ) + dictionary_size = + max( dictionary_size, Fi_dictionary_size( &file_index, i ) ); + trailing_size = Fi_file_size( &file_index ) - cdata_size; + printf( "%s %5ld %6lld ", format_ds( dictionary_size ), + file_index.members, trailing_size ); + } + list_line( udata_size, cdata_size, input_filename ); + + if( verbosity >= 2 && file_index.members > 1 ) + { + long i; + fputs( " member data_pos data_size member_pos member_size\n", stdout ); + for( i = 0; i < file_index.members; ++i ) + { + const struct Block * db = Fi_dblock( &file_index, i ); + const struct Block * mb = Fi_mblock( &file_index, i ); + printf( "%5ld %15llu %15llu %15llu %15llu\n", + i + 1, db->pos, db->size, mb->pos, mb->size ); + } + first_post = true; /* reprint heading after list of members */ + } + fflush( stdout ); + } + Fi_free( &file_index ); + } + if( verbosity >= 0 && files > 1 ) + { + if( verbosity >= 1 ) fputs( " ", stdout ); + list_line( total_uncomp, total_comp, "(totals)" ); + fflush( stdout ); + } + return retval; + } @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2016 Antonio Diaz Diaz. + Copyright (C) 2010-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -49,6 +49,7 @@ enum { min_dictionary_size = 1 << min_dictionary_bits, /* >= modeled_distances */ max_dictionary_bits = 29, max_dictionary_size = 1 << max_dictionary_bits, + min_member_size = 36, literal_context_bits = 3, literal_pos_state_bits = 0, /* not used */ pos_state_bits = 2, @@ -131,9 +132,9 @@ static inline void Pp_init( struct Pretty_print * const pp, pp->stdin_name = "(stdin)"; pp->longest_name = 0; pp->first_post = false; - stdin_name_len = strlen( pp->stdin_name ); if( verbosity <= 0 ) return; + stdin_name_len = strlen( pp->stdin_name ); for( i = 0; i < num_filenames; ++i ) { const char * const s = filenames[i]; @@ -179,11 +180,18 @@ static inline void CRC32_update_buf( uint32_t * const crc, const int size ) { int i; + uint32_t c = *crc; for( i = 0; i < size; ++i ) - *crc = crc32[(*crc^buffer[i])&0xFF] ^ ( *crc >> 8 ); + c = crc32[(c^buffer[i])&0xFF] ^ ( c >> 8 ); + *crc = c; } +static inline bool isvalid_ds( const unsigned dictionary_size ) + { return ( dictionary_size >= min_dictionary_size && + dictionary_size <= max_dictionary_size ); } + + static const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; /* "LZIP" */ typedef uint8_t File_header[6]; /* 0-3 magic bytes */ @@ -246,7 +254,26 @@ static inline unsigned long long Ft_get_member_size( const File_trailer data ) } +static const char * const bad_magic_msg = "Bad magic number (file not in lzip format)."; +static const char * const bad_dict_msg = "Invalid dictionary size in member header."; +static const char * const trailing_msg = "Trailing data not allowed."; + +/* defined in decoder.c */ +int readblock( const int fd, uint8_t * const buf, const int size ); + +/* defined in list.c */ +int list_files( const char * const filenames[], const int num_filenames, + const bool ignore_trailing ); + /* defined in main.c */ extern int verbosity; +struct stat; +const char * bad_version( const unsigned version ); +const char * format_ds( const unsigned dictionary_size ); +int open_instream( const char * const name, struct stat * const in_statsp, + const bool no_ofile, const bool reg_only ); +void * resize_buffer( void * buf, const unsigned min_size ); void cleanup_and_fail( const int retval ); void show_error( const char * const msg, const int errcode, const bool help ); +void show_file_error( const char * const filename, const char * const msg, + const int errcode ); @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2016 Antonio Diaz Diaz. + Copyright (C) 2010-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -67,14 +67,16 @@ int verbosity = 0; const char * const Program_name = "Lunzip"; const char * const program_name = "lunzip"; -const char * const program_year = "2016"; +const char * const program_year = "2017"; const char * invocation_name = 0; -struct { const char * from; const char * to; } const known_extensions[] = { +const struct { const char * from; const char * to; } known_extensions[] = { { ".lz", "" }, { ".tlz", ".tar" }, { 0, 0 } }; +enum Mode { m_compress, m_decompress, m_list, m_test }; + char * output_filename = 0; int outfd = -1; bool delete_output_on_interrupt = false; @@ -105,6 +107,7 @@ static void show_help( void ) " -d, --decompress decompress (this is the default)\n" " -f, --force overwrite existing output files\n" " -k, --keep keep (don't delete) input files\n" + " -l, --list print (un)compressed file sizes\n" " -o, --output=<file> if reading standard input, write to <file>\n" " -q, --quiet suppress all messages\n" " -t, --test test compressed file integrity\n" @@ -114,6 +117,8 @@ static void show_help( void ) "from standard input to standard output.\n" "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" + "Buffer sizes 12 to 29 are interpreted as powers of two, meaning 2^12\n" + "to 2^29 bytes.\n" "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" "invalid input file, 3 for an internal consistency error (eg, bug) which\n" @@ -133,23 +138,38 @@ static void show_version( void ) } +const char * bad_version( const unsigned version ) + { + static char buf[80]; + snprintf( buf, sizeof buf, "Version %u member format not supported.", + version ); + return buf; + } + + +const char * format_ds( const unsigned dictionary_size ) + { + enum { bufsize = 16, factor = 1024 }; + static char buf[bufsize]; + const char * const prefix[8] = + { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + const char * p = ""; + const char * np = " "; + unsigned num = dictionary_size, i; + bool exact = ( num % factor == 0 ); + + for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; + p = prefix[i]; np = ""; } + snprintf( buf, bufsize, "%s%4u %sB", np, num, p ); + return buf; + } + + static void show_header( const unsigned dictionary_size ) { if( verbosity >= 3 ) - { - const char * const prefix[8] = - { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; - enum { factor = 1024 }; - const char * p = ""; - const char * np = " "; - unsigned num = dictionary_size, i; - bool exact = ( num % factor == 0 ); - - for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) - { num /= factor; if( num % factor != 0 ) exact = false; - p = prefix[i]; np = ""; } - fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p ); - } + fprintf( stderr, "dictionary %s. ", format_ds( dictionary_size ) ); } @@ -169,7 +189,7 @@ static unsigned long getnum( const char * const ptr, if( !errno && tail[0] ) { - const int factor = ( tail[1] == 'i' ) ? 1024 : 1000; + const unsigned factor = ( tail[1] == 'i' ) ? 1024 : 1000; int exponent = 0; /* 0 = bad multiplier */ int i; switch( tail[0] ) @@ -208,7 +228,7 @@ static unsigned long getnum( const char * const ptr, static int get_dict_size( const char * const arg ) { char * tail; - const int bits = strtol( arg, &tail, 0 ); + const long bits = strtol( arg, &tail, 0 ); if( bits >= min_dictionary_bits && bits <= max_dictionary_bits && *tail == 0 ) return ( 1 << bits ); @@ -216,37 +236,44 @@ static int get_dict_size( const char * const arg ) } +void set_mode( enum Mode * const program_modep, const enum Mode new_mode ) + { + if( *program_modep != m_compress && *program_modep != new_mode ) + { + show_error( "Only one operation can be specified.", 0, true ); + exit( 1 ); + } + *program_modep = new_mode; + } + + static int extension_index( const char * const name ) { - int i; - for( i = 0; known_extensions[i].from; ++i ) + int eindex; + for( eindex = 0; known_extensions[eindex].from; ++eindex ) { - const char * const ext = known_extensions[i].from; + const char * const ext = known_extensions[eindex].from; const unsigned name_len = strlen( name ); const unsigned ext_len = strlen( ext ); if( name_len > ext_len && strncmp( name + name_len - ext_len, ext, ext_len ) == 0 ) - return i; + return eindex; } return -1; } -static int open_instream( const char * const name, struct stat * const in_statsp, - const bool no_ofile ) +int open_instream( const char * const name, struct stat * const in_statsp, + const bool no_ofile, const bool reg_only ) { int infd = open( name, O_RDONLY | O_BINARY ); if( infd < 0 ) - { - if( verbosity >= 0 ) - fprintf( stderr, "%s: Can't open input file '%s': %s\n", - program_name, name, strerror( errno ) ); - } + show_file_error( name, "Can't open input file", errno ); else { const int i = fstat( infd, in_statsp ); const mode_t mode = in_statsp->st_mode; - const bool can_read = ( i == 0 && + const bool can_read = ( i == 0 && !reg_only && ( S_ISBLK( mode ) || S_ISCHR( mode ) || S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || !no_ofile ) ) ) @@ -265,7 +292,7 @@ static int open_instream( const char * const name, struct stat * const in_statsp /* assure at least a minimum size for buffer 'buf' */ -static void * resize_buffer( void * buf, const int min_size ) +void * resize_buffer( void * buf, const unsigned min_size ) { if( buf ) buf = realloc( buf, min_size ); else buf = malloc( min_size ); @@ -278,19 +305,19 @@ static void * resize_buffer( void * buf, const int min_size ) } -static void set_d_outname( const char * const name, const int i ) +static void set_d_outname( const char * const name, const int eindex ) { const unsigned name_len = strlen( name ); - if( i >= 0 ) + if( eindex >= 0 ) { - const char * const from = known_extensions[i].from; + const char * const from = known_extensions[eindex].from; const unsigned from_len = strlen( from ); if( name_len > from_len ) { output_filename = resize_buffer( output_filename, name_len + - strlen( known_extensions[0].to ) + 1 ); + strlen( known_extensions[eindex].to ) + 1 ); strcpy( output_filename, name ); - strcpy( output_filename + name_len - from_len, known_extensions[i].to ); + strcpy( output_filename + name_len - from_len, known_extensions[eindex].to ); return; } } @@ -376,10 +403,10 @@ static void close_and_set_permissions( const struct stat * const in_statsp ) } -static unsigned char xdigit( const int value ) +static unsigned char xdigit( const unsigned value ) { - if( value >= 0 && value <= 9 ) return '0' + value; - if( value >= 10 && value <= 15 ) return 'A' + value - 10; + if( value <= 9 ) return '0' + value; + if( value <= 15 ) return 'A' + value - 10; return 0; } @@ -392,35 +419,28 @@ static bool show_trailing_data( const uint8_t * const data, const int size, { int i; char buf[80]; - int len = snprintf( buf, sizeof buf, "%strailing data = ", - all ? "" : "first bytes of " ); - bool text = true; - for( i = 0; i < size; ++i ) - if( !isprint( data[i] ) ) { text = false; break; } - if( text ) + unsigned len = max( 0, snprintf( buf, sizeof buf, "%strailing data = ", + all ? "" : "first bytes of " ) ); + for( i = 0; i < size && len + 2 < sizeof buf; ++i ) { - if( len > 0 && len < (int)sizeof buf ) - snprintf( buf + len, sizeof buf - len, "'%.*s'", size, (const char *)data ); - } - else - { - for( i = 0; i < size && len > 0 && len + 3 < (int)sizeof buf; ++i ) - { - if( i > 0 ) buf[len++] = ' '; - buf[len++] = xdigit( data[i] >> 4 ); - buf[len++] = xdigit( data[i] & 0x0F ); - buf[len] = 0; - } + buf[len++] = xdigit( data[i] >> 4 ); + buf[len++] = xdigit( data[i] & 0x0F ); + buf[len++] = ' '; } + if( len < sizeof buf ) buf[len++] = '\''; + for( i = 0; i < size && len < sizeof buf; ++i ) + { if( isprint( data[i] ) ) buf[len++] = data[i]; else buf[len++] = '.'; } + if( len < sizeof buf ) buf[len++] = '\''; + if( len < sizeof buf ) buf[len] = 0; else buf[sizeof buf - 1] = 0; Pp_show_msg( pp, buf ); - if( !ignore_trailing ) show_error( "Trailing data not allowed.", 0, false ); + if( !ignore_trailing ) show_file_error( pp->name, trailing_msg, 0 ); } return ignore_trailing; } static int decompress( const int infd, struct Pretty_print * const pp, - const int buffer_size, + const unsigned buffer_size, const bool ignore_trailing, const bool testing ) { unsigned long long partial_file_pos = 0; @@ -454,25 +474,19 @@ static int decompress( const int infd, struct Pretty_print * const pp, if( !Fh_verify_magic( header ) ) { if( first_member ) - { Pp_show_msg( pp, "Bad magic number (file not in lzip format)." ); - retval = 2; } + { show_file_error( pp->name, bad_magic_msg, 0 ); retval = 2; } else if( !show_trailing_data( header, size, pp, false, ignore_trailing ) ) retval = 2; break; } if( !Fh_verify_version( header ) ) { - if( verbosity >= 0 ) - { Pp_show_msg( pp, 0 ); - fprintf( stderr, "Version %d member format not supported.\n", - Fh_version( header ) ); } + Pp_show_msg( pp, bad_version( Fh_version( header ) ) ); retval = 2; break; } dictionary_size = Fh_get_dictionary_size( header ); - if( dictionary_size < min_dictionary_size || - dictionary_size > max_dictionary_size ) - { Pp_show_msg( pp, "Invalid dictionary size in member header." ); - retval = 2; break; } + if( !isvalid_ds( dictionary_size ) ) + { Pp_show_msg( pp, bad_dict_msg ); retval = 2; break; } if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) { Pp_show_msg( pp, 0 ); show_header( dictionary_size ); } @@ -536,6 +550,16 @@ void show_error( const char * const msg, const int errcode, const bool help ) } +void show_file_error( const char * const filename, const char * const msg, + const int errcode ) + { + if( verbosity < 0 ) return; + fprintf( stderr, "%s: %s: %s", program_name, filename, msg ); + if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) ); + fputc( '\n', stderr ); + } + + void internal_error( const char * const msg ) { if( verbosity >= 0 ) @@ -546,12 +570,12 @@ void internal_error( const char * const msg ) int main( const int argc, const char * const argv[] ) { - const char * input_filename = ""; const char * default_output_filename = ""; const char ** filenames = 0; int num_filenames = 0; - int buffer_size = max_dictionary_size; + unsigned buffer_size = max_dictionary_size; int infd = -1; + enum Mode program_mode = m_compress; int argind = 0; int retval = 0; int i; @@ -560,7 +584,6 @@ int main( const int argc, const char * const argv[] ) bool ignore_trailing = true; bool keep_input_files = false; bool stdin_used = false; - bool testing = false; bool to_stdout = false; struct Pretty_print pp; @@ -572,6 +595,7 @@ int main( const int argc, const char * const argv[] ) { 'f', "force", ap_no }, { 'h', "help", ap_no }, { 'k', "keep", ap_no }, + { 'l', "list", ap_no }, { 'n', "threads", ap_yes }, { 'o', "output", ap_yes }, { 'q', "quiet", ap_no }, @@ -600,14 +624,15 @@ int main( const int argc, const char * const argv[] ) { case 'a': ignore_trailing = false; break; case 'c': to_stdout = true; break; - case 'd': testing = false; break; + case 'd': set_mode( &program_mode, m_decompress ); break; case 'f': force = true; break; case 'h': show_help(); return 0; case 'k': keep_input_files = true; break; + case 'l': set_mode( &program_mode, m_list ); break; case 'n': break; case 'o': default_output_filename = arg; break; case 'q': verbosity = -1; break; - case 't': testing = true; break; + case 't': set_mode( &program_mode, m_test ); break; case 'u': buffer_size = get_dict_size( arg ); break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; @@ -620,9 +645,6 @@ int main( const int argc, const char * const argv[] ) setmode( STDOUT_FILENO, O_BINARY ); #endif - if( testing ) - outfd = -1; - num_filenames = max( 1, ap_arguments( &parser ) - argind ); filenames = resize_buffer( filenames, num_filenames * sizeof filenames[0] ); filenames[0] = "-"; @@ -633,10 +655,18 @@ int main( const int argc, const char * const argv[] ) if( strcmp( filenames[i], "-" ) != 0 ) filenames_given = true; } + if( program_mode == m_list ) + return list_files( filenames, num_filenames, ignore_trailing ); + + if( program_mode == m_test ) + outfd = -1; + else if( program_mode == m_compress ) + program_mode = m_decompress; /* default mode */ + if( buffer_size < max_dictionary_size ) { bool from_stdin = false; - if( to_stdout || testing ) + if( to_stdout || program_mode == m_test ) { show_error( "'--buffer-size' is incompatible with '--stdout' and '--test'.", 0, false ); return 1; } for( i = 0; i < num_filenames; ++i ) @@ -647,7 +677,7 @@ int main( const int argc, const char * const argv[] ) " with a reduced buffer size.", 0, false ); return 1; } } - if( !to_stdout && !testing && + if( !to_stdout && program_mode != m_test && ( filenames_given || default_output_filename[0] ) ) set_signals(); @@ -656,6 +686,7 @@ int main( const int argc, const char * const argv[] ) output_filename = resize_buffer( output_filename, 1 ); for( i = 0; i < num_filenames; ++i ) { + const char * input_filename = ""; int tmp; struct stat in_stats; const struct stat * in_statsp; @@ -664,16 +695,15 @@ int main( const int argc, const char * const argv[] ) if( !filenames[i][0] || strcmp( filenames[i], "-" ) == 0 ) { if( stdin_used ) continue; else stdin_used = true; - input_filename = ""; infd = STDIN_FILENO; - if( !testing ) + if( program_mode != m_test ) { if( to_stdout || !default_output_filename[0] ) outfd = STDOUT_FILENO; else { output_filename = resize_buffer( output_filename, - strlen( default_output_filename ) + 1 ); + strlen( default_output_filename ) + 1 ); strcpy( output_filename, default_output_filename ); if( !open_outstream( force, true ) ) { @@ -687,9 +717,10 @@ int main( const int argc, const char * const argv[] ) else { input_filename = filenames[i]; - infd = open_instream( input_filename, &in_stats, to_stdout || testing ); + infd = open_instream( input_filename, &in_stats, + to_stdout || program_mode == m_test, false ); if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } - if( !testing ) + if( program_mode != m_test ) { if( to_stdout ) outfd = STDOUT_FILENO; else @@ -705,10 +736,13 @@ int main( const int argc, const char * const argv[] ) } } + Pp_set_name( &pp, input_filename ); if( isatty( infd ) ) { - show_error( "I won't read compressed data from a terminal.", 0, true ); + show_file_error( pp.name, + "I won't read compressed data from a terminal.", 0 ); if( retval < 1 ) retval = 1; + if( program_mode == m_test ) { close( infd ); infd = -1; continue; } cleanup_and_fail( retval ); } @@ -727,17 +761,16 @@ int main( const int argc, const char * const argv[] ) } in_statsp = input_filename[0] ? &in_stats : 0; - Pp_set_name( &pp, input_filename ); - tmp = decompress( infd, &pp, buffer_size, ignore_trailing, testing ); + tmp = decompress( infd, &pp, buffer_size, ignore_trailing, program_mode == m_test ); if( tmp > retval ) retval = tmp; - if( tmp && !testing ) cleanup_and_fail( retval ); + if( tmp && program_mode != m_test ) cleanup_and_fail( retval ); if( delete_output_on_interrupt ) close_and_set_permissions( in_statsp ); if( input_filename[0] ) { close( infd ); infd = -1; - if( !keep_input_files && !to_stdout && !testing ) + if( !keep_input_files && !to_stdout && program_mode != m_test ) remove( input_filename ); } } diff --git a/testsuite/check.sh b/testsuite/check.sh index f5b2088..334cb71 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lunzip - Decompressor for the lzip format -# Copyright (C) 2010-2016 Antonio Diaz Diaz. +# Copyright (C) 2010-2017 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -17,12 +17,12 @@ if [ ! -f "${LZIP}" ] || [ ! -x "${LZIP}" ] ; then exit 1 fi -if [ -e "${LZIP}" ] 2> /dev/null ; then true -else +[ -e "${LZIP}" ] 2> /dev/null || + { echo "$0: a POSIX shell is required to run the tests" echo "Try bash -c \"$0 $1 $2\"" exit 1 -fi + } if [ -d tmp ] ; then rm -rf tmp ; fi mkdir tmp @@ -31,109 +31,176 @@ cd "${objdir}"/tmp || framework_failure cat "${testdir}"/test.txt > in || framework_failure in_lz="${testdir}"/test.txt.lz fail=0 +test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } printf "testing lunzip-%s..." "$2" -cat "${testdir}"/test.txt.lz > uin.lz || framework_failure -"${LZIP}" -dfkqu-1 uin.lz -if [ $? = 1 ] && [ ! -e uin ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -dfkqu0 uin.lz -if [ $? = 1 ] && [ ! -e uin ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -dfkqu4095 uin.lz -if [ $? = 1 ] && [ ! -e uin ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -dfkqu513MiB uin.lz -if [ $? = 1 ] && [ ! -e uin ] ; then printf . ; else printf - ; fail=1 ; fi +cat "${in_lz}" > uin.lz || framework_failure +for i in bad_size -1 0 4095 513MiB 1G 1T 1P 1E 1Z 1Y 10KB ; do + "${LZIP}" -dfkqu $i uin.lz + { [ $? = 1 ] && [ ! -e uin ] ; } || test_failed $LINENO $i +done rm -f uin.lz +"${LZIP}" -lq in +[ $? = 2 ] || test_failed $LINENO "${LZIP}" -tq in -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 2 ] || test_failed $LINENO "${LZIP}" -tq < in -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 2 ] || test_failed $LINENO "${LZIP}" -cdq in -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 2 ] || test_failed $LINENO "${LZIP}" -cdq < in -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi -dd if="${in_lz}" bs=1 count=6 2> /dev/null | "${LZIP}" -tq -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi -dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" -tq -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 2 ] || test_failed $LINENO +# these are for code coverage +"${LZIP}" -lt "${in_lz}" 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" -cdl "${in_lz}" > out 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" -cdt "${in_lz}" > out 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" -t -- nx_file 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" --help > /dev/null || test_failed $LINENO +"${LZIP}" -n1 -V > /dev/null || test_failed $LINENO +"${LZIP}" -m 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" -z 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" --bad_option 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" --t 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" --test=2 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" --output= 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" --output 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +printf "LZIP\001-.............................." | "${LZIP}" -t 2> /dev/null +printf "LZIP\002-.............................." | "${LZIP}" -t 2> /dev/null +printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null printf "\ntesting decompression..." -"${LZIP}" -t "${in_lz}" -if [ $? = 0 ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -cd "${in_lz}" > copy || fail=1 -cmp in copy || fail=1 -printf . +"${LZIP}" -lq "${in_lz}" || test_failed $LINENO +"${LZIP}" -t "${in_lz}" || test_failed $LINENO +"${LZIP}" -cd "${in_lz}" > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO rm -f copy cat "${in_lz}" > copy.lz || framework_failure -"${LZIP}" -dk copy.lz || fail=1 -cmp in copy || fail=1 +"${LZIP}" -dk copy.lz || test_failed $LINENO +cmp in copy || test_failed $LINENO printf "to be overwritten" > copy || framework_failure -"${LZIP}" -dq copy.lz -if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -d copy.lz 2> /dev/null +[ $? = 1 ] || test_failed $LINENO "${LZIP}" -df copy.lz -if [ $? = 0 ] && [ ! -e copy.lz ] && cmp in copy ; then - printf . ; else printf - ; fail=1 ; fi +{ [ $? = 0 ] && [ ! -e copy.lz ] && cmp in copy ; } || test_failed $LINENO printf "to be overwritten" > copy || framework_failure -"${LZIP}" -df -o copy < "${in_lz}" || fail=1 -cmp in copy || fail=1 -printf . +"${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO +cmp in copy || test_failed $LINENO rm -f copy cat "${in_lz}" > anyothername || framework_failure -"${LZIP}" -d -o copy - anyothername - < "${in_lz}" -if [ $? = 0 ] && cmp in copy && cmp in anyothername.out ; then - printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -dv --output copy - anyothername - < "${in_lz}" 2> /dev/null +{ [ $? = 0 ] && cmp in copy && cmp in anyothername.out ; } || + test_failed $LINENO rm -f copy anyothername.out +"${LZIP}" -lq in "${in_lz}" +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -lq nx_file.lz "${in_lz}" +[ $? = 1 ] || test_failed $LINENO "${LZIP}" -tq in "${in_lz}" -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -tq foo.lz "${in_lz}" -if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -tq nx_file.lz "${in_lz}" +[ $? = 1 ] || test_failed $LINENO "${LZIP}" -cdq in "${in_lz}" > copy -if [ $? = 2 ] && cat copy in | cmp in - ; then printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -cdq foo.lz "${in_lz}" > copy -if [ $? = 1 ] && cmp in copy ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 2 ] && cat copy in | cmp in - ; } || test_failed $LINENO +"${LZIP}" -cdq nx_file.lz "${in_lz}" > copy +{ [ $? = 1 ] && cmp in copy ; } || test_failed $LINENO rm -f copy cat "${in_lz}" > copy.lz || framework_failure +for i in 1 2 3 4 5 6 7 ; do + printf "g" >> copy.lz || framework_failure + "${LZIP}" -alvv copy.lz "${in_lz}" > /dev/null 2>&1 + [ $? = 2 ] || test_failed $LINENO $i + "${LZIP}" -atvvvv copy.lz "${in_lz}" 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i +done "${LZIP}" -dq in copy.lz -if [ $? = 2 ] && [ -e copy.lz ] && [ ! -e copy ] && [ ! -e in.out ] ; then - printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -dq foo.lz copy.lz -if [ $? = 1 ] && [ ! -e copy.lz ] && [ ! -e foo ] && cmp in copy ; then - printf . ; else printf - ; fail=1 ; fi +{ [ $? = 2 ] && [ -e copy.lz ] && [ ! -e copy ] && [ ! -e in.out ] ; } || + test_failed $LINENO +"${LZIP}" -dq nx_file.lz copy.lz +{ [ $? = 1 ] && [ ! -e copy.lz ] && [ ! -e nx_file ] && cmp in copy ; } || + test_failed $LINENO cat in in > in2 || framework_failure -cat "${in_lz}" "${in_lz}" > copy2.lz || framework_failure -"${LZIP}" -t copy2.lz || fail=1 -"${LZIP}" -cd copy2.lz > copy2 || fail=1 -cmp in2 copy2 || fail=1 -printf . - -printf "garbage" >> copy2.lz || framework_failure +cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure +"${LZIP}" -lq in2.lz || test_failed $LINENO +"${LZIP}" -t in2.lz || test_failed $LINENO +"${LZIP}" -cd in2.lz > copy2 || test_failed $LINENO +cmp in2 copy2 || test_failed $LINENO + +cat in2.lz > copy2.lz || framework_failure +printf "\ngarbage" >> copy2.lz || framework_failure +"${LZIP}" -tvvvv copy2.lz 2> /dev/null || test_failed $LINENO rm -f copy2 +"${LZIP}" -alq copy2.lz +[ $? = 2 ] || test_failed $LINENO "${LZIP}" -atq copy2.lz -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 2 ] || test_failed $LINENO "${LZIP}" -atq < copy2.lz -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 2 ] || test_failed $LINENO "${LZIP}" -adkq copy2.lz -if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 2 ] && [ ! -e copy2 ] ; } || test_failed $LINENO "${LZIP}" -adkq -o copy2 < copy2.lz -if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 2 ] && [ ! -e copy2 ] ; } || test_failed $LINENO printf "to be overwritten" > copy2 || framework_failure -"${LZIP}" -df copy2.lz || fail=1 -cmp in2 copy2 || fail=1 -printf . +"${LZIP}" -df copy2.lz || test_failed $LINENO +cmp in2 copy2 || test_failed $LINENO -for i in 12 4096 4Ki 29 512KiB ; do +for i in 12 5120 6Ki 29 512KiB ; do printf "to be overwritten" > copy || framework_failure - "${LZIP}" -df -u$i -o copy < "${in_lz}" || fail=1 - cmp in copy || fail=1 - printf . + "${LZIP}" -df -u$i -o copy < "${in_lz}" || test_failed $LINENO $i + cmp in copy || test_failed $LINENO $i done +printf "\ntesting bad input..." + +cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure +if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && + [ -e trunc.lz ] && cmp in2.lz trunc.lz > /dev/null 2>&1 ; then + for i in 6 20 14734 14753 14754 14755 14756 14757 14758 ; do + dd if=in3.lz of=trunc.lz bs=$i count=1 2> /dev/null + "${LZIP}" -lq trunc.lz + [ $? = 2 ] || test_failed $LINENO $i + "${LZIP}" -t trunc.lz 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i + "${LZIP}" -tq < trunc.lz + [ $? = 2 ] || test_failed $LINENO $i + "${LZIP}" -cdq trunc.lz > out + [ $? = 2 ] || test_failed $LINENO $i + "${LZIP}" -dq < trunc.lz > out + [ $? = 2 ] || test_failed $LINENO $i + done +else + printf "\nwarning: skipping truncation test: 'dd' does not work on your system." +fi + +cat "${in_lz}" > ingin.lz || framework_failure +printf "g" >> ingin.lz || framework_failure +cat "${in_lz}" >> ingin.lz || framework_failure +"${LZIP}" -lq ingin.lz +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -t ingin.lz || test_failed $LINENO +"${LZIP}" -cd ingin.lz > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +"${LZIP}" -t < ingin.lz || test_failed $LINENO +"${LZIP}" -d < ingin.lz > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO + echo if [ ${fail} = 0 ] ; then echo "tests completed successfully." diff --git a/testsuite/test.txt.lz b/testsuite/test.txt.lz Binary files differindex 41d2e39..22cea6e 100644 --- a/testsuite/test.txt.lz +++ b/testsuite/test.txt.lz |