From 4723024d2474cf9686b4c8e88b738c6b04c44d3a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 21 Feb 2022 17:09:59 +0100 Subject: Adding upstream version 1.13. Signed-off-by: Daniel Baumann --- ChangeLog | 10 +++- INSTALL | 4 +- Makefile.in | 2 +- NEWS | 30 ++--------- README | 4 +- carg_parser.c | 110 +++++++++++++++++++++++++------------- carg_parser.h | 14 +++-- configure | 6 +-- decoder.c | 43 ++++++--------- decoder.h | 119 ++++++++++++++++++++++++++++------------- doc/lunzip.1 | 6 +-- list.c | 63 +++++++++++----------- lzip.h | 9 ++-- lzip_index.c | 43 ++++++--------- lzip_index.h | 2 +- main.c | 153 +++++++++++++++++++++++++++++++++-------------------- testsuite/check.sh | 23 ++++---- 17 files changed, 367 insertions(+), 274 deletions(-) diff --git a/ChangeLog b/ChangeLog index 08e3c0a..07d8d6e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2022-01-22 Antonio Diaz Diaz + + * Version 1.13 released. + * Decompression time has been reduced by 5-12% depending on the file. + * main.c (getnum): Show option name and valid range if error. + 2021-01-01 Antonio Diaz Diaz * Version 1.12 released. @@ -18,7 +24,7 @@ * lzip_index.c: Detect some kinds of corrupt trailers. * main.c (main): Check return value of close( infd ). * main.c: Compile on DOS with DJGPP. - * configure: Accept appending to CFLAGS, 'CFLAGS+=OPTIONS'. + * configure: Accept appending to CFLAGS; 'CFLAGS+=OPTIONS'. * INSTALL: Document use of CFLAGS+='-D __USE_MINGW_ANSI_STDIO'. 2018-02-05 Antonio Diaz Diaz @@ -115,7 +121,7 @@ * Created from the decompression code of clzip 1.1. -Copyright (C) 2010-2021 Antonio Diaz Diaz. +Copyright (C) 2010-2022 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute, and diff --git a/INSTALL b/INSTALL index 313b7dc..4282b1e 100644 --- a/INSTALL +++ b/INSTALL @@ -1,7 +1,7 @@ Requirements ------------ You will need a C99 compiler. (gcc 3.3.6 or newer is recommended). -I use gcc 6.1.0 and 4.1.2, but the code should compile with any standards +I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -69,7 +69,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2010-2021 Antonio Diaz Diaz. +Copyright (C) 2010-2022 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. diff --git a/Makefile.in b/Makefile.in index aff94d4..ffc4ce8 100644 --- a/Makefile.in +++ b/Makefile.in @@ -20,7 +20,7 @@ objs = carg_parser.o lzip_index.o list.o decoder.o main.o all : $(progname) $(progname) : $(objs) - $(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(objs) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs) main.o : main.c $(CC) $(CPPFLAGS) $(CFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< diff --git a/NEWS b/NEWS index 98e59f3..fee658c 100644 --- a/NEWS +++ b/NEWS @@ -1,28 +1,6 @@ -Changes in version 1.12: +Changes in version 1.13: -Lunzip now reports an error if a file name is empty (lunzip -t ""). +Decompression time has been reduced by 5-12% depending on the file. -Option '-o, --output' now behaves like '-c, --stdout', but sending the -output unconditionally to a file instead of to standard output. See the new -description of '-o' in the manual. This change is backwards compatible only -when decompressing from standard input alone. Therefore commands like: - lunzip -d -o foo - bar.lz < foo.lz -must now be split into: - lunzip -d -o foo - < foo.lz - lunzip -d bar.lz -or rewritten as: - lunzip -d - bar.lz < foo.lz > foo - -Lunzip now does not even open the output file if the input file is a terminal. - -The words 'decompressed' and 'compressed' have been replaced with the -shorter 'out' and 'in' in the verbose output when decompressing or testing. - -Option '--list' now reports corruption or truncation of the last header in a -multimenber file specifically instead of showing the generic message "Last -member in input file is truncated or corrupt." - -The commands needed to extract files from a tar.lz archive have been -documented in the output of '--help' and in the man page. - -9 new test files have been added to the testsuite. +In case of error in a numerical argument to a command line option, lunzip +now shows the name of the option and the range of valid values. diff --git a/README b/README index b09c908..39cd00d 100644 --- a/README +++ b/README @@ -60,7 +60,7 @@ filename.lz becomes filename filename.tlz becomes filename.tar anyothername becomes anyothername.out -Decompressing a file is much like copying or moving it; therefore lunzip +Decompressing a file is much like copying or moving it. Therefore lunzip preserves the access and modification dates, permissions, and, when possible, ownership of the file just as 'cp -p' does. (If the user ID or the group ID can't be duplicated, the file permission bits S_ISUID and @@ -89,7 +89,7 @@ been compressed. Decompressed is used to refer to data which have undergone the process of decompression. -Copyright (C) 2010-2021 Antonio Diaz Diaz. +Copyright (C) 2010-2022 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. diff --git a/carg_parser.c b/carg_parser.c index d0c05d5..181ba23 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2021 Antonio Diaz Diaz. + Copyright (C) 2006-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -32,10 +32,10 @@ static void * ap_resize_buffer( void * buf, const int min_size ) } -static char push_back_record( struct Arg_parser * const ap, - const int code, const char * const argument ) +static char push_back_record( struct Arg_parser * const ap, const int code, + const char * const long_name, + const char * const argument ) { - const int len = strlen( argument ); struct ap_Record * p; void * tmp = ap_resize_buffer( ap->data, ( ap->data_size + 1 ) * sizeof (struct ap_Record) ); @@ -43,11 +43,29 @@ static char push_back_record( struct Arg_parser * const ap, ap->data = (struct ap_Record *)tmp; p = &(ap->data[ap->data_size]); p->code = code; - p->argument = 0; - tmp = ap_resize_buffer( p->argument, len + 1 ); - if( !tmp ) return 0; - p->argument = (char *)tmp; - strncpy( p->argument, argument, len + 1 ); + if( long_name ) + { + const int len = strlen( long_name ); + p->parsed_name = (char *)malloc( len + 2 + 1 ); + if( !p->parsed_name ) return 0; + p->parsed_name[0] = p->parsed_name[1] = '-'; + strncpy( p->parsed_name + 2, long_name, len + 1 ); + } + else if( code > 0 && code < 256 ) + { + p->parsed_name = (char *)malloc( 2 + 1 ); + if( !p->parsed_name ) return 0; + p->parsed_name[0] = '-'; p->parsed_name[1] = code; p->parsed_name[2] = 0; + } + else p->parsed_name = 0; + if( argument ) + { + const int len = strlen( argument ); + p->argument = (char *)malloc( len + 1 ); + if( !p->argument ) { free( p->parsed_name ); return 0; } + strncpy( p->argument, argument, len + 1 ); + } + else p->argument = 0; ++ap->data_size; return 1; } @@ -68,12 +86,14 @@ static char add_error( struct Arg_parser * const ap, const char * const msg ) static void free_data( struct Arg_parser * const ap ) { int i; - for( i = 0; i < ap->data_size; ++i ) free( ap->data[i].argument ); + for( i = 0; i < ap->data_size; ++i ) + { free( ap->data[i].argument ); free( ap->data[i].parsed_name ); } if( ap->data ) { free( ap->data ); ap->data = 0; } ap->data_size = 0; } +/* Return 0 only if out of memory. */ static char parse_long_option( struct Arg_parser * const ap, const char * const opt, const char * const arg, const struct ap_Option options[], @@ -87,9 +107,10 @@ static char parse_long_option( struct Arg_parser * const ap, /* Test all long options for either exact match or abbreviated matches. */ for( i = 0; options[i].code != 0; ++i ) - if( options[i].name && strncmp( options[i].name, &opt[2], len ) == 0 ) + if( options[i].long_name && + strncmp( options[i].long_name, &opt[2], len ) == 0 ) { - if( strlen( options[i].name ) == len ) /* Exact match found */ + if( strlen( options[i].long_name ) == len ) /* Exact match found */ { index = i; exact = 1; break; } else if( index < 0 ) index = i; /* First nonexact match found */ else if( options[index].code != options[i].code || @@ -117,35 +138,39 @@ static char parse_long_option( struct Arg_parser * const ap, { if( options[index].has_arg == ap_no ) { - add_error( ap, "option '--" ); add_error( ap, options[index].name ); + add_error( ap, "option '--" ); add_error( ap, options[index].long_name ); add_error( ap, "' doesn't allow an argument" ); return 1; } if( options[index].has_arg == ap_yes && !opt[len+3] ) { - add_error( ap, "option '--" ); add_error( ap, options[index].name ); + add_error( ap, "option '--" ); add_error( ap, options[index].long_name ); add_error( ap, "' requires an argument" ); return 1; } - return push_back_record( ap, options[index].code, &opt[len+3] ); + return push_back_record( ap, options[index].code, + options[index].long_name, &opt[len+3] ); } if( options[index].has_arg == ap_yes ) { if( !arg || !arg[0] ) { - add_error( ap, "option '--" ); add_error( ap, options[index].name ); + add_error( ap, "option '--" ); add_error( ap, options[index].long_name ); add_error( ap, "' requires an argument" ); return 1; } ++*argindp; - return push_back_record( ap, options[index].code, arg ); + return push_back_record( ap, options[index].code, + options[index].long_name, arg ); } - return push_back_record( ap, options[index].code, "" ); + return push_back_record( ap, options[index].code, + options[index].long_name, 0 ); } +/* Return 0 only if out of memory. */ static char parse_short_option( struct Arg_parser * const ap, const char * const opt, const char * const arg, const struct ap_Option options[], @@ -156,13 +181,13 @@ static char parse_short_option( struct Arg_parser * const ap, while( cind > 0 ) { int index = -1, i; - const unsigned char code = opt[cind]; + const unsigned char c = opt[cind]; char code_str[2]; - code_str[0] = code; code_str[1] = 0; + code_str[0] = c; code_str[1] = 0; - if( code != 0 ) + if( c != 0 ) for( i = 0; options[i].code; ++i ) - if( code == options[i].code ) + if( c == options[i].code ) { index = i; break; } if( index < 0 ) @@ -176,7 +201,7 @@ static char parse_short_option( struct Arg_parser * const ap, if( options[index].has_arg != ap_no && cind > 0 && opt[cind] ) { - if( !push_back_record( ap, code, &opt[cind] ) ) return 0; + if( !push_back_record( ap, c, 0, &opt[cind] ) ) return 0; ++*argindp; cind = 0; } else if( options[index].has_arg == ap_yes ) @@ -188,9 +213,9 @@ static char parse_short_option( struct Arg_parser * const ap, return 1; } ++*argindp; cind = 0; - if( !push_back_record( ap, code, arg ) ) return 0; + if( !push_back_record( ap, c, 0, arg ) ) return 0; } - else if( !push_back_record( ap, code, "" ) ) return 0; + else if( !push_back_record( ap, c, 0, 0 ) ) return 0; } return 1; } @@ -203,7 +228,7 @@ char ap_init( struct Arg_parser * const ap, const char ** non_options = 0; /* skipped non-options */ int non_options_size = 0; /* number of skipped non-options */ int argind = 1; /* index in argv */ - int i; + char done = 0; /* false until success */ ap->data = 0; ap->error = 0; @@ -223,20 +248,20 @@ char ap_init( struct Arg_parser * const ap, if( ch2 == '-' ) { if( !argv[argind][2] ) { ++argind; break; } /* we found "--" */ - else if( !parse_long_option( ap, opt, arg, options, &argind ) ) return 0; + else if( !parse_long_option( ap, opt, arg, options, &argind ) ) goto out; } - else if( !parse_short_option( ap, opt, arg, options, &argind ) ) return 0; + else if( !parse_short_option( ap, opt, arg, options, &argind ) ) goto out; if( ap->error ) break; } else { if( in_order ) - { if( !push_back_record( ap, 0, argv[argind++] ) ) return 0; } + { if( !push_back_record( ap, 0, 0, argv[argind++] ) ) goto out; } else { void * tmp = ap_resize_buffer( non_options, ( non_options_size + 1 ) * sizeof *non_options ); - if( !tmp ) return 0; + if( !tmp ) goto out; non_options = (const char **)tmp; non_options[non_options_size++] = argv[argind++]; } @@ -245,13 +270,15 @@ char ap_init( struct Arg_parser * const ap, if( ap->error ) free_data( ap ); else { + int i; for( i = 0; i < non_options_size; ++i ) - if( !push_back_record( ap, 0, non_options[i] ) ) return 0; + if( !push_back_record( ap, 0, 0, non_options[i] ) ) goto out; while( argind < argc ) - if( !push_back_record( ap, 0, argv[argind++] ) ) return 0; + if( !push_back_record( ap, 0, 0, argv[argind++] ) ) goto out; } - if( non_options ) free( non_options ); - return 1; + done = 1; +out: if( non_options ) free( non_options ); + return done; } @@ -273,13 +300,20 @@ int ap_arguments( const struct Arg_parser * const ap ) int ap_code( const struct Arg_parser * const ap, const int i ) { - if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].code; - else return 0; + if( i < 0 || i >= ap_arguments( ap ) ) return 0; + return ap->data[i].code; + } + + +const char * ap_parsed_name( const struct Arg_parser * const ap, const int i ) + { + if( i < 0 || i >= ap_arguments( ap ) || !ap->data[i].parsed_name ) return ""; + return ap->data[i].parsed_name; } const char * ap_argument( const struct Arg_parser * const ap, const int i ) { - if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].argument; - else return ""; + if( i < 0 || i >= ap_arguments( ap ) || !ap->data[i].argument ) return ""; + return ap->data[i].argument; } diff --git a/carg_parser.h b/carg_parser.h index c5f2352..0c64861 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2021 Antonio Diaz Diaz. + Copyright (C) 2006-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -24,9 +24,9 @@ message. 'options' is an array of 'struct ap_Option' terminated by an element - containing a code which is zero. A null name means a short-only - option. A code value outside the unsigned char range means a - long-only option. + containing a code which is zero. A null long_name means a short-only + option. A code value outside the unsigned char range means a long-only + option. Arg_parser normally makes it appear as if all the option arguments were specified before all the non-option arguments for the purposes @@ -50,7 +50,7 @@ enum ap_Has_arg { ap_no, ap_yes, ap_maybe }; struct ap_Option { int code; /* Short option letter or code ( code != 0 ) */ - const char * name; /* Long option name (maybe null) */ + const char * long_name; /* Long option name (maybe null) */ enum ap_Has_arg has_arg; }; @@ -58,6 +58,7 @@ struct ap_Option struct ap_Record { int code; + char * parsed_name; char * argument; }; @@ -86,6 +87,9 @@ int ap_arguments( const struct Arg_parser * const ap ); Else ap_argument( i ) is the option's argument (or empty). */ int ap_code( const struct Arg_parser * const ap, const int i ); +/* Full name of the option parsed (short or long). */ +const char * ap_parsed_name( const struct Arg_parser * const ap, const int i ); + const char * ap_argument( const struct Arg_parser * const ap, const int i ); #ifdef __cplusplus diff --git a/configure b/configure index 54fb2fb..e241235 100755 --- a/configure +++ b/configure @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Lunzip - Decompressor for the lzip format -# Copyright (C) 2010-2021 Antonio Diaz Diaz. +# Copyright (C) 2010-2022 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute, and modify it. pkgname=lunzip -pkgversion=1.12 +pkgversion=1.13 progname=lunzip srctrigger=doc/${progname}.1 @@ -167,7 +167,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lunzip - Decompressor for the lzip format -# Copyright (C) 2010-2021 Antonio Diaz Diaz. +# Copyright (C) 2010-2022 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission diff --git a/decoder.c b/decoder.c index 63d30ea..b52b35f 100644 --- a/decoder.c +++ b/decoder.c @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,8 +32,8 @@ CRC32 crc32; -/* Returns the number of bytes really read. - If (returned value < size) and (errno == 0), means EOF was reached. +/* Return the number of bytes really read. + If (value returned < size) and (errno == 0), means EOF was reached. */ int readblock( const int fd, uint8_t * const buf, const int size ) { @@ -51,8 +51,8 @@ int readblock( const int fd, uint8_t * const buf, const int size ) } -/* Returns the number of bytes really written. - If (returned value < size), it is always an error. +/* Return the number of bytes really written. + If (value returned < size), it is always an error. */ static int writeblock( const int fd, const uint8_t * const buf, const int size ) { @@ -118,8 +118,6 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d, int size = Rd_read_data( d->rdec, trailer, Lt_size ); const unsigned long long data_size = LZd_data_position( d ); const unsigned long long member_size = Rd_member_position( d->rdec ); - unsigned td_crc; - unsigned long long td_size, tm_size; bool error = false; if( size < Lt_size ) @@ -134,7 +132,7 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d, while( size < Lt_size ) trailer[size++] = 0; } - td_crc = Lt_get_data_crc( trailer ); + const unsigned td_crc = Lt_get_data_crc( trailer ); if( td_crc != LZd_crc( d ) ) { error = true; @@ -145,7 +143,7 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d, td_crc, LZd_crc( d ) ); } } - td_size = Lt_get_data_size( trailer ); + const unsigned long long td_size = Lt_get_data_size( trailer ); if( td_size != data_size ) { error = true; @@ -156,7 +154,7 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d, td_size, td_size, data_size, data_size ); } } - tm_size = Lt_get_member_size( trailer ); + const unsigned long long tm_size = Lt_get_member_size( trailer ); if( tm_size != member_size ) { error = true; @@ -192,10 +190,6 @@ int LZd_decode_member( struct LZ_decoder * const d, struct Pretty_print * const pp ) { struct Range_decoder * const rdec = d->rdec; - void (* const copy_block) - ( struct LZ_decoder * const d, const unsigned distance, unsigned len ) = - ( d->buffer_size >= d->dictionary_size ) ? - &LZd_copy_block : &LZd_copy_block2; Bit_model bm_literal[1<buffer_size >= d->dictionary_size; Bm_array_init( bm_literal[0], (1 << literal_context_bits) * 0x300 ); Bm_array_init( bm_match[0], states * pos_states ); @@ -230,25 +225,19 @@ int LZd_decode_member( struct LZ_decoder * const d, Rd_load( rdec ); while( !Rd_finished( rdec ) ) { - int len; const int pos_state = LZd_data_position( d ) & pos_state_mask; if( Rd_decode_bit( rdec, &bm_match[state][pos_state] ) == 0 ) /* 1st bit */ { /* literal byte */ Bit_model * const bm = bm_literal[get_lit_state(LZd_peek_prev( d ))]; - if( St_is_char( state ) ) - { - state -= ( state < 4 ) ? state : 3; + if( ( state = St_set_char( state ) ) < 4 ) LZd_put_byte( d, Rd_decode_tree8( rdec, bm ) ); - } else - { - state -= ( state < 10 ) ? 3 : 6; LZd_put_byte( d, Rd_decode_matched( rdec, bm, LZd_peek( d, rep0 ) ) ); - } continue; } /* match or repeated match */ + int len; if( Rd_decode_bit( rdec, &bm_rep[state] ) != 0 ) /* 2nd bit */ { if( Rd_decode_bit( rdec, &bm_rep0[state] ) == 0 ) /* 3rd bit */ @@ -274,13 +263,12 @@ int LZd_decode_member( struct LZ_decoder * const d, rep0 = distance; } state = St_set_rep( state ); - len = min_match_len + Rd_decode_len( rdec, &rep_len_model, pos_state ); + len = Rd_decode_len( rdec, &rep_len_model, pos_state ); } else /* match */ { - unsigned distance; - len = min_match_len + Rd_decode_len( rdec, &match_len_model, pos_state ); - distance = Rd_decode_tree6( rdec, bm_dis_slot[get_len_state(len)] ); + len = Rd_decode_len( rdec, &match_len_model, pos_state ); + unsigned distance = Rd_decode_tree6( rdec, bm_dis_slot[get_len_state(len)] ); if( distance >= start_dis_model ) { const unsigned dis_slot = distance; @@ -321,7 +309,8 @@ int LZd_decode_member( struct LZ_decoder * const d, ( !d->pos_wrapped && rep0 >= LZd_data_position( d ) ) ) { LZd_flush_data( d ); return 1; } } - copy_block( d, rep0, len ); + if( full_buffer || rep0 < d->buffer_size ) LZd_copy_block( d, rep0, len ); + else LZd_copy_block2( d, rep0, len ); } LZd_flush_data( d ); return 2; diff --git a/decoder.h b/decoder.h index 0fe0110..0afdd83 100644 --- a/decoder.h +++ b/decoder.h @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -101,12 +101,11 @@ static inline unsigned Rd_decode( struct Range_decoder * const rdec, int i; for( i = num_bits; i > 0; --i ) { - bool bit; Rd_normalize( rdec ); rdec->range >>= 1; /* symbol <<= 1; */ /* if( rdec->code >= rdec->range ) { rdec->code -= rdec->range; symbol |= 1; } */ - bit = ( rdec->code >= rdec->range ); + const bool bit = ( rdec->code >= rdec->range ); symbol <<= 1; symbol += bit; rdec->code -= rdec->range & ( 0U - bit ); } @@ -116,42 +115,75 @@ static inline unsigned Rd_decode( struct Range_decoder * const rdec, static inline unsigned Rd_decode_bit( struct Range_decoder * const rdec, Bit_model * const probability ) { - uint32_t bound; Rd_normalize( rdec ); - bound = ( rdec->range >> bit_model_total_bits ) * *probability; + const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability; if( rdec->code < bound ) { rdec->range = bound; - *probability += (bit_model_total - *probability) >> bit_model_move_bits; + *probability += ( bit_model_total - *probability ) >> bit_model_move_bits; return 0; } else { - rdec->range -= bound; rdec->code -= bound; + rdec->range -= bound; *probability -= *probability >> bit_model_move_bits; return 1; } } -static inline unsigned Rd_decode_tree3( struct Range_decoder * const rdec, - Bit_model bm[] ) +static inline void Rd_decode_symbol_bit( struct Range_decoder * const rdec, + Bit_model * const probability, unsigned * symbol ) { - unsigned symbol = 2 | Rd_decode_bit( rdec, &bm[1] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - return symbol & 7; + Rd_normalize( rdec ); + *symbol <<= 1; + const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability; + if( rdec->code < bound ) + { + rdec->range = bound; + *probability += ( bit_model_total - *probability ) >> bit_model_move_bits; + } + else + { + rdec->code -= bound; + rdec->range -= bound; + *probability -= *probability >> bit_model_move_bits; + *symbol |= 1; + } + } + +static inline void Rd_decode_symbol_bit_reversed( struct Range_decoder * const rdec, + Bit_model * const probability, unsigned * model, + unsigned * symbol, const int i ) + { + Rd_normalize( rdec ); + *model <<= 1; + const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability; + if( rdec->code < bound ) + { + rdec->range = bound; + *probability += ( bit_model_total - *probability ) >> bit_model_move_bits; + } + else + { + rdec->code -= bound; + rdec->range -= bound; + *probability -= *probability >> bit_model_move_bits; + *model |= 1; + *symbol |= 1 << i; + } } static inline unsigned Rd_decode_tree6( struct Range_decoder * const rdec, Bit_model bm[] ) { - unsigned symbol = 2 | Rd_decode_bit( rdec, &bm[1] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + unsigned symbol = 1; + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); return symbol & 0x3F; } @@ -159,9 +191,14 @@ static inline unsigned Rd_decode_tree8( struct Range_decoder * const rdec, Bit_model bm[] ) { unsigned symbol = 1; - int i; - for( i = 0; i < 8; ++i ) - symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); return symbol & 0xFF; } @@ -173,21 +210,19 @@ Rd_decode_tree_reversed( struct Range_decoder * const rdec, unsigned symbol = 0; int i; for( i = 0; i < num_bits; ++i ) - { - const unsigned bit = Rd_decode_bit( rdec, &bm[model] ); - model <<= 1; model += bit; - symbol |= ( bit << i ); - } + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, i ); return symbol; } static inline unsigned Rd_decode_tree_reversed4( struct Range_decoder * const rdec, Bit_model bm[] ) { - unsigned symbol = Rd_decode_bit( rdec, &bm[1] ); - symbol += Rd_decode_bit( rdec, &bm[2+symbol] ) << 1; - symbol += Rd_decode_bit( rdec, &bm[4+symbol] ) << 2; - symbol += Rd_decode_bit( rdec, &bm[8+symbol] ) << 3; + unsigned model = 1; + unsigned symbol = 0; + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 0 ); + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 1 ); + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 2 ); + Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 3 ); return symbol; } @@ -210,11 +245,24 @@ static inline unsigned Rd_decode_len( struct Range_decoder * const rdec, struct Len_model * const lm, const int pos_state ) { + Bit_model * bm; + unsigned mask, offset, symbol = 1; + if( Rd_decode_bit( rdec, &lm->choice1 ) == 0 ) - return Rd_decode_tree3( rdec, lm->bm_low[pos_state] ); + { bm = lm->bm_low[pos_state]; mask = 7; offset = 0; goto len3; } if( Rd_decode_bit( rdec, &lm->choice2 ) == 0 ) - return len_low_symbols + Rd_decode_tree3( rdec, lm->bm_mid[pos_state] ); - return len_low_symbols + len_mid_symbols + Rd_decode_tree8( rdec, lm->bm_high ); + { bm = lm->bm_mid[pos_state]; mask = 7; offset = len_low_symbols; goto len3; } + bm = lm->bm_high; mask = 0xFF; offset = len_low_symbols + len_mid_symbols; + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); +len3: + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol ); + return ( symbol & mask ) + min_match_len + offset; } @@ -291,11 +339,10 @@ static inline void LZd_copy_block( struct LZ_decoder * const d, } } +/* block is (at least partially) outside of the buffer */ static inline void LZd_copy_block2( struct LZ_decoder * const d, const unsigned distance, unsigned len ) { - if( d->buffer_size > distance ) /* block is in buffer */ - { LZd_copy_block( d, distance, len ); return; } if( len < d->buffer_size - d->pos ) /* no wrap */ { const unsigned offset = distance + 1 + d->stream_pos - d->pos; diff --git a/doc/lunzip.1 b/doc/lunzip.1 index f96b431..9aa0300 100644 --- a/doc/lunzip.1 +++ b/doc/lunzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. -.TH LUNZIP "1" "January 2021" "lunzip 1.12" "User Commands" +.TH LUNZIP "1" "January 2022" "lunzip 1.13" "User Commands" .SH NAME lunzip \- decompressor for the lzip format .SH SYNOPSIS @@ -77,7 +77,7 @@ To extract all the files from archive 'foo.tar.lz', use the commands .PP Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or -invalid input file, 3 for an internal consistency error (eg, bug) which +invalid input file, 3 for an internal consistency error (e.g., bug) which caused lunzip to panic. .PP The ideas embodied in lunzip are due to (at least) the following people: @@ -90,7 +90,7 @@ Report bugs to lzip\-bug@nongnu.org .br Lunzip home page: http://www.nongnu.org/lzip/lunzip.html .SH COPYRIGHT -Copyright \(co 2021 Antonio Diaz Diaz. +Copyright \(co 2022 Antonio Diaz Diaz. License GPLv2+: GNU GPL version 2 or later .br This is free software: you are free to change and redistribute it. diff --git a/list.c b/list.c index 8ab8c44..33de75c 100644 --- a/list.c +++ b/list.c @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -52,17 +52,15 @@ int list_files( const char * const filenames[], const int num_filenames, bool stdin_used = false; for( i = 0; i < num_filenames; ++i ) { - const char * input_filename; - struct Lzip_index lzip_index; - struct stat in_stats; /* not used */ - int infd; const bool from_stdin = ( strcmp( filenames[i], "-" ) == 0 ); if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; } - input_filename = from_stdin ? "(stdin)" : filenames[i]; - infd = from_stdin ? STDIN_FILENO : + const char * const input_filename = from_stdin ? "(stdin)" : filenames[i]; + struct stat in_stats; /* not used */ + const int infd = from_stdin ? STDIN_FILENO : open_instream( input_filename, &in_stats, false, true ); if( infd < 0 ) { set_retval( &retval, 1 ); continue; } + struct Lzip_index lzip_index; Li_init( &lzip_index, infd, ignore_trailing, loose_trailing ); close( infd ); if( lzip_index.retval != 0 ) @@ -71,37 +69,36 @@ int list_files( const char * const filenames[], const int num_filenames, set_retval( &retval, lzip_index.retval ); Li_free( &lzip_index ); continue; } - if( verbosity >= 0 ) + if( verbosity < 0 ) { Li_free( &lzip_index ); continue; } + const unsigned long long udata_size = Li_udata_size( &lzip_index ); + const unsigned long long cdata_size = Li_cdata_size( &lzip_index ); + total_comp += cdata_size; total_uncomp += udata_size; ++files; + const long members = lzip_index.members; + if( first_post ) { - const unsigned long long udata_size = Li_udata_size( &lzip_index ); - const unsigned long long cdata_size = Li_cdata_size( &lzip_index ); - total_comp += cdata_size; total_uncomp += udata_size; ++files; - if( first_post ) - { - first_post = false; - if( verbosity >= 1 ) fputs( " dict memb trail ", stdout ); - fputs( " uncompressed compressed saved name\n", stdout ); - } - if( verbosity >= 1 ) - printf( "%s %5ld %6lld ", format_ds( lzip_index.dictionary_size ), - lzip_index.members, Li_file_size( &lzip_index ) - cdata_size ); - list_line( udata_size, cdata_size, input_filename ); + first_post = false; + if( verbosity >= 1 ) fputs( " dict memb trail ", stdout ); + fputs( " uncompressed compressed saved name\n", stdout ); + } + if( verbosity >= 1 ) + printf( "%s %5ld %6lld ", format_ds( lzip_index.dictionary_size ), + members, Li_file_size( &lzip_index ) - cdata_size ); + list_line( udata_size, cdata_size, input_filename ); - if( verbosity >= 2 && lzip_index.members > 1 ) + if( verbosity >= 2 && members > 1 ) + { + long i; + fputs( " member data_pos data_size member_pos member_size\n", stdout ); + for( i = 0; i < members; ++i ) { - long i; - fputs( " member data_pos data_size member_pos member_size\n", stdout ); - for( i = 0; i < lzip_index.members; ++i ) - { - const struct Block * db = Li_dblock( &lzip_index, i ); - const struct Block * mb = Li_mblock( &lzip_index, i ); - printf( "%6ld %14llu %14llu %14llu %14llu\n", - i + 1, db->pos, db->size, mb->pos, mb->size ); - } - first_post = true; /* reprint heading after list of members */ + const struct Block * db = Li_dblock( &lzip_index, i ); + const struct Block * mb = Li_mblock( &lzip_index, i ); + printf( "%6ld %14llu %14llu %14llu %14llu\n", + i + 1, db->pos, db->size, mb->pos, mb->size ); } - fflush( stdout ); + first_post = true; /* reprint heading after list of members */ } + fflush( stdout ); Li_free( &lzip_index ); } if( verbosity >= 0 && files > 1 ) diff --git a/lzip.h b/lzip.h index 3961c33..4b77be8 100644 --- a/lzip.h +++ b/lzip.h @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -131,6 +131,7 @@ static inline void CRC32_init( void ) } } +/* about as fast as it is possible without messing with endianness */ static inline void CRC32_update_buf( uint32_t * const crc, const uint8_t * const buffer, const int size ) @@ -228,12 +229,12 @@ static inline bool Lt_verify_consistency( const Lzip_trailer data ) { const unsigned crc = Lt_get_data_crc( data ); const unsigned long long dsize = Lt_get_data_size( data ); - const unsigned long long msize = Lt_get_member_size( data ); - const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size; - const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1; if( ( crc == 0 ) != ( dsize == 0 ) ) return false; + const unsigned long long msize = Lt_get_member_size( data ); if( msize < min_member_size ) return false; + const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size; if( mlimit > dsize && msize > mlimit ) return false; + const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1; if( dlimit > msize && dsize > dlimit ) return false; return true; } diff --git a/lzip_index.c b/lzip_index.c index ca4df8d..559fd7a 100644 --- a/lzip_index.c +++ b/lzip_index.c @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -133,44 +133,38 @@ static bool Li_skip_trailing_data( struct Lzip_index * const li, const int fd, const bool ignore_trailing, const bool loose_trailing ) { + if( *pos < min_member_size ) return false; enum { block_size = 16384, buffer_size = block_size + Lt_size - 1 + Lh_size }; uint8_t buffer[buffer_size]; int bsize = *pos % block_size; /* total bytes in buffer */ - int search_size, rd_size; - unsigned long long ipos; - int i; - if( *pos < min_member_size ) return false; if( bsize <= buffer_size - block_size ) bsize += block_size; - search_size = bsize; /* bytes to search for trailer */ - rd_size = bsize; /* bytes to read from file */ - ipos = *pos - rd_size; /* aligned to block_size */ + int search_size = bsize; /* bytes to search for trailer */ + int rd_size = bsize; /* bytes to read from file */ + unsigned long long ipos = *pos - rd_size; /* aligned to block_size */ while( true ) { - const uint8_t max_msb = ( ipos + search_size ) >> 56; if( seek_read( fd, buffer, rd_size, ipos ) != rd_size ) - { Li_set_errno_error( li, "Error seeking member trailer: " ); - return false; } + { Li_set_errno_error( li, "Error seeking member trailer: " ); return false; } + const uint8_t max_msb = ( ipos + search_size ) >> 56; + int i; for( i = search_size; i >= Lt_size; --i ) if( buffer[i-1] <= max_msb ) /* most significant byte of member_size */ { - Lzip_header header; - const Lzip_header * header2; const Lzip_trailer * const trailer = (const Lzip_trailer *)( buffer + i - Lt_size ); const unsigned long long member_size = Lt_get_member_size( *trailer ); - unsigned dictionary_size; - bool full_h2; if( member_size == 0 ) /* skip trailing zeros */ { while( i > Lt_size && buffer[i-9] == 0 ) --i; continue; } if( member_size > ipos + i || !Lt_verify_consistency( *trailer ) ) continue; + Lzip_header header; if( !Li_read_header( li, fd, header, ipos + i - member_size ) ) return false; if( !Lh_verify( header ) ) continue; - header2 = (const Lzip_header *)( buffer + i ); - full_h2 = bsize - i >= Lh_size; + const Lzip_header * header2 = (const Lzip_header *)( buffer + i ); + const bool full_h2 = bsize - i >= Lh_size; if( Lh_verify_prefix( *header2, bsize - i ) ) /* last member */ { if( !full_h2 ) add_error( li, "Last member in input file is truncated." ); @@ -183,7 +177,7 @@ static bool Li_skip_trailing_data( struct Lzip_index * const li, const int fd, if( !ignore_trailing ) { add_error( li, trailing_msg ); li->retval = 2; return false; } *pos = ipos + i - member_size; - dictionary_size = Lh_get_dictionary_size( header ); + const unsigned dictionary_size = Lh_get_dictionary_size( header ); if( li->dictionary_size < dictionary_size ) li->dictionary_size = dictionary_size; return push_back_member( li, 0, Lt_get_data_size( *trailer ), *pos, @@ -204,9 +198,6 @@ static bool Li_skip_trailing_data( struct Lzip_index * const li, const int fd, bool Li_init( struct Lzip_index * const li, const int infd, const bool ignore_trailing, const bool loose_trailing ) { - Lzip_header header; - unsigned long long pos; - long i; li->member_vector = 0; li->error = 0; li->insize = lseek( infd, 0, SEEK_END ); @@ -223,18 +214,17 @@ bool Li_init( struct Lzip_index * const li, const int infd, { add_error( li, "Input file is too long (2^63 bytes or more)." ); li->retval = 2; return false; } + Lzip_header header; if( !Li_read_header( li, infd, header, 0 ) ) return false; if( Li_check_header_error( li, header ) ) return false; - pos = li->insize; /* always points to a header or to EOF */ + unsigned long long pos = li->insize; /* always points to a header or to EOF */ while( pos >= min_member_size ) { Lzip_trailer trailer; - unsigned long long member_size; - unsigned dictionary_size; if( seek_read( infd, trailer, Lt_size, pos - Lt_size ) != Lt_size ) { Li_set_errno_error( li, "Error reading member trailer: " ); break; } - member_size = Lt_get_member_size( trailer ); + const unsigned long long member_size = Lt_get_member_size( trailer ); if( member_size > pos || !Lt_verify_consistency( trailer ) ) { /* bad trailer */ if( li->members <= 0 ) @@ -253,7 +243,7 @@ bool Li_init( struct Lzip_index * const li, const int infd, break; } pos -= member_size; - dictionary_size = Lh_get_dictionary_size( header ); + const unsigned dictionary_size = Lh_get_dictionary_size( header ); if( li->dictionary_size < dictionary_size ) li->dictionary_size = dictionary_size; if( !push_back_member( li, 0, Lt_get_data_size( trailer ), pos, @@ -268,6 +258,7 @@ bool Li_init( struct Lzip_index * const li, const int infd, return false; } Li_reverse_member_vector( li ); + long i; for( i = 0; ; ++i ) { const long long end = block_end( li->member_vector[i].dblock ); diff --git a/lzip_index.h b/lzip_index.h index 4e9cd44..0938533 100644 --- a/lzip_index.h +++ b/lzip_index.h @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/main.c b/main.c index b5e458e..73e29b4 100644 --- a/main.c +++ b/main.c @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid input file, 3 for an internal consistency error - (eg, bug) which caused lunzip to panic. + (e.g., bug) which caused lunzip to panic. */ #define _FILE_OFFSET_BITS 64 @@ -36,9 +36,9 @@ #include #include #include -#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) +#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ #include -#if defined(__MSVCRT__) +#if defined __MSVCRT__ #define fchmod(x,y) 0 #define fchown(x,y,z) 0 #define SIGHUP SIGTERM @@ -50,7 +50,7 @@ #define S_IWOTH 0 #endif #endif -#if defined(__DJGPP__) +#if defined __DJGPP__ #define S_ISSOCK(x) 0 #define S_ISVTX 0 #endif @@ -68,10 +68,15 @@ #error "Environments where CHAR_BIT != 8 are not supported." #endif +#if ( defined SIZE_MAX && SIZE_MAX < UINT_MAX ) || \ + ( defined SSIZE_MAX && SSIZE_MAX < INT_MAX ) +#error "Environments where 'size_t' is narrower than 'int' are not supported." +#endif + int verbosity = 0; static const char * const program_name = "lunzip"; -static const char * const program_year = "2021"; +static const char * const program_year = "2022"; static const char * invocation_name = "lunzip"; /* default value */ static const struct { const char * from; const char * to; } known_extensions[] = { @@ -130,7 +135,7 @@ static void show_help( void ) "'tar -xf foo.tar.lz' or 'lunzip -cd foo.tar.lz | tar -xf -'.\n" "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" - "invalid input file, 3 for an internal consistency error (eg, bug) which\n" + "invalid input file, 3 for an internal consistency error (e.g., bug) which\n" "caused lunzip to panic.\n" "\nThe ideas embodied in lunzip are due to (at least) the following people:\n" "Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the\n" @@ -174,8 +179,6 @@ struct Pretty_print static void Pp_init( struct Pretty_print * const pp, const char * const filenames[], const int num_filenames ) { - unsigned stdin_name_len; - int i; pp->name = 0; pp->padded_name = 0; pp->stdin_name = "(stdin)"; @@ -183,7 +186,8 @@ static void Pp_init( struct Pretty_print * const pp, pp->first_post = false; if( verbosity <= 0 ) return; - stdin_name_len = strlen( pp->stdin_name ); + const unsigned stdin_name_len = strlen( pp->stdin_name ); + int i; for( i = 0; i < num_filenames; ++i ) { const char * const s = filenames[i]; @@ -217,16 +221,14 @@ static void Pp_reset( struct Pretty_print * const pp ) void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ) { - if( verbosity >= 0 ) + if( verbosity < 0 ) return; + if( pp->first_post ) { - if( pp->first_post ) - { - pp->first_post = false; - fputs( pp->padded_name, stderr ); - if( !msg ) fflush( stderr ); - } - if( msg ) fprintf( stderr, "%s\n", msg ); + pp->first_post = false; + fputs( pp->padded_name, stderr ); + if( !msg ) fflush( stderr ); } + if( msg ) fprintf( stderr, "%s\n", msg ); } @@ -264,17 +266,53 @@ void show_header( const unsigned dictionary_size ) } -static unsigned long getnum( const char * const ptr, +/* separate large numbers >= 100_000 in groups of 3 digits using '_' */ +static const char * format_num3( unsigned long long num ) + { + const char * const si_prefix = "kMGTPEZY"; + const char * const binary_prefix = "KMGTPEZY"; + enum { buffers = 8, bufsize = 4 * sizeof (long long) }; + static char buffer[buffers][bufsize]; /* circle of static buffers for printf */ + static int current = 0; + int i; + char * const buf = buffer[current++]; current %= buffers; + char * p = buf + bufsize - 1; /* fill the buffer backwards */ + *p = 0; /* terminator */ + if( num > 1024 ) + { + char prefix = 0; /* try binary first, then si */ + for( i = 0; i < 8 && num >= 1024 && num % 1024 == 0; ++i ) + { num /= 1024; prefix = binary_prefix[i]; } + if( prefix ) *(--p) = 'i'; + else + for( i = 0; i < 8 && num >= 1000 && num % 1000 == 0; ++i ) + { num /= 1000; prefix = si_prefix[i]; } + if( prefix ) *(--p) = prefix; + } + const bool split = num >= 100000; + + for( i = 0; ; ) + { + *(--p) = num % 10 + '0'; num /= 10; if( num == 0 ) break; + if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; } + } + return p; + } + + +static unsigned long getnum( const char * const arg, + const char * const option_name, const unsigned long llimit, const unsigned long ulimit ) { - unsigned long result; char * tail; errno = 0; - result = strtoul( ptr, &tail, 0 ); - if( tail == ptr ) + unsigned long long result = strtoul( arg, &tail, 0 ); + if( tail == arg ) { - show_error( "Bad or missing numerical argument.", 0, true ); + if( verbosity >= 0 ) + fprintf( stderr, "%s: Bad or missing numerical argument in " + "option '%s'.\n", program_name, option_name ); exit( 1 ); } @@ -297,7 +335,9 @@ static unsigned long getnum( const char * const ptr, } if( exponent <= 0 ) { - show_error( "Bad multiplier in numerical argument.", 0, true ); + if( verbosity >= 0 ) + fprintf( stderr, "%s: Bad multiplier in numerical argument of " + "option '%s'.\n", program_name, option_name ); exit( 1 ); } for( i = 0; i < exponent; ++i ) @@ -309,21 +349,24 @@ static unsigned long getnum( const char * const ptr, if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; if( errno ) { - show_error( "Numerical argument out of limits.", 0, false ); + if( verbosity >= 0 ) + fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] " + "in option '%s'.\n", program_name, format_num3( llimit ), + format_num3( ulimit ), option_name ); exit( 1 ); } return result; } -static int get_dict_size( const char * const arg ) +static int get_dict_size( const char * const arg, const char * const option_name ) { char * tail; const long bits = strtol( arg, &tail, 0 ); if( bits >= min_dictionary_bits && bits <= max_dictionary_bits && *tail == 0 ) return 1 << bits; - return getnum( arg, min_dictionary_size, max_dictionary_size ); + return getnum( arg, option_name, min_dictionary_size, max_dictionary_size ); } @@ -468,7 +511,7 @@ static bool check_tty_in( const char * const input_filename, const int infd, if( isatty( infd ) ) /* for example /dev/tty */ { show_file_error( input_filename, "I won't read compressed data from a terminal.", 0 ); - close( infd ); set_retval( retval, 1 ); + close( infd ); set_retval( retval, 2 ); if( program_mode != m_test ) cleanup_and_fail( *retval ); return false; } return true; @@ -559,12 +602,9 @@ static int decompress( const unsigned long long cfile_size, const int infd, for( first_member = true; ; first_member = false ) { - int result, size; - unsigned dictionary_size; Lzip_header header; - struct LZ_decoder decoder; Rd_reset_member_position( &rdec ); - size = Rd_read_data( &rdec, header, Lh_size ); + const int size = Rd_read_data( &rdec, header, Lh_size ); if( Rd_finished( &rdec ) ) /* End Of File */ { if( first_member ) @@ -594,20 +634,21 @@ static int decompress( const unsigned long long cfile_size, const int infd, if( !Lh_verify_version( header ) ) { Pp_show_msg( pp, bad_version( Lh_version( header ) ) ); retval = 2; break; } - dictionary_size = Lh_get_dictionary_size( header ); + const unsigned dictionary_size = Lh_get_dictionary_size( header ); if( !isvalid_ds( dictionary_size ) ) { Pp_show_msg( pp, bad_dict_msg ); retval = 2; break; } if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) Pp_show_msg( pp, 0 ); + struct LZ_decoder decoder; if( !LZd_init( &decoder, &rdec, buffer_size, dictionary_size, outfd ) ) { Pp_show_msg( pp, "Not enough memory. Try a smaller output buffer size." ); retval = 1; break; } show_dprogress( cfile_size, partial_file_pos, &rdec, pp ); /* init */ - result = LZd_decode_member( &decoder, pp ); + const int result = LZd_decode_member( &decoder, pp ); partial_file_pos += Rd_member_position( &rdec ); LZd_free( &decoder ); if( result != 0 ) @@ -696,23 +737,15 @@ void show_dprogress( const unsigned long long cfile_size, int main( const int argc, const char * const argv[] ) { const char * default_output_filename = ""; - static struct Arg_parser parser; /* static because valgrind complains */ - static struct Pretty_print pp; /* and memory management in C sucks */ - static const char ** filenames = 0; - int num_filenames = 0; unsigned buffer_size = max_dictionary_size; enum Mode program_mode = m_compress; - int argind = 0; - int failed_tests = 0; - int retval = 0; int i; - bool filenames_given = false; bool force = false; bool ignore_trailing = true; bool keep_input_files = false; bool loose_trailing = false; - bool stdin_used = false; bool to_stdout = false; + if( argc > 0 ) invocation_name = argv[0]; enum { opt_lt = 256 }; const struct ap_Option options[] = @@ -734,19 +767,22 @@ int main( const int argc, const char * const argv[] ) { opt_lt, "loose-trailing", ap_no }, { 0 , 0, ap_no } }; - if( argc > 0 ) invocation_name = argv[0]; CRC32_init(); + /* static because valgrind complains and memory management in C sucks */ + static struct Arg_parser parser; if( !ap_init( &parser, argc, argv, options, 0 ) ) { show_error( mem_msg, 0, false ); return 1; } if( ap_error( &parser ) ) /* bad option */ { show_error( ap_error( &parser ), 0, true ); return 1; } + int argind = 0; for( ; argind < ap_arguments( &parser ); ++argind ) { const int code = ap_code( &parser, argind ); - const char * const arg = ap_argument( &parser, argind ); if( !code ) break; /* no more options */ + const char * const pn = ap_parsed_name( &parser, argind ); + const char * const arg = ap_argument( &parser, argind ); switch( code ) { case 'a': ignore_trailing = false; break; @@ -761,7 +797,7 @@ int main( const int argc, const char * const argv[] ) else { default_output_filename = arg; } break; case 'q': verbosity = -1; break; case 't': set_mode( &program_mode, m_test ); break; - case 'u': buffer_size = get_dict_size( arg ); break; + case 'u': buffer_size = get_dict_size( arg, pn ); break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; case opt_lt: loose_trailing = true; break; @@ -769,15 +805,17 @@ int main( const int argc, const char * const argv[] ) } } /* end process options */ -#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) +#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ setmode( STDIN_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY ); #endif - num_filenames = max( 1, ap_arguments( &parser ) - argind ); + static const char ** filenames = 0; + int num_filenames = max( 1, ap_arguments( &parser ) - argind ); filenames = resize_buffer( filenames, num_filenames * sizeof filenames[0] ); filenames[0] = "-"; + bool filenames_given = false; for( i = 0; argind + i < ap_arguments( &parser ); ++i ) { filenames[i] = ap_argument( &parser, argind + i ); @@ -816,17 +854,18 @@ int main( const int argc, const char * const argv[] ) if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) ) set_signals( signal_handler ); + static struct Pretty_print pp; Pp_init( &pp, filenames, num_filenames ); + int failed_tests = 0; + int retval = 0; const bool one_to_one = !to_stdout && program_mode != m_test && !to_file; + bool stdin_used = false; for( i = 0; i < num_filenames; ++i ) { - unsigned long long cfile_size; const char * input_filename = ""; int infd; - int tmp; struct stat in_stats; - const struct stat * in_statsp; Pp_set_name( &pp, filenames[i] ); if( strcmp( filenames[i], "-" ) == 0 ) @@ -872,11 +911,13 @@ int main( const int argc, const char * const argv[] ) } } - in_statsp = ( input_filename[0] && one_to_one ) ? &in_stats : 0; - cfile_size = ( input_filename[0] && S_ISREG( in_stats.st_mode ) ) ? - ( in_stats.st_size + 99 ) / 100 : 0; - tmp = decompress( cfile_size, infd, &pp, buffer_size, ignore_trailing, - loose_trailing, program_mode == m_test ); + const struct stat * const in_statsp = + ( input_filename[0] && one_to_one ) ? &in_stats : 0; + const unsigned long long cfile_size = + ( input_filename[0] && S_ISREG( in_stats.st_mode ) ) ? + ( in_stats.st_size + 99 ) / 100 : 0; + int tmp = decompress( cfile_size, infd, &pp, buffer_size, ignore_trailing, + loose_trailing, program_mode == m_test ); if( close( infd ) != 0 ) { show_file_error( pp.name, "Error closing input file", errno ); set_retval( &tmp, 1 ); } diff --git a/testsuite/check.sh b/testsuite/check.sh index 19928dd..c495ba1 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lunzip - Decompressor for the lzip format -# Copyright (C) 2010-2021 Antonio Diaz Diaz. +# Copyright (C) 2010-2022 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute, and modify it. @@ -91,6 +91,7 @@ rm -f uin.lz || framework_failure printf "LZIP\001-.............................." | "${LZIP}" -t 2> /dev/null printf "LZIP\002-.............................." | "${LZIP}" -t 2> /dev/null printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null +rm -f out || framework_failure printf "\ntesting decompression..." @@ -114,19 +115,23 @@ lines=$("${LZIP}" -tvv "${in_em}" 2>&1 | wc -l) || test_failed $LINENO lines=$("${LZIP}" -lvv "${in_em}" | wc -l) || test_failed $LINENO [ "${lines}" -eq 11 ] || test_failed $LINENO "${lines}" +"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO cat "${in_lz}" > copy.lz || framework_failure "${LZIP}" -dk copy.lz || test_failed $LINENO cmp in copy || test_failed $LINENO -printf "to be overwritten" > copy || framework_failure -"${LZIP}" -d copy.lz 2> /dev/null +cat fox > copy || framework_failure +cat "${in_lz}" > out.lz || framework_failure +rm -f out || framework_failure +"${LZIP}" -d copy.lz out.lz 2> /dev/null # skip copy, decompress out [ $? = 1 ] || test_failed $LINENO +cmp fox copy || test_failed $LINENO +cmp in out || test_failed $LINENO "${LZIP}" -df copy.lz || test_failed $LINENO [ ! -e copy.lz ] || test_failed $LINENO cmp in copy || test_failed $LINENO +rm -f out || framework_failure printf "to be overwritten" > copy || framework_failure -"${LZIP}" -d -o copy < "${in_lz}" 2> /dev/null -[ $? = 1 ] || test_failed $LINENO "${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO cmp in copy || test_failed $LINENO rm -f out copy || framework_failure @@ -154,7 +159,7 @@ rm -f copy anyothername.out || framework_failure [ $? = 1 ] || test_failed $LINENO "${LZIP}" -cdq in "${in_lz}" > copy [ $? = 2 ] || test_failed $LINENO -cat copy in | cmp in - || test_failed $LINENO +cat copy in | cmp in - || test_failed $LINENO # copy must be empty "${LZIP}" -cdq nx_file.lz "${in_lz}" > copy [ $? = 1 ] || test_failed $LINENO cmp in copy || test_failed $LINENO @@ -207,6 +212,7 @@ printf "\ngarbage" >> copy2.lz || framework_failure printf "to be overwritten" > copy2 || framework_failure "${LZIP}" -df copy2.lz || test_failed $LINENO cmp in2 copy2 || test_failed $LINENO +rm -f copy2 || framework_failure for i in 12 5120 6Ki 29 512KiB ; do printf "to be overwritten" > copy || framework_failure @@ -215,12 +221,12 @@ for i in 12 5120 6Ki 29 512KiB ; do rm -f copy || framework_failure "${LZIP}" -d -u$i -o copy "${in_lz}" || test_failed $LINENO $i cmp in copy || test_failed $LINENO $i - rm -f copy2 || framework_failure "${LZIP}" -d -u$i -o copy2 "${in_lz}" "${in_lz}" || test_failed $LINENO $i cmp in2 copy2 || test_failed $LINENO $i + rm -f copy2 || framework_failure done -rm -f in2 copy copy2 || framework_failure +rm -f in2 copy || framework_failure printf "\ntesting bad input..." @@ -285,7 +291,6 @@ for i in fox_v2.lz fox_s11.lz fox_de20.lz \ [ $? = 2 ] || test_failed $LINENO $i done -"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO for i in fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do "${LZIP}" -cdq "${testdir}"/$i > out [ $? = 2 ] || test_failed $LINENO $i -- cgit v1.2.3