diff options
-rw-r--r-- | ChangeLog | 15 | ||||
-rw-r--r-- | INSTALL | 14 | ||||
-rw-r--r-- | Makefile.in | 6 | ||||
-rw-r--r-- | NEWS | 36 | ||||
-rw-r--r-- | README | 46 | ||||
-rw-r--r-- | carg_parser.c | 2 | ||||
-rw-r--r-- | carg_parser.h | 2 | ||||
-rwxr-xr-x | configure | 16 | ||||
-rw-r--r-- | decoder.c | 140 | ||||
-rw-r--r-- | decoder.h | 2 | ||||
-rw-r--r-- | doc/lunzip.1 | 4 | ||||
-rw-r--r-- | file_index.c | 272 | ||||
-rw-r--r-- | list.c | 38 | ||||
-rw-r--r-- | lzip.h | 56 | ||||
-rw-r--r-- | lzip_index.c | 273 | ||||
-rw-r--r-- | lzip_index.h (renamed from file_index.h) | 38 | ||||
-rw-r--r-- | main.c | 105 | ||||
-rwxr-xr-x | testsuite/check.sh | 114 |
18 files changed, 619 insertions, 560 deletions
@@ -1,3 +1,14 @@ +2019-01-01 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.11 released. + * File_* renamed to Lzip_*. + * lzip.h (Lzip_trailer): New function 'Lt_verify_consistency'. + * lzip_index.c: Detect some kinds of corrupt trailers. + * main.c (main): Check return value of close( infd ). + * main.c: Compile on DOS with DJGPP. + * configure: Accept appending to CFLAGS, 'CFLAGS+=OPTIONS'. + * INSTALL: Document use of CFLAGS+='-D __USE_MINGW_ANSI_STDIO'. + 2018-02-05 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.10 released. @@ -19,7 +30,7 @@ * Decompression time has been reduced by 7%. * main.c: Continue testing if any input file is a terminal. * main.c: Show trailing data in both hexadecimal and ASCII. - * file_index.c: Improve detection of bad dict and trailing data. + * lzip_index.c: Improve detection of bad dict and trailing data. * lzip.h: Unified messages for bad magic, trailing data, etc. 2016-05-12 Antonio Diaz Diaz <antonio@gnu.org> @@ -93,7 +104,7 @@ * Created from the decompression code of clzip 1.1. -Copyright (C) 2010-2018 Antonio Diaz Diaz. +Copyright (C) 2010-2019 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and @@ -1,10 +1,14 @@ Requirements ------------ You will need a C compiler. -I use gcc 5.3.0 and 4.1.2, but the code should compile with any -standards compliant compiler. +I use gcc 5.3.0 and 4.1.2, but the code should compile with any standards +compliant compiler. Gcc is available at http://gcc.gnu.org. +The operating system must allow signal handlers read access to objects with +static storage duration so that the cleanup handler for Control-C can delete +the partial output file. + Procedure --------- @@ -23,6 +27,10 @@ the main archive. cd lunzip[version] ./configure + If you are compiling on MinGW, use: + + ./configure CFLAGS+='-D __USE_MINGW_ANSI_STDIO' + 3. Run make. make @@ -61,7 +69,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2010-2018 Antonio Diaz Diaz. +Copyright (C) 2010-2019 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/Makefile.in b/Makefile.in index 4c0d3a5..691bd14 100644 --- a/Makefile.in +++ b/Makefile.in @@ -7,7 +7,7 @@ INSTALL_DIR = $(INSTALL) -d -m 755 SHELL = /bin/sh CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 -objs = carg_parser.o file_index.o list.o decoder.o main.o +objs = carg_parser.o lzip_index.o list.o decoder.o main.o .PHONY : all install install-bin install-info install-man \ @@ -31,8 +31,8 @@ main.o : main.c $(objs) : Makefile carg_parser.o : carg_parser.h decoder.o : lzip.h decoder.h -file_index.o : lzip.h file_index.h -list.o : lzip.h file_index.h +list.o : lzip.h lzip_index.h +lzip_index.o : lzip.h lzip_index.h main.o : carg_parser.h lzip.h decoder.h @@ -1,32 +1,14 @@ -Changes in version 1.10: +Changes in version 1.11: -The option '--loose-trailing', has been added. +Detection of forbidden combinations of characters in trailing data has +been improved. -The test used by lunzip to discriminate trailing data from a corrupt -header in multimember or concatenated files has been improved to a -Hamming distance (HD) of 3, and the 3 bit flips must happen in different -magic bytes for the test to fail. As a consequence some kinds of files -no longer can be appended to a lzip file as trailing data unless the -'--loose-trailing' option is used when decompressing. -Lziprecover can be used to remove conflicting trailing data from a file. +Errors are now also checked when closing the input file. -The contents of a corrupt or truncated header found in a multimember -file is now shown, after the error message, in the same format as -trailing data. +Lunzip now compiles on DOS with DJGPP. (Patch from Robert Riebisch). -The 'bits/byte' ratio has been replaced with the inverse compression -ratio in the output. +The configure script now accepts appending options to CFLAGS using the +syntax 'CFLAGS+=OPTIONS'. -The progress of decompression is now shown at verbosity level 2 (-vv) or -higher. - -Progress of decompression is only shown if stderr is a terminal. - -A final diagnostic is now shown at verbosity level 1 (-v) or higher if -any file fails the test when testing multiple files. - -In case of (de)compressed size mismatch, the stored size is now also -shown in hexadecimal to ease visual comparison. - -The dictionary size is now shown at verbosity level 4 (-vvvv) when -decompressing or testing. +It has been documented in INSTALL the use of +CFLAGS+='-D __USE_MINGW_ANSI_STDIO' when compiling on MinGW. @@ -5,22 +5,21 @@ small size makes it well suited for embedded devices or software installers that need to decompress files but don't need compression capabilities. Lunzip is fully compatible with lzip-1.4 or newer. -The lzip file format is designed for data sharing and long-term -archiving, taking into account both data integrity and decoder -availability: +The lzip file format is designed for data sharing and long-term archiving, +taking into account both data integrity and decoder availability: * The lzip format provides very safe integrity checking and some data - recovery means. The lziprecover program can repair bit-flip errors + recovery means. The lziprecover program can repair bit flip errors (one of the most common forms of data corruption) in lzip files, and provides data recovery capabilities, including error-checked merging of damaged copies of a file. * The lzip format is as simple as possible (but not simpler). The - lzip manual provides the source code of a simple decompressor along - with a detailed explanation of how it works, so that with the only - help of the lzip manual it would be possible for a digital - archaeologist to extract the data from a lzip file long after - quantum computers eventually render LZMA obsolete. + lzip manual provides the source code of a simple decompressor + along with a detailed explanation of how it works, so that with + the only help of the lzip manual it would be possible for a + digital archaeologist to extract the data from a lzip file long + after quantum computers eventually render LZMA obsolete. * Additionally the lzip reference implementation is copylefted, which guarantees that it will remain free forever. @@ -30,26 +29,25 @@ repair the nearer it is from the beginning of the file. Therefore, with the help of lziprecover, losing an entire archive just because of a corrupt byte near the beginning is a thing of the past. -Lunzip uses the same well-defined exit status values used by lzip and -bzip2, which makes it safer than decompressors returning ambiguous -warning values (like gunzip) when it is used as a back end for other -programs like tar or zutils. +Lunzip uses the same well-defined exit status values used by lzip, which +makes it safer than decompressors returning ambiguous warning values (like +gunzip) when it is used as a back end for other programs like tar or zutils. -Lunzip provides a "low memory" mode able to decompress any file using as +Lunzip provides a 'low memory' mode able to decompress any file using as little memory as 50 kB, irrespective of the dictionary size used to compress the file. To activate it, specify the size of the output buffer -with the "--buffer-size" option and lunzip will use the decompressed +with the '--buffer-size' option and lunzip will use the decompressed file as dictionary for distances beyond the buffer size. Of course, the smaller the buffer size used in relation to the dictionary size, the more accesses to disk are needed and the slower the decompression is. -This "low memory" mode only works when decompressing to a regular file +This 'low memory' mode only works when decompressing to a regular file and is intended for systems without enough memory (RAM + swap) to keep the whole dictionary at once. It has been tested on a laptop with a 486 processor and 4 MiB of RAM. The amount of memory required by lunzip to decompress a file is about 46 kB larger than the dictionary size used to compress that file, unless -the "--buffer-size" option is specified. +the '--buffer-size' option is specified. Lunzip attempts to guess the name for the decompressed file from that of the compressed file as follows: @@ -60,21 +58,21 @@ anyothername becomes anyothername.out Decompressing a file is much like copying or moving it; therefore lunzip preserves the access and modification dates, permissions, and, when -possible, ownership of the file just as "cp -p" does. (If the user ID or +possible, ownership of the file just as 'cp -p' does. (If the user ID or the group ID can't be duplicated, the file permission bits S_ISUID and S_ISGID are cleared). Lunzip is able to read from some types of non regular files if the -"--stdout" option is specified. +'--stdout' option is specified. If no file names are specified, lunzip decompresses from standard input to standard output. In this case, lunzip will decline to read compressed input from a terminal. -Lunzip will correctly decompress a file which is the concatenation of -two or more compressed files. The result is the concatenation of the -corresponding decompressed files. Integrity testing of concatenated -compressed files is also supported. +Lunzip will correctly decompress a file which is the concatenation of two or +more compressed files. The result is the concatenation of the corresponding +decompressed files. Integrity testing of concatenated compressed files is +also supported. The ideas embodied in lunzip are due to (at least) the following people: Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for @@ -83,7 +81,7 @@ range encoding), Igor Pavlov (for putting all the above together in LZMA), and Julian Seward (for bzip2's CLI). -Copyright (C) 2010-2018 Antonio Diaz Diaz. +Copyright (C) 2010-2019 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/carg_parser.c b/carg_parser.c index 10ad4dc..ce01d7b 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2018 Antonio Diaz Diaz. + Copyright (C) 2006-2019 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/carg_parser.h b/carg_parser.h index e1c70dd..dcae2de 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2018 Antonio Diaz Diaz. + Copyright (C) 2006-2019 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Lunzip - Decompressor for the lzip format -# Copyright (C) 2010-2018 Antonio Diaz Diaz. +# Copyright (C) 2010-2019 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. pkgname=lunzip -pkgversion=1.10 +pkgversion=1.11 progname=lunzip srctrigger=doc/${progname}.1 @@ -70,6 +70,7 @@ while [ $# != 0 ] ; do echo " CC=COMPILER C compiler to use [${CC}]" echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]" echo " CFLAGS=OPTIONS command line options for the C compiler [${CFLAGS}]" + echo " CFLAGS+=OPTIONS append options to the current value of CFLAGS" echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]" echo exit 0 ;; @@ -93,10 +94,11 @@ while [ $# != 0 ] ; do --mandir=*) mandir=${optarg} ;; --no-create) no_create=yes ;; - CC=*) CC=${optarg} ;; - CPPFLAGS=*) CPPFLAGS=${optarg} ;; - CFLAGS=*) CFLAGS=${optarg} ;; - LDFLAGS=*) LDFLAGS=${optarg} ;; + CC=*) CC=${optarg} ;; + CPPFLAGS=*) CPPFLAGS=${optarg} ;; + CFLAGS=*) CFLAGS=${optarg} ;; + CFLAGS+=*) CFLAGS="${CFLAGS} ${optarg}" ;; + LDFLAGS=*) LDFLAGS=${optarg} ;; --*) echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;; @@ -168,7 +170,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lunzip - Decompressor for the lzip format -# Copyright (C) 2010-2018 Antonio Diaz Diaz. +# Copyright (C) 2010-2019 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2018 Antonio Diaz Diaz. + Copyright (C) 2010-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -115,15 +115,15 @@ void LZd_flush_data( struct LZ_decoder * const d ) static bool LZd_verify_trailer( struct LZ_decoder * const d, struct Pretty_print * const pp ) { - File_trailer trailer; - int size = Rd_read_data( d->rdec, trailer, Ft_size ); + Lzip_trailer trailer; + int size = Rd_read_data( d->rdec, trailer, Lt_size ); const unsigned long long data_size = LZd_data_position( d ); const unsigned long long member_size = Rd_member_position( d->rdec ); unsigned td_crc; unsigned long long td_size, tm_size; bool error = false; - if( size < Ft_size ) + if( size < Lt_size ) { error = true; if( verbosity >= 0 ) @@ -132,10 +132,10 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d, fprintf( stderr, "Trailer truncated at trailer position %d;" " some checks may fail.\n", size ); } - while( size < Ft_size ) trailer[size++] = 0; + while( size < Lt_size ) trailer[size++] = 0; } - td_crc = Ft_get_data_crc( trailer ); + td_crc = Lt_get_data_crc( trailer ); if( td_crc != LZd_crc( d ) ) { error = true; @@ -146,7 +146,7 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d, td_crc, LZd_crc( d ) ); } } - td_size = Ft_get_data_size( trailer ); + td_size = Lt_get_data_size( trailer ); if( td_size != data_size ) { error = true; @@ -157,7 +157,7 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d, td_size, td_size, data_size, data_size ); } } - tm_size = Ft_get_member_size( trailer ); + tm_size = Lt_get_member_size( trailer ); if( tm_size != member_size ) { error = true; @@ -232,9 +232,11 @@ int LZd_decode_member( struct LZ_decoder * const d, Rd_load( rdec ); while( !Rd_finished( rdec ) ) { + int len; const int pos_state = LZd_data_position( d ) & pos_state_mask; - if( Rd_decode_bit( rdec, &bm_match[state][pos_state] ) == 0 ) /* 1st bit */ + if( Rd_decode_bit( rdec, &bm_match[state][pos_state] ) == 0 ) /* 1st bit */ { + /* literal byte */ Bit_model * const bm = bm_literal[get_lit_state(LZd_peek_prev( d ))]; if( St_is_char( state ) ) { @@ -246,84 +248,82 @@ int LZd_decode_member( struct LZ_decoder * const d, state -= ( state < 10 ) ? 3 : 6; LZd_put_byte( d, Rd_decode_matched( rdec, bm, LZd_peek( d, rep0 ) ) ); } + continue; } - else /* match or repeated match */ + /* match or repeated match */ + if( Rd_decode_bit( rdec, &bm_rep[state] ) != 0 ) /* 2nd bit */ { - int len; - if( Rd_decode_bit( rdec, &bm_rep[state] ) != 0 ) /* 2nd bit */ + if( Rd_decode_bit( rdec, &bm_rep0[state] ) == 0 ) /* 3rd bit */ { - if( Rd_decode_bit( rdec, &bm_rep0[state] ) == 0 ) /* 3rd bit */ - { - if( Rd_decode_bit( rdec, &bm_len[state][pos_state] ) == 0 ) /* 4th bit */ - { state = St_set_short_rep( state ); - LZd_put_byte( d, LZd_peek( d, rep0 ) ); continue; } - } + if( Rd_decode_bit( rdec, &bm_len[state][pos_state] ) == 0 ) /* 4th bit */ + { state = St_set_short_rep( state ); + LZd_put_byte( d, LZd_peek( d, rep0 ) ); continue; } + } + else + { + unsigned distance; + if( Rd_decode_bit( rdec, &bm_rep1[state] ) == 0 ) /* 4th bit */ + distance = rep1; else { - unsigned distance; - if( Rd_decode_bit( rdec, &bm_rep1[state] ) == 0 ) /* 4th bit */ - distance = rep1; + if( Rd_decode_bit( rdec, &bm_rep2[state] ) == 0 ) /* 5th bit */ + distance = rep2; else - { - if( Rd_decode_bit( rdec, &bm_rep2[state] ) == 0 ) /* 5th bit */ - distance = rep2; - else - { distance = rep3; rep3 = rep2; } - rep2 = rep1; - } - rep1 = rep0; - rep0 = distance; + { distance = rep3; rep3 = rep2; } + rep2 = rep1; } - state = St_set_rep( state ); - len = min_match_len + Rd_decode_len( rdec, &rep_len_model, pos_state ); + rep1 = rep0; + rep0 = distance; } - else /* match */ + state = St_set_rep( state ); + len = min_match_len + Rd_decode_len( rdec, &rep_len_model, pos_state ); + } + else /* match */ + { + unsigned distance; + len = min_match_len + Rd_decode_len( rdec, &match_len_model, pos_state ); + distance = Rd_decode_tree6( rdec, bm_dis_slot[get_len_state(len)] ); + if( distance >= start_dis_model ) { - unsigned distance; - len = min_match_len + Rd_decode_len( rdec, &match_len_model, pos_state ); - distance = Rd_decode_tree6( rdec, bm_dis_slot[get_len_state(len)] ); - if( distance >= start_dis_model ) + const unsigned dis_slot = distance; + const int direct_bits = ( dis_slot >> 1 ) - 1; + distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + if( dis_slot < end_dis_model ) + distance += Rd_decode_tree_reversed( rdec, + bm_dis + ( distance - dis_slot ), direct_bits ); + else { - const unsigned dis_slot = distance; - const int direct_bits = ( dis_slot >> 1 ) - 1; - distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits; - if( dis_slot < end_dis_model ) - distance += Rd_decode_tree_reversed( rdec, - bm_dis + ( distance - dis_slot ), direct_bits ); - else + distance += + Rd_decode( rdec, direct_bits - dis_align_bits ) << dis_align_bits; + distance += Rd_decode_tree_reversed4( rdec, bm_align ); + if( distance == 0xFFFFFFFFU ) /* marker found */ { - distance += - Rd_decode( rdec, direct_bits - dis_align_bits ) << dis_align_bits; - distance += Rd_decode_tree_reversed4( rdec, bm_align ); - if( distance == 0xFFFFFFFFU ) /* marker found */ + Rd_normalize( rdec ); + LZd_flush_data( d ); + if( len == min_match_len ) /* End Of Stream marker */ + { + if( LZd_verify_trailer( d, pp ) ) return 0; else return 3; + } + if( len == min_match_len + 1 ) /* Sync Flush marker */ + { + Rd_load( rdec ); continue; + } + if( verbosity >= 0 ) { - Rd_normalize( rdec ); - LZd_flush_data( d ); - if( len == min_match_len ) /* End Of Stream marker */ - { - if( LZd_verify_trailer( d, pp ) ) return 0; else return 3; - } - if( len == min_match_len + 1 ) /* Sync Flush marker */ - { - Rd_load( rdec ); continue; - } - if( verbosity >= 0 ) - { - Pp_show_msg( pp, 0 ); - fprintf( stderr, "Unsupported marker code '%d'\n", len ); - } - return 4; + Pp_show_msg( pp, 0 ); + fprintf( stderr, "Unsupported marker code '%d'\n", len ); } + return 4; } } - rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance; - state = St_set_match( state ); - if( rep0 >= d->dictionary_size || - ( rep0 >= LZd_data_position( d ) && !d->pos_wrapped_dic ) ) - { LZd_flush_data( d ); return 1; } } - copy_block( d, rep0, len ); + rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance; + state = St_set_match( state ); + if( rep0 >= d->dictionary_size || + ( rep0 >= LZd_data_position( d ) && !d->pos_wrapped_dic ) ) + { LZd_flush_data( d ); return 1; } } + copy_block( d, rep0, len ); } LZd_flush_data( d ); return 2; @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2018 Antonio Diaz Diaz. + Copyright (C) 2010-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/doc/lunzip.1 b/doc/lunzip.1 index ffcd8a3..1198723 100644 --- a/doc/lunzip.1 +++ b/doc/lunzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH LUNZIP "1" "February 2018" "lunzip 1.10" "User Commands" +.TH LUNZIP "1" "January 2019" "lunzip 1.11" "User Commands" .SH NAME lunzip \- decompressor for the lzip format .SH SYNOPSIS @@ -81,7 +81,7 @@ Report bugs to lzip\-bug@nongnu.org .br Lunzip home page: http://www.nongnu.org/lzip/lunzip.html .SH COPYRIGHT -Copyright \(co 2018 Antonio Diaz Diaz. +Copyright \(co 2019 Antonio Diaz Diaz. License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. diff --git a/file_index.c b/file_index.c deleted file mode 100644 index 1872d67..0000000 --- a/file_index.c +++ /dev/null @@ -1,272 +0,0 @@ -/* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2018 Antonio Diaz Diaz. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#define _FILE_OFFSET_BITS 64 - -#include <errno.h> -#include <stdbool.h> -#include <stdio.h> -#include <string.h> -#include <stdint.h> -#include <stdlib.h> -#include <unistd.h> - -#include "lzip.h" -#include "file_index.h" - - -static int seek_read( const int fd, uint8_t * const buf, const int size, - const long long pos ) - { - if( lseek( fd, pos, SEEK_SET ) == pos ) - return readblock( fd, buf, size ); - return 0; - } - - -static bool add_error( struct File_index * const fi, const char * const msg ) - { - const int len = strlen( msg ); - void * tmp = resize_buffer( fi->error, fi->error_size + len + 1 ); - if( !tmp ) return false; - fi->error = (char *)tmp; - strncpy( fi->error + fi->error_size, msg, len + 1 ); - fi->error_size += len; - return true; - } - - -static bool push_back_member( struct File_index * const fi, - const long long dp, const long long ds, - const long long mp, const long long ms, - const unsigned dict_size ) - { - struct Member * p; - void * tmp = resize_buffer( fi->member_vector, - ( fi->members + 1 ) * sizeof fi->member_vector[0] ); - if( !tmp ) - { add_error( fi, "Not enough memory." ); fi->retval = 1; return false; } - fi->member_vector = (struct Member *)tmp; - p = &(fi->member_vector[fi->members]); - init_member( p, dp, ds, mp, ms, dict_size ); - ++fi->members; - return true; - } - - -static void Fi_free_member_vector( struct File_index * const fi ) - { - if( fi->member_vector ) - { free( fi->member_vector ); fi->member_vector = 0; } - fi->members = 0; - } - - -static void Fi_reverse_member_vector( struct File_index * const fi ) - { - struct Member tmp; - long i; - for( i = 0; i < fi->members / 2; ++i ) - { - tmp = fi->member_vector[i]; - fi->member_vector[i] = fi->member_vector[fi->members-i-1]; - fi->member_vector[fi->members-i-1] = tmp; - } - } - - -static void Fi_set_errno_error( struct File_index * const fi, - const char * const msg ) - { - add_error( fi, msg ); add_error( fi, strerror( errno ) ); - fi->retval = 1; - } - -static void Fi_set_num_error( struct File_index * const fi, - const char * const msg, unsigned long long num ) - { - char buf[80]; - snprintf( buf, sizeof buf, "%s%llu", msg, num ); - add_error( fi, buf ); - fi->retval = 2; - } - - -/* If successful, push last member and set pos to member header. */ -static bool Fi_skip_trailing_data( struct File_index * const fi, - const int fd, long long * const pos, - const bool ignore_trailing, - const bool loose_trailing ) - { - enum { block_size = 16384, - buffer_size = block_size + Ft_size - 1 + Fh_size }; - uint8_t buffer[buffer_size]; - int bsize = *pos % block_size; /* total bytes in buffer */ - int search_size, rd_size; - unsigned long long ipos; - int i; - if( bsize <= buffer_size - block_size ) bsize += block_size; - search_size = bsize; /* bytes to search for trailer */ - rd_size = bsize; /* bytes to read from file */ - ipos = *pos - rd_size; /* aligned to block_size */ - if( *pos < min_member_size ) return false; - - while( true ) - { - const uint8_t max_msb = ( ipos + search_size ) >> 56; - if( seek_read( fd, buffer, rd_size, ipos ) != rd_size ) - { Fi_set_errno_error( fi, "Error seeking member trailer: " ); - return false; } - for( i = search_size; i >= Ft_size; --i ) - if( buffer[i-1] <= max_msb ) /* most significant byte of member_size */ - { - File_header header; - File_trailer * trailer = (File_trailer *)( buffer + i - Ft_size ); - const unsigned long long member_size = Ft_get_member_size( *trailer ); - unsigned dictionary_size; - if( member_size == 0 ) - { while( i > Ft_size && buffer[i-9] == 0 ) --i; continue; } - if( member_size < min_member_size || member_size > ipos + i ) - continue; - if( seek_read( fd, header, Fh_size, - ipos + i - member_size ) != Fh_size ) - { Fi_set_errno_error( fi, "Error reading member header: " ); - return false; } - dictionary_size = Fh_get_dictionary_size( header ); - if( !Fh_verify_magic( header ) || !Fh_verify_version( header ) || - !isvalid_ds( dictionary_size ) ) continue; - if( Fh_verify_prefix( buffer + i, bsize - i ) ) - { - add_error( fi, "Last member in input file is truncated or corrupt." ); - fi->retval = 2; return false; - } - if( !loose_trailing && bsize - i >= Fh_size && - Fh_verify_corrupt( buffer + i ) ) - { add_error( fi, corrupt_mm_msg ); fi->retval = 2; return false; } - if( !ignore_trailing ) - { add_error( fi, trailing_msg ); fi->retval = 2; return false; } - *pos = ipos + i - member_size; - return push_back_member( fi, 0, Ft_get_data_size( *trailer ), *pos, - member_size, dictionary_size ); - } - if( ipos <= 0 ) - { Fi_set_num_error( fi, "Member size in trailer is corrupt at pos ", - *pos - 8 ); - return false; } - bsize = buffer_size; - search_size = bsize - Fh_size; - rd_size = block_size; - ipos -= rd_size; - memcpy( buffer + rd_size, buffer, buffer_size - rd_size ); - } - } - - -bool Fi_init( struct File_index * const fi, const int infd, - const bool ignore_trailing, const bool loose_trailing ) - { - File_header header; - long long pos; - long i; - fi->member_vector = 0; - fi->error = 0; - fi->isize = lseek( infd, 0, SEEK_END ); - fi->members = 0; - fi->error_size = 0; - fi->retval = 0; - if( fi->isize < 0 ) - { Fi_set_errno_error( fi, "Input file is not seekable: " ); return false; } - if( fi->isize < min_member_size ) - { add_error( fi, "Input file is too short." ); fi->retval = 2; - return false; } - if( fi->isize > INT64_MAX ) - { add_error( fi, "Input file is too long (2^63 bytes or more)." ); - fi->retval = 2; return false; } - - if( seek_read( infd, header, Fh_size, 0 ) != Fh_size ) - { Fi_set_errno_error( fi, "Error reading member header: " ); return false; } - if( !Fh_verify_magic( header ) ) - { add_error( fi, bad_magic_msg ); fi->retval = 2; return false; } - if( !Fh_verify_version( header ) ) - { add_error( fi, bad_version( Fh_version( header ) ) ); fi->retval = 2; - return false; } - if( !isvalid_ds( Fh_get_dictionary_size( header ) ) ) - { add_error( fi, bad_dict_msg ); fi->retval = 2; return false; } - - pos = fi->isize; /* always points to a header or to EOF */ - while( pos >= min_member_size ) - { - File_trailer trailer; - unsigned long long member_size; - unsigned dictionary_size; - if( seek_read( infd, trailer, Ft_size, pos - Ft_size ) != Ft_size ) - { Fi_set_errno_error( fi, "Error reading member trailer: " ); break; } - member_size = Ft_get_member_size( trailer ); - if( member_size < min_member_size || member_size > (unsigned long long)pos ) - { - if( fi->members <= 0 ) - { if( Fi_skip_trailing_data( fi, infd, &pos, ignore_trailing, - loose_trailing ) ) continue; else return false; } - Fi_set_num_error( fi, "Member size in trailer is corrupt at pos ", pos - 8 ); - break; - } - if( seek_read( infd, header, Fh_size, pos - member_size ) != Fh_size ) - { Fi_set_errno_error( fi, "Error reading member header: " ); break; } - dictionary_size = Fh_get_dictionary_size( header ); - if( !Fh_verify_magic( header ) || !Fh_verify_version( header ) || - !isvalid_ds( dictionary_size ) ) - { - if( fi->members <= 0 ) - { if( Fi_skip_trailing_data( fi, infd, &pos, ignore_trailing, - loose_trailing ) ) continue; else return false; } - Fi_set_num_error( fi, "Bad header at pos ", pos - member_size ); - break; - } - pos -= member_size; - if( !push_back_member( fi, 0, Ft_get_data_size( trailer ), pos, - member_size, dictionary_size ) ) - return false; - } - if( pos != 0 || fi->members <= 0 ) - { - Fi_free_member_vector( fi ); - if( fi->retval == 0 ) - { add_error( fi, "Can't create file index." ); fi->retval = 2; } - return false; - } - Fi_reverse_member_vector( fi ); - for( i = 0; i < fi->members - 1; ++i ) - { - const long long end = block_end( fi->member_vector[i].dblock ); - if( end < 0 || end > INT64_MAX ) - { - Fi_free_member_vector( fi ); - add_error( fi, "Data in input file is too long (2^63 bytes or more)." ); - fi->retval = 2; return false; - } - fi->member_vector[i+1].dblock.pos = end; - } - return true; - } - - -void Fi_free( struct File_index * const fi ) - { - Fi_free_member_vector( fi ); - if( fi->error ) { free( fi->error ); fi->error = 0; } - fi->error_size = 0; - } @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2018 Antonio Diaz Diaz. + Copyright (C) 2010-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,7 +25,7 @@ #include <sys/stat.h> #include "lzip.h" -#include "file_index.h" +#include "lzip_index.h" static void list_line( const unsigned long long uncomp_size, @@ -53,7 +53,7 @@ int list_files( const char * const filenames[], const int num_filenames, for( i = 0; i < num_filenames; ++i ) { const char * input_filename; - struct File_index file_index; + struct Lzip_index lzip_index; struct stat in_stats; /* not used */ int infd; const bool from_stdin = ( strcmp( filenames[i], "-" ) == 0 ); @@ -63,18 +63,18 @@ int list_files( const char * const filenames[], const int num_filenames, open_instream( input_filename, &in_stats, true, true ); if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } - Fi_init( &file_index, infd, ignore_trailing, loose_trailing ); + Li_init( &lzip_index, infd, ignore_trailing, loose_trailing ); close( infd ); - if( file_index.retval != 0 ) + if( lzip_index.retval != 0 ) { - show_file_error( input_filename, file_index.error, 0 ); - if( retval < file_index.retval ) retval = file_index.retval; - Fi_free( &file_index ); continue; + show_file_error( input_filename, lzip_index.error, 0 ); + if( retval < lzip_index.retval ) retval = lzip_index.retval; + Li_free( &lzip_index ); continue; } if( verbosity >= 0 ) { - const unsigned long long udata_size = Fi_udata_size( &file_index ); - const unsigned long long cdata_size = Fi_cdata_size( &file_index ); + const unsigned long long udata_size = Li_udata_size( &lzip_index ); + const unsigned long long cdata_size = Li_cdata_size( &lzip_index ); total_comp += cdata_size; total_uncomp += udata_size; ++files; if( first_post ) { @@ -87,23 +87,23 @@ int list_files( const char * const filenames[], const int num_filenames, long long trailing_size; unsigned dictionary_size = 0; long i; - for( i = 0; i < file_index.members; ++i ) + for( i = 0; i < lzip_index.members; ++i ) dictionary_size = - max( dictionary_size, Fi_dictionary_size( &file_index, i ) ); - trailing_size = Fi_file_size( &file_index ) - cdata_size; + max( dictionary_size, Li_dictionary_size( &lzip_index, i ) ); + trailing_size = Li_file_size( &lzip_index ) - cdata_size; printf( "%s %5ld %6lld ", format_ds( dictionary_size ), - file_index.members, trailing_size ); + lzip_index.members, trailing_size ); } list_line( udata_size, cdata_size, input_filename ); - if( verbosity >= 2 && file_index.members > 1 ) + if( verbosity >= 2 && lzip_index.members > 1 ) { long i; fputs( " member data_pos data_size member_pos member_size\n", stdout ); - for( i = 0; i < file_index.members; ++i ) + for( i = 0; i < lzip_index.members; ++i ) { - const struct Block * db = Fi_dblock( &file_index, i ); - const struct Block * mb = Fi_mblock( &file_index, i ); + const struct Block * db = Li_dblock( &lzip_index, i ); + const struct Block * mb = Li_mblock( &lzip_index, i ); printf( "%5ld %15llu %15llu %15llu %15llu\n", i + 1, db->pos, db->size, mb->pos, mb->size ); } @@ -111,7 +111,7 @@ int list_files( const char * const filenames[], const int num_filenames, } fflush( stdout ); } - Fi_free( &file_index ); + Li_free( &lzip_index ); } if( verbosity >= 0 && files > 1 ) { @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2018 Antonio Diaz Diaz. + Copyright (C) 2010-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -119,7 +119,7 @@ static inline void Lm_init( struct Len_model * const lm ) /* defined in main.c */ extern int verbosity; -struct Pretty_print +struct Pretty_print /* requires global var 'int verbosity' */ { const char * name; char * padded_name; @@ -146,7 +146,7 @@ static inline void Pp_init( struct Pretty_print * const pp, { const char * const s = filenames[i]; const unsigned len = (strcmp( s, "-" ) == 0) ? stdin_name_len : strlen( s ); - if( len > pp->longest_name ) pp->longest_name = len; + if( pp->longest_name < len ) pp->longest_name = len; } if( pp->longest_name == 0 ) pp->longest_name = stdin_name_len; } @@ -209,40 +209,40 @@ static inline bool isvalid_ds( const unsigned dictionary_size ) dictionary_size <= max_dictionary_size ); } -static const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; /* "LZIP" */ +static const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; /* "LZIP" */ -typedef uint8_t File_header[6]; /* 0-3 magic bytes */ +typedef uint8_t Lzip_header[6]; /* 0-3 magic bytes */ /* 4 version */ /* 5 coded_dict_size */ -enum { Fh_size = 6 }; +enum { Lh_size = 6 }; -static inline bool Fh_verify_magic( const File_header data ) - { return ( memcmp( data, magic_string, 4 ) == 0 ); } +static inline bool Lh_verify_magic( const Lzip_header data ) + { return ( memcmp( data, lzip_magic, 4 ) == 0 ); } /* detect (truncated) header */ -static inline bool Fh_verify_prefix( const File_header data, const int sz ) +static inline bool Lh_verify_prefix( const Lzip_header data, const int sz ) { int i; for( i = 0; i < sz && i < 4; ++i ) - if( data[i] != magic_string[i] ) return false; + if( data[i] != lzip_magic[i] ) return false; return ( sz > 0 ); } /* detect corrupt header */ -static inline bool Fh_verify_corrupt( const File_header data ) +static inline bool Lh_verify_corrupt( const Lzip_header data ) { int matches = 0; int i; for( i = 0; i < 4; ++i ) - if( data[i] == magic_string[i] ) ++matches; + if( data[i] == lzip_magic[i] ) ++matches; return ( matches > 1 && matches < 4 ); } -static inline uint8_t Fh_version( const File_header data ) +static inline uint8_t Lh_version( const Lzip_header data ) { return data[4]; } -static inline bool Fh_verify_version( const File_header data ) +static inline bool Lh_verify_version( const Lzip_header data ) { return ( data[4] == 1 ); } -static inline unsigned Fh_get_dictionary_size( const File_header data ) +static inline unsigned Lh_get_dictionary_size( const Lzip_header data ) { unsigned sz = ( 1 << ( data[5] & 0x1F ) ); if( sz > min_dictionary_size ) @@ -251,34 +251,48 @@ static inline unsigned Fh_get_dictionary_size( const File_header data ) } -typedef uint8_t File_trailer[20]; +typedef uint8_t Lzip_trailer[20]; /* 0-3 CRC32 of the uncompressed data */ /* 4-11 size of the uncompressed data */ /* 12-19 member size including header and trailer */ +enum { Lt_size = 20 }; -enum { Ft_size = 20 }; - -static inline unsigned Ft_get_data_crc( const File_trailer data ) +static inline unsigned Lt_get_data_crc( const Lzip_trailer data ) { unsigned tmp = 0; int i; for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } -static inline unsigned long long Ft_get_data_size( const File_trailer data ) +static inline unsigned long long Lt_get_data_size( const Lzip_trailer data ) { unsigned long long tmp = 0; int i; for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } -static inline unsigned long long Ft_get_member_size( const File_trailer data ) +static inline unsigned long long Lt_get_member_size( const Lzip_trailer data ) { unsigned long long tmp = 0; int i; for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } +/* check internal consistency */ +static inline bool Lt_verify_consistency( const Lzip_trailer data ) + { + const unsigned crc = Lt_get_data_crc( data ); + const unsigned long long dsize = Lt_get_data_size( data ); + const unsigned long long msize = Lt_get_member_size( data ); + const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size; + const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1; + if( ( crc == 0 ) != ( dsize == 0 ) ) return false; + if( msize < min_member_size ) return false; + if( mlimit > dsize && msize > mlimit ) return false; + if( dlimit > msize && dsize > dlimit ) return false; + return true; + } + static const char * const bad_magic_msg = "Bad magic number (file not in lzip format)."; static const char * const bad_dict_msg = "Invalid dictionary size in member header."; diff --git a/lzip_index.c b/lzip_index.c new file mode 100644 index 0000000..44c7b17 --- /dev/null +++ b/lzip_index.c @@ -0,0 +1,273 @@ +/* Lunzip - Decompressor for the lzip format + Copyright (C) 2010-2019 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <stdlib.h> +#include <unistd.h> + +#include "lzip.h" +#include "lzip_index.h" + + +static int seek_read( const int fd, uint8_t * const buf, const int size, + const long long pos ) + { + if( lseek( fd, pos, SEEK_SET ) == pos ) + return readblock( fd, buf, size ); + return 0; + } + + +static bool add_error( struct Lzip_index * const li, const char * const msg ) + { + const int len = strlen( msg ); + void * tmp = resize_buffer( li->error, li->error_size + len + 1 ); + if( !tmp ) return false; + li->error = (char *)tmp; + strncpy( li->error + li->error_size, msg, len + 1 ); + li->error_size += len; + return true; + } + + +static bool push_back_member( struct Lzip_index * const li, + const long long dp, const long long ds, + const long long mp, const long long ms, + const unsigned dict_size ) + { + struct Member * p; + void * tmp = resize_buffer( li->member_vector, + ( li->members + 1 ) * sizeof li->member_vector[0] ); + if( !tmp ) + { add_error( li, "Not enough memory." ); li->retval = 1; return false; } + li->member_vector = (struct Member *)tmp; + p = &(li->member_vector[li->members]); + init_member( p, dp, ds, mp, ms, dict_size ); + ++li->members; + return true; + } + + +static void Li_free_member_vector( struct Lzip_index * const li ) + { + if( li->member_vector ) + { free( li->member_vector ); li->member_vector = 0; } + li->members = 0; + } + + +static void Li_reverse_member_vector( struct Lzip_index * const li ) + { + struct Member tmp; + long i; + for( i = 0; i < li->members / 2; ++i ) + { + tmp = li->member_vector[i]; + li->member_vector[i] = li->member_vector[li->members-i-1]; + li->member_vector[li->members-i-1] = tmp; + } + } + + +static void Li_set_errno_error( struct Lzip_index * const li, + const char * const msg ) + { + add_error( li, msg ); add_error( li, strerror( errno ) ); + li->retval = 1; + } + +static void Li_set_num_error( struct Lzip_index * const li, + const char * const msg, unsigned long long num ) + { + char buf[80]; + snprintf( buf, sizeof buf, "%s%llu", msg, num ); + add_error( li, buf ); + li->retval = 2; + } + + +/* If successful, push last member and set pos to member header. */ +static bool Li_skip_trailing_data( struct Lzip_index * const li, + const int fd, long long * const pos, + const bool ignore_trailing, + const bool loose_trailing ) + { + enum { block_size = 16384, + buffer_size = block_size + Lt_size - 1 + Lh_size }; + uint8_t buffer[buffer_size]; + int bsize = *pos % block_size; /* total bytes in buffer */ + int search_size, rd_size; + unsigned long long ipos; + int i; + if( *pos < min_member_size ) return false; + if( bsize <= buffer_size - block_size ) bsize += block_size; + search_size = bsize; /* bytes to search for trailer */ + rd_size = bsize; /* bytes to read from file */ + ipos = *pos - rd_size; /* aligned to block_size */ + + while( true ) + { + const uint8_t max_msb = ( ipos + search_size ) >> 56; + if( seek_read( fd, buffer, rd_size, ipos ) != rd_size ) + { Li_set_errno_error( li, "Error seeking member trailer: " ); + return false; } + for( i = search_size; i >= Lt_size; --i ) + if( buffer[i-1] <= max_msb ) /* most significant byte of member_size */ + { + Lzip_header header; + const Lzip_trailer * const trailer = + (const Lzip_trailer *)( buffer + i - Lt_size ); + const unsigned long long member_size = Lt_get_member_size( *trailer ); + unsigned dictionary_size; + if( member_size == 0 ) /* skip trailing zeros */ + { while( i > Lt_size && buffer[i-9] == 0 ) --i; continue; } + if( member_size > ipos + i || !Lt_verify_consistency( *trailer ) ) + continue; + if( seek_read( fd, header, Lh_size, + ipos + i - member_size ) != Lh_size ) + { Li_set_errno_error( li, "Error reading member header: " ); + return false; } + dictionary_size = Lh_get_dictionary_size( header ); + if( !Lh_verify_magic( header ) || !Lh_verify_version( header ) || + !isvalid_ds( dictionary_size ) ) continue; + if( Lh_verify_prefix( buffer + i, bsize - i ) ) + { + add_error( li, "Last member in input file is truncated or corrupt." ); + li->retval = 2; return false; + } + if( !loose_trailing && bsize - i >= Lh_size && + Lh_verify_corrupt( buffer + i ) ) + { add_error( li, corrupt_mm_msg ); li->retval = 2; return false; } + if( !ignore_trailing ) + { add_error( li, trailing_msg ); li->retval = 2; return false; } + *pos = ipos + i - member_size; + return push_back_member( li, 0, Lt_get_data_size( *trailer ), *pos, + member_size, dictionary_size ); + } + if( ipos <= 0 ) + { Li_set_num_error( li, "Bad trailer at pos ", *pos - Lt_size ); + return false; } + bsize = buffer_size; + search_size = bsize - Lh_size; + rd_size = block_size; + ipos -= rd_size; + memcpy( buffer + rd_size, buffer, buffer_size - rd_size ); + } + } + + +bool Li_init( struct Lzip_index * const li, const int infd, + const bool ignore_trailing, const bool loose_trailing ) + { + Lzip_header header; + long long pos; + long i; + li->member_vector = 0; + li->error = 0; + li->insize = lseek( infd, 0, SEEK_END ); + li->members = 0; + li->error_size = 0; + li->retval = 0; + if( li->insize < 0 ) + { Li_set_errno_error( li, "Input file is not seekable: " ); return false; } + if( li->insize < min_member_size ) + { add_error( li, "Input file is too short." ); li->retval = 2; + return false; } + if( li->insize > INT64_MAX ) + { add_error( li, "Input file is too long (2^63 bytes or more)." ); + li->retval = 2; return false; } + + if( seek_read( infd, header, Lh_size, 0 ) != Lh_size ) + { Li_set_errno_error( li, "Error reading member header: " ); return false; } + if( !Lh_verify_magic( header ) ) + { add_error( li, bad_magic_msg ); li->retval = 2; return false; } + if( !Lh_verify_version( header ) ) + { add_error( li, bad_version( Lh_version( header ) ) ); li->retval = 2; + return false; } + if( !isvalid_ds( Lh_get_dictionary_size( header ) ) ) + { add_error( li, bad_dict_msg ); li->retval = 2; return false; } + + pos = li->insize; /* always points to a header or to EOF */ + while( pos >= min_member_size ) + { + Lzip_trailer trailer; + unsigned long long member_size; + unsigned dictionary_size; + if( seek_read( infd, trailer, Lt_size, pos - Lt_size ) != Lt_size ) + { Li_set_errno_error( li, "Error reading member trailer: " ); break; } + member_size = Lt_get_member_size( trailer ); + if( member_size > (unsigned long long)pos || !Lt_verify_consistency( trailer ) ) + { + if( li->members <= 0 ) + { if( Li_skip_trailing_data( li, infd, &pos, ignore_trailing, + loose_trailing ) ) continue; else return false; } + Li_set_num_error( li, "Bad trailer at pos ", pos - Lt_size ); + break; + } + if( seek_read( infd, header, Lh_size, pos - member_size ) != Lh_size ) + { Li_set_errno_error( li, "Error reading member header: " ); break; } + dictionary_size = Lh_get_dictionary_size( header ); + if( !Lh_verify_magic( header ) || !Lh_verify_version( header ) || + !isvalid_ds( dictionary_size ) ) + { + if( li->members <= 0 ) + { if( Li_skip_trailing_data( li, infd, &pos, ignore_trailing, + loose_trailing ) ) continue; else return false; } + Li_set_num_error( li, "Bad header at pos ", pos - member_size ); + break; + } + pos -= member_size; + if( !push_back_member( li, 0, Lt_get_data_size( trailer ), pos, + member_size, dictionary_size ) ) + return false; + } + if( pos != 0 || li->members <= 0 ) + { + Li_free_member_vector( li ); + if( li->retval == 0 ) + { add_error( li, "Can't create file index." ); li->retval = 2; } + return false; + } + Li_reverse_member_vector( li ); + for( i = 0; ; ++i ) + { + const long long end = block_end( li->member_vector[i].dblock ); + if( end < 0 || end > INT64_MAX ) + { + Li_free_member_vector( li ); + add_error( li, "Data in input file is too long (2^63 bytes or more)." ); + li->retval = 2; return false; + } + if( i + 1 >= li->members ) break; + li->member_vector[i+1].dblock.pos = end; + } + return true; + } + + +void Li_free( struct Lzip_index * const li ) + { + Li_free_member_vector( li ); + if( li->error ) { free( li->error ); li->error = 0; } + li->error_size = 0; + } diff --git a/file_index.h b/lzip_index.h index 49b6519..03be274 100644 --- a/file_index.h +++ b/lzip_index.h @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2018 Antonio Diaz Diaz. + Copyright (C) 2010-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -46,45 +46,45 @@ static inline void init_member( struct Member * const m, { init_block( &m->dblock, dp, ds ); init_block( &m->mblock, mp, ms ); m->dictionary_size = dict_size; } -struct File_index +struct Lzip_index { struct Member * member_vector; char * error; - long long isize; + long long insize; long members; int error_size; int retval; }; -bool Fi_init( struct File_index * const fi, const int infd, +bool Li_init( struct Lzip_index * const li, const int infd, const bool ignore_trailing, const bool loose_trailing ); -void Fi_free( struct File_index * const fi ); +void Li_free( struct Lzip_index * const li ); -static inline long long Fi_udata_size( const struct File_index * const fi ) +static inline long long Li_udata_size( const struct Lzip_index * const li ) { - if( fi->members <= 0 ) return 0; - return block_end( fi->member_vector[fi->members-1].dblock ); + if( li->members <= 0 ) return 0; + return block_end( li->member_vector[li->members-1].dblock ); } -static inline long long Fi_cdata_size( const struct File_index * const fi ) +static inline long long Li_cdata_size( const struct Lzip_index * const li ) { - if( fi->members <= 0 ) return 0; - return block_end( fi->member_vector[fi->members-1].mblock ); + if( li->members <= 0 ) return 0; + return block_end( li->member_vector[li->members-1].mblock ); } /* total size including trailing data (if any) */ -static inline long long Fi_file_size( const struct File_index * const fi ) - { if( fi->isize >= 0 ) return fi->isize; else return 0; } +static inline long long Li_file_size( const struct Lzip_index * const li ) + { if( li->insize >= 0 ) return li->insize; else return 0; } -static inline const struct Block * Fi_dblock( const struct File_index * const fi, +static inline const struct Block * Li_dblock( const struct Lzip_index * const li, const long i ) - { return &fi->member_vector[i].dblock; } + { return &li->member_vector[i].dblock; } -static inline const struct Block * Fi_mblock( const struct File_index * const fi, +static inline const struct Block * Li_mblock( const struct Lzip_index * const li, const long i ) - { return &fi->member_vector[i].mblock; } + { return &li->member_vector[i].mblock; } -static inline unsigned Fi_dictionary_size( const struct File_index * const fi, +static inline unsigned Li_dictionary_size( const struct Lzip_index * const li, const long i ) - { return fi->member_vector[i].dictionary_size; } + { return li->member_vector[i].dictionary_size; } @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for the lzip format - Copyright (C) 2010-2018 Antonio Diaz Diaz. + Copyright (C) 2010-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -36,19 +36,24 @@ #include <unistd.h> #include <utime.h> #include <sys/stat.h> -#if defined(__MSVCRT__) +#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) #include <io.h> +#if defined(__MSVCRT__) #define fchmod(x,y) 0 #define fchown(x,y,z) 0 #define SIGHUP SIGTERM #define S_ISSOCK(x) 0 +#ifndef S_IRGRP #define S_IRGRP 0 #define S_IWGRP 0 #define S_IROTH 0 #define S_IWOTH 0 #endif -#if defined(__OS2__) -#include <io.h> +#endif +#if defined(__DJGPP__) +#define S_ISSOCK(x) 0 +#define S_ISVTX 0 +#endif #endif #include "carg_parser.h" @@ -65,9 +70,8 @@ int verbosity = 0; -const char * const Program_name = "Lunzip"; const char * const program_name = "lunzip"; -const char * const program_year = "2018"; +const char * const program_year = "2019"; const char * invocation_name = 0; const struct { const char * from; const char * to; } known_extensions[] = { @@ -77,6 +81,8 @@ const struct { const char * from; const char * to; } known_extensions[] = { enum Mode { m_compress, m_decompress, m_list, m_test }; +/* Variables used in signal handler context. + They are not declared volatile because the handler never returns. */ char * output_filename = 0; int outfd = -1; bool delete_output_on_interrupt = false; @@ -260,7 +266,7 @@ static int get_dict_size( const char * const arg ) const long bits = strtol( arg, &tail, 0 ); if( bits >= min_dictionary_bits && bits <= max_dictionary_bits && *tail == 0 ) - return ( 1 << bits ); + return 1 << bits; return getnum( arg, min_dictionary_size, max_dictionary_size ); } @@ -368,8 +374,17 @@ static bool open_outstream( const bool force, const bool from_stdin ) } +static void set_signals( void (*action)(int) ) + { + signal( SIGHUP, action ); + signal( SIGINT, action ); + signal( SIGTERM, action ); + } + + void cleanup_and_fail( const int retval ) { + set_signals( SIG_IGN ); /* ignore signals */ if( delete_output_on_interrupt ) { delete_output_on_interrupt = false; @@ -384,6 +399,14 @@ void cleanup_and_fail( const int retval ) } +void signal_handler( int sig ) + { + if( sig ) {} /* keep compiler happy */ + show_error( "Control-C or similar caught, quitting.", 0, false ); + cleanup_and_fail( 1 ); + } + + /* Set permissions, owner and times. */ static void close_and_set_permissions( const struct stat * const in_statsp ) { @@ -455,9 +478,9 @@ static bool show_trailing_data( const uint8_t * const data, const int size, static int decompress( const unsigned long long cfile_size, const int infd, - struct Pretty_print * const pp, const unsigned buffer_size, - const bool ignore_trailing, const bool loose_trailing, - const bool testing ) + struct Pretty_print * const pp, const unsigned buffer_size, + const bool ignore_trailing, const bool loose_trailing, + const bool testing ) { unsigned long long partial_file_pos = 0; struct Range_decoder rdec; @@ -473,16 +496,16 @@ static int decompress( const unsigned long long cfile_size, const int infd, { int result, size; unsigned dictionary_size; - File_header header; + Lzip_header header; struct LZ_decoder decoder; Rd_reset_member_position( &rdec ); - size = Rd_read_data( &rdec, header, Fh_size ); + size = Rd_read_data( &rdec, header, Lh_size ); if( Rd_finished( &rdec ) ) /* End Of File */ { if( first_member ) { show_file_error( pp->name, "File ends unexpectedly at member header.", 0 ); retval = 2; } - else if( Fh_verify_prefix( header, size ) ) + else if( Lh_verify_prefix( header, size ) ) { Pp_show_msg( pp, "Truncated header in multimember file." ); show_trailing_data( header, size, pp, true, -1 ); retval = 2; } @@ -491,11 +514,11 @@ static int decompress( const unsigned long long cfile_size, const int infd, retval = 2; break; } - if( !Fh_verify_magic( header ) ) + if( !Lh_verify_magic( header ) ) { if( first_member ) { show_file_error( pp->name, bad_magic_msg, 0 ); retval = 2; } - else if( !loose_trailing && Fh_verify_corrupt( header ) ) + else if( !loose_trailing && Lh_verify_corrupt( header ) ) { Pp_show_msg( pp, corrupt_mm_msg ); show_trailing_data( header, size, pp, false, -1 ); retval = 2; } @@ -503,10 +526,10 @@ static int decompress( const unsigned long long cfile_size, const int infd, retval = 2; break; } - if( !Fh_verify_version( header ) ) - { Pp_show_msg( pp, bad_version( Fh_version( header ) ) ); + if( !Lh_verify_version( header ) ) + { Pp_show_msg( pp, bad_version( Lh_version( header ) ) ); retval = 2; break; } - dictionary_size = Fh_get_dictionary_size( header ); + dictionary_size = Lh_get_dictionary_size( header ); if( !isvalid_ds( dictionary_size ) ) { Pp_show_msg( pp, bad_dict_msg ); retval = 2; break; } @@ -528,7 +551,8 @@ static int decompress( const unsigned long long cfile_size, const int infd, { Pp_show_msg( pp, 0 ); fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ? - "File ends unexpectedly" : "Decoder error", partial_file_pos ); + "File ends unexpectedly" : "Decoder error", + partial_file_pos ); } retval = 2; break; } @@ -542,31 +566,13 @@ static int decompress( const unsigned long long cfile_size, const int infd, } -void signal_handler( int sig ) - { - if( sig ) {} /* keep compiler happy */ - show_error( "Control-C or similar caught, quitting.", 0, false ); - cleanup_and_fail( 1 ); - } - - -static void set_signals( void ) - { - signal( SIGHUP, signal_handler ); - signal( SIGINT, signal_handler ); - signal( SIGTERM, signal_handler ); - } - - void show_error( const char * const msg, const int errcode, const bool help ) { if( verbosity < 0 ) return; if( msg && msg[0] ) - { - fprintf( stderr, "%s: %s", program_name, msg ); - if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) ); - fputc( '\n', stderr ); - } + fprintf( stderr, "%s: %s%s%s\n", program_name, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? strerror( errcode ) : "" ); if( help ) fprintf( stderr, "Try '%s --help' for more information.\n", invocation_name ); @@ -576,10 +582,10 @@ void show_error( const char * const msg, const int errcode, const bool help ) void show_file_error( const char * const filename, const char * const msg, const int errcode ) { - if( verbosity < 0 ) return; - fprintf( stderr, "%s: %s: %s", program_name, filename, msg ); - if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) ); - fputc( '\n', stderr ); + if( verbosity >= 0 ) + fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? strerror( errcode ) : "" ); } @@ -698,7 +704,7 @@ int main( const int argc, const char * const argv[] ) } } /* end process options */ -#if defined(__MSVCRT__) || defined(__OS2__) +#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) setmode( STDIN_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY ); #endif @@ -737,7 +743,7 @@ int main( const int argc, const char * const argv[] ) if( !to_stdout && program_mode != m_test && ( filenames_given || default_output_filename[0] ) ) - set_signals(); + set_signals( signal_handler ); Pp_init( &pp, filenames, num_filenames ); @@ -825,6 +831,12 @@ int main( const int argc, const char * const argv[] ) ( in_statsp->st_size + 99 ) / 100 : 0; tmp = decompress( cfile_size, infd, &pp, buffer_size, ignore_trailing, loose_trailing, program_mode == m_test ); + if( close( infd ) != 0 ) + { + show_error( input_filename[0] ? "Error closing input file" : + "Error closing stdin", errno, false ); + if( tmp < 1 ) tmp = 1; + } if( tmp > retval ) retval = tmp; if( tmp ) { if( program_mode != m_test ) cleanup_and_fail( retval ); @@ -834,7 +846,6 @@ int main( const int argc, const char * const argv[] ) close_and_set_permissions( in_statsp ); if( input_filename[0] ) { - close( infd ); if( !keep_input_files && !to_stdout && program_mode != m_test ) remove( input_filename ); } diff --git a/testsuite/check.sh b/testsuite/check.sh index 728c7c6..cf7bb72 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lunzip - Decompressor for the lzip format -# Copyright (C) 2010-2018 Antonio Diaz Diaz. +# Copyright (C) 2010-2019 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -38,9 +38,10 @@ printf "testing lunzip-%s..." "$2" cat "${in_lz}" > uin.lz || framework_failure for i in bad_size -1 0 4095 513MiB 1G 1T 1P 1E 1Z 1Y 10KB ; do "${LZIP}" -dfkqu $i uin.lz - { [ $? = 1 ] && [ ! -e uin ] ; } || test_failed $LINENO $i + [ $? = 1 ] || test_failed $LINENO $i + [ ! -e uin ] || test_failed $LINENO $i done -rm -f uin.lz +rm -f uin.lz || framework_failure "${LZIP}" -lq in [ $? = 2 ] || test_failed $LINENO "${LZIP}" -tq in @@ -89,26 +90,28 @@ printf "\ntesting decompression..." "${LZIP}" -cd "${in_lz}" > copy || test_failed $LINENO cmp in copy || test_failed $LINENO -rm -f copy +rm -f copy || framework_failure cat "${in_lz}" > copy.lz || framework_failure "${LZIP}" -dk copy.lz || test_failed $LINENO cmp in copy || test_failed $LINENO printf "to be overwritten" > copy || framework_failure "${LZIP}" -d copy.lz 2> /dev/null [ $? = 1 ] || test_failed $LINENO -"${LZIP}" -df copy.lz -{ [ $? = 0 ] && [ ! -e copy.lz ] && cmp in copy ; } || test_failed $LINENO +"${LZIP}" -df copy.lz || test_failed $LINENO +[ ! -e copy.lz ] || test_failed $LINENO +cmp in copy || test_failed $LINENO printf "to be overwritten" > copy || framework_failure "${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO cmp in copy || test_failed $LINENO -rm -f copy +rm -f copy || framework_failure cat "${in_lz}" > anyothername || framework_failure -"${LZIP}" -dv --output copy - anyothername - < "${in_lz}" 2> /dev/null -{ [ $? = 0 ] && cmp in copy && cmp in anyothername.out ; } || +"${LZIP}" -dv --output copy - anyothername - < "${in_lz}" 2> /dev/null || test_failed $LINENO -rm -f copy anyothername.out +cmp in copy || test_failed $LINENO +cmp in anyothername.out || test_failed $LINENO +rm -f copy anyothername.out || framework_failure "${LZIP}" -lq in "${in_lz}" [ $? = 2 ] || test_failed $LINENO @@ -119,10 +122,12 @@ rm -f copy anyothername.out "${LZIP}" -tq nx_file.lz "${in_lz}" [ $? = 1 ] || test_failed $LINENO "${LZIP}" -cdq in "${in_lz}" > copy -{ [ $? = 2 ] && cat copy in | cmp in - ; } || test_failed $LINENO +[ $? = 2 ] || test_failed $LINENO +cat copy in | cmp in - || test_failed $LINENO "${LZIP}" -cdq nx_file.lz "${in_lz}" > copy -{ [ $? = 1 ] && cmp in copy ; } || test_failed $LINENO -rm -f copy +[ $? = 1 ] || test_failed $LINENO +cmp in copy || test_failed $LINENO +rm -f copy || framework_failure cat "${in_lz}" > copy.lz || framework_failure for i in 1 2 3 4 5 6 7 ; do printf "g" >> copy.lz || framework_failure @@ -132,11 +137,15 @@ for i in 1 2 3 4 5 6 7 ; do [ $? = 2 ] || test_failed $LINENO $i done "${LZIP}" -dq in copy.lz -{ [ $? = 2 ] && [ -e copy.lz ] && [ ! -e copy ] && [ ! -e in.out ] ; } || - test_failed $LINENO +[ $? = 2 ] || test_failed $LINENO +[ -e copy.lz ] || test_failed $LINENO +[ ! -e copy ] || test_failed $LINENO +[ ! -e in.out ] || test_failed $LINENO "${LZIP}" -dq nx_file.lz copy.lz -{ [ $? = 1 ] && [ ! -e copy.lz ] && [ ! -e nx_file ] && cmp in copy ; } || - test_failed $LINENO +[ $? = 1 ] || test_failed $LINENO +[ ! -e copy.lz ] || test_failed $LINENO +[ ! -e nx_file ] || test_failed $LINENO +cmp in copy || test_failed $LINENO cat in in > in2 || framework_failure cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure @@ -148,7 +157,7 @@ cmp in2 copy2 || test_failed $LINENO cat in2.lz > copy2.lz || framework_failure printf "\ngarbage" >> copy2.lz || framework_failure "${LZIP}" -tvvvv copy2.lz 2> /dev/null || test_failed $LINENO -rm -f copy2 +rm -f copy2 || framework_failure "${LZIP}" -alq copy2.lz [ $? = 2 ] || test_failed $LINENO "${LZIP}" -atq copy2.lz @@ -156,12 +165,15 @@ rm -f copy2 "${LZIP}" -atq < copy2.lz [ $? = 2 ] || test_failed $LINENO "${LZIP}" -adkq copy2.lz -{ [ $? = 2 ] && [ ! -e copy2 ] ; } || test_failed $LINENO +[ $? = 2 ] || test_failed $LINENO +[ ! -e copy2 ] || test_failed $LINENO "${LZIP}" -adkq -o copy2 < copy2.lz -{ [ $? = 2 ] && [ ! -e copy2 ] ; } || test_failed $LINENO +[ $? = 2 ] || test_failed $LINENO +[ ! -e copy2 ] || test_failed $LINENO printf "to be overwritten" > copy2 || framework_failure "${LZIP}" -df copy2.lz || test_failed $LINENO cmp in2 copy2 || test_failed $LINENO +rm -f in2 copy2 || framework_failure for i in 12 5120 6Ki 29 512KiB ; do printf "to be overwritten" > copy || framework_failure @@ -173,38 +185,58 @@ printf "\ntesting bad input..." headers='LZIp LZiP LZip LzIP LzIp LziP lZIP lZIp lZiP lzIP' body='\001\014\000\203\377\373\377\377\300\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000$\000\000\000\000\000\000\000' -cat "${in_lz}" > in0.lz -printf "LZIP${body}" >> in0.lz -if "${LZIP}" -tq in0.lz ; then +cat "${in_lz}" > int.lz +printf "LZIP${body}" >> int.lz +if "${LZIP}" -tq int.lz ; then for header in ${headers} ; do - printf "${header}${body}" > in0.lz # first member - "${LZIP}" -lq in0.lz + printf "${header}${body}" > int.lz # first member + "${LZIP}" -lq int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq < int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -cdq int.lz > /dev/null + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -lq --loose-trailing int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing < int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -cdq --loose-trailing int.lz > /dev/null + [ $? = 2 ] || test_failed $LINENO ${header} + cat "${in_lz}" > int.lz + printf "${header}${body}" >> int.lz # trailing data + "${LZIP}" -lq int.lz [ $? = 2 ] || test_failed $LINENO ${header} - "${LZIP}" -tq in0.lz + "${LZIP}" -tq int.lz [ $? = 2 ] || test_failed $LINENO ${header} - "${LZIP}" -lq --loose-trailing in0.lz + "${LZIP}" -tq < int.lz [ $? = 2 ] || test_failed $LINENO ${header} - "${LZIP}" -tq --loose-trailing in0.lz + "${LZIP}" -cdq int.lz > /dev/null [ $? = 2 ] || test_failed $LINENO ${header} - cat "${in_lz}" > in0.lz - printf "${header}${body}" >> in0.lz # trailing data - "${LZIP}" -lq in0.lz + "${LZIP}" -lq --loose-trailing int.lz || + test_failed $LINENO ${header} + "${LZIP}" -t --loose-trailing int.lz || + test_failed $LINENO ${header} + "${LZIP}" -t --loose-trailing < int.lz || + test_failed $LINENO ${header} + "${LZIP}" -cd --loose-trailing int.lz > /dev/null || + test_failed $LINENO ${header} + "${LZIP}" -lq --loose-trailing --trailing-error int.lz [ $? = 2 ] || test_failed $LINENO ${header} - "${LZIP}" -tq in0.lz + "${LZIP}" -tq --loose-trailing --trailing-error int.lz [ $? = 2 ] || test_failed $LINENO ${header} - "${LZIP}" -lq --loose-trailing in0.lz - [ $? = 0 ] || test_failed $LINENO ${header} - "${LZIP}" -t --loose-trailing in0.lz - [ $? = 0 ] || test_failed $LINENO ${header} - "${LZIP}" -lq --loose-trailing --trailing-error in0.lz + "${LZIP}" -tq --loose-trailing --trailing-error < int.lz [ $? = 2 ] || test_failed $LINENO ${header} - "${LZIP}" -tq --loose-trailing --trailing-error in0.lz + "${LZIP}" -cdq --loose-trailing --trailing-error int.lz > /dev/null [ $? = 2 ] || test_failed $LINENO ${header} done else printf "\nwarning: skipping header test: 'printf' does not work on your system." fi -rm -f in0.lz +rm -f int.lz || framework_failure cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && @@ -225,7 +257,7 @@ if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && else printf "\nwarning: skipping truncation test: 'dd' does not work on your system." fi -rm -f in3.lz trunc.lz +rm -f in2.lz in3.lz trunc.lz out || framework_failure cat "${in_lz}" > ingin.lz || framework_failure printf "g" >> ingin.lz || framework_failure @@ -238,7 +270,7 @@ cmp in copy || test_failed $LINENO "${LZIP}" -t < ingin.lz || test_failed $LINENO "${LZIP}" -d < ingin.lz > copy || test_failed $LINENO cmp in copy || test_failed $LINENO -rm -f ingin.lz +rm -f copy ingin.lz || framework_failure echo if [ ${fail} = 0 ] ; then |