From d1eec8184551651d58eefdea942648f2c8432240 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 28 Jun 2020 11:38:47 +0200 Subject: Merging upstream version 0.6. Signed-off-by: Daniel Baumann --- ChangeLog | 8 +++ INSTALL | 6 +- Makefile.in | 4 +- NEWS | 41 ++++++------ README | 100 +++++++++++++++++++++++------- carg_parser.c | 26 ++++---- carg_parser.h | 66 ++++++++++---------- configure | 11 ++-- doc/xlunzip.1 | 33 +++++----- in_place.c | 72 ++++++++++++---------- lzip.h | 26 ++++---- main.c | 178 ++++++++++++++++++++++++----------------------------- testsuite/check.sh | 92 +++++++++++++++++++++------ 13 files changed, 383 insertions(+), 280 deletions(-) diff --git a/ChangeLog b/ChangeLog index 127eba9..218c806 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2020-06-24 Antonio Diaz Diaz + + * Version 0.6 released. + * Make '-o' behave like '-c', but writing to file instead of stdout. + * in_place.c: Multimember decompression should now work for any file + that fits in memory and is smaller than (LONG_MAX - extra_bytes). + * README: Add an analysis of the in-place decompression. + 2020-04-27 Antonio Diaz Diaz * Version 0.5 released. diff --git a/INSTALL b/INSTALL index c4b56f1..d6f1bc5 100644 --- a/INSTALL +++ b/INSTALL @@ -1,6 +1,6 @@ Requirements ------------ -You will need a C compiler. +You will need a C99 compiler. (gcc 2.95 or newer is recommended). I use gcc 6.1.0 and 4.1.2, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -41,8 +41,8 @@ the main archive. documentation. Or type 'make install-compress', which additionally compresses the - man page after installation. (Installing compressed docs may become - the default in the future). + man page after installation. + (Installing compressed docs may become the default in the future). You can install only the program or the man page by typing 'make install-bin' or 'make install-man' respectively. diff --git a/Makefile.in b/Makefile.in index 7bf71b7..1270170 100644 --- a/Makefile.in +++ b/Makefile.in @@ -71,7 +71,7 @@ install-info : -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info" -if $(CAN_RUN_INSTALLINFO) ; then \ - install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ + install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ fi install-info-compress : install-info @@ -96,7 +96,7 @@ uninstall-bin : uninstall-info : -if $(CAN_RUN_INSTALLINFO) ; then \ - install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ + install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ fi -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* diff --git a/NEWS b/NEWS index abe699b..e78dfd5 100644 --- a/NEWS +++ b/NEWS @@ -1,22 +1,19 @@ -Changes in version 0.5: - -The lzip_decompress module has been updated to version 5.4.18. - -Xlunzip now reports an error if a file name is empty (xlunzip -t ""). - -The words 'decompressed' and 'compressed' have been replaced with the -shorter 'out' and 'in' in the verbose output when decompressing or testing. - -Decompression speed has been slightly increased. - -A compilation error with GCC 10 has been fixed. (Reported by Daniel Baumann). - -The commands needed to extract files from a tar.lz archive have been -documented in the output of '--help' and in the man page. - -Xlunzip now compiles on DOS with DJGPP. (Patch from Robert Riebisch). - -The configure script now accepts appending options to CFLAGS using the -syntax 'CFLAGS+=OPTIONS'. - -9 new test files have been added to the testsuite. +Changes in version 0.6: + +Option '-o, --output' now behaves like '-c, --stdout', but sending the +output unconditionally to a file instead of to standard output. See the new +description of '-o' in the manual. This change is not backwards compatible. +Therefore commands like: + xlunzip -d -o foo - bar.lz < foo.lz +must now be split into: + xlunzip -d -o foo - < foo.lz + xlunzip -d bar.lz +or rewritten as: + xlunzip -d - bar.lz < foo.lz > foo + +In-place decompression of concatenated files should now work for any +combination of files as long as they fit in memory and their total +decompressed size is smaller than LONG_MAX minus a small extra space. + +The README file now includes an analysis of the amount of memory required +for in-place decompression. diff --git a/README b/README index c5ebbf3..3e26a40 100644 --- a/README +++ b/README @@ -1,25 +1,23 @@ Description -Xlunzip is a test tool for the lzip decompression code of my lzip patch -for linux. Xlunzip is similar to lunzip, but it uses the lzip_decompress -linux module as a backend. Xlunzip tests the module for stream, -buffer-to-buffer and mixed decompression modes, including in-place -decompression (using the same buffer for input and output). You can use -xlunzip to verify that the module produces correct results when -decompressing single member files, multimember files, or the -concatenation of two or more compressed files. Xlunzip can be used with -unzcrash to test the robustness of the module to the decompression of -corrupted data. - -Note that the in-place decompression of concatenated files can't be -guaranteed to work because an arbitrarily low compression ratio of the -last part of the data can be achieved by appending enough empty -compressed members to a file, masking a high compression ratio at the -beginning of the data. - -The xlunzip tarball contains a copy of the lzip_decompress module and -can be compiled and tested without downloading or applying the patch to -the kernel. +Xlunzip is a test tool for the lzip decompression code of my lzip patch for +linux. Xlunzip is similar to lunzip, but it uses the lzip_decompress linux +module as a backend. Xlunzip tests the module for stream, buffer-to-buffer, +and mixed decompression modes, including in-place decompression (using the +same buffer for input and output). You can use xlunzip to verify that the +module produces correct results when decompressing single member files, +multimember files, or the concatenation of two or more compressed files. +Xlunzip can be used with unzcrash to test the robustness of the module to +the decompression of corrupted data. + +The distributed index feature of the lzip format allows xlunzip to +decompress concatenated files in place. This can't be guaranteed to work +with formats like gzip or bzip2 because they can't detect whether a high +compression ratio in the first members of the multimember data is being +masked by a low compression ratio in the last members. + +The xlunzip tarball contains a copy of the lzip_decompress module and can be +compiled and tested without downloading or applying the patch to the kernel. My lzip patch for linux can be found at http://download.savannah.gnu.org/releases/lzip/kernel/ @@ -29,14 +27,72 @@ Lzip related components in the kernel The lzip_decompress module in lib/lzip_decompress.c provides a versatile lzip decompression function able to do buffer to buffer decompression or -stream decompression with fill and flush callback functions. The usage -of the function is documented in include/linux/lzip.h. +stream decompression with fill and flush callback functions. The usage of +the function is documented in include/linux/lzip.h. For decompressing the kernel image, initramfs, and initrd, there is a wrapper function in lib/decompress_lunzip.c providing the same common interface as the other decompress_*.c files, which is defined in include/linux/decompress/generic.h. +Analysis of the in-place decompression +====================================== + +In order to decompress the kernel in place (using the same buffer for input +and output), the compressed data is placed at the end of the buffer used to +hold the decompressed data. The buffer must be large enough to contain after +the decompressed data extra space for a marker, a trailer, the maximum +possible data expansion, and (if the compressed data consists of more than +one member) N-1 empty members. + + |------ compressed data ------| + V V +|----------------|-------------------|---------| +^ ^ extra +|-------- decompressed data ---------| + +The input pointer initially points to the beginning of the compressed data +and the output pointer initially points to the beginning of the buffer. +Decompressing compressible data reduces the distance between the pointers, +while decompressing uncompressible data increases the distance. The extra +space must be large enough that the output pointer does not overrun the +input pointer even if all the overlap between compressed and decompressed +data is uncompressible. The worst case is very compressible data followed by +uncompressible data because in this case the output pointer increases faster +when the input pointer is smaller. + + | * <-- input pointer + | * , <-- output pointer + | * , ' + | x ' <-- overrun (x) +memory | * ,' +address | * ,' + |* ,' + | ,' + | ,' + |,' + `-------------------------- + time + +All we need to know to calculate the minimum required extra space is: + The maximum expansion ratio. + The size of the last part of a member required to verify integrity. + For multimember data, the overhead per member. (36 bytes for lzip). + +The maximum expansion ratio of LZMA data is of about 1.4%. Rounding this up +to 1/64 (1.5625%) and adding 36 bytes per input member, the extra space +required to decompress lzip data in place is: + extra_bytes = ( compressed_size >> 6 ) + members * 36 + +Using the compressed size to calculate the extra_bytes (as in the equation +above) may slightly overestimate the amount of space required in the worst +case. But calculating the extra_bytes from the uncompressed size (as does +linux) is wrong (and inefficient for high compression ratios). The formula +used in arch/x86/boot/header.S + extra_bytes = (uncompressed_size >> 8) + 65536 +fails with 1 MB of zeros followed by 8 MB of random data, and wastes memory +for compression ratios > 4:1. + Copyright (C) 2016-2020 Antonio Diaz Diaz. diff --git a/carg_parser.c b/carg_parser.c index 27d1a71..0f60878 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,20 +1,20 @@ -/* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2020 Antonio Diaz Diaz. +/* Arg_parser - POSIX/GNU command line argument parser. (C version) + Copyright (C) 2006-2020 Antonio Diaz Diaz. - This library is free software. Redistribution and use in source and - binary forms, with or without modification, are permitted provided - that the following conditions are met: + This library is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #include diff --git a/carg_parser.h b/carg_parser.h index 60472ef..a442b7b 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,44 +1,44 @@ -/* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2020 Antonio Diaz Diaz. +/* Arg_parser - POSIX/GNU command line argument parser. (C version) + Copyright (C) 2006-2020 Antonio Diaz Diaz. - This library is free software. Redistribution and use in source and - binary forms, with or without modification, are permitted provided - that the following conditions are met: + This library is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ -/* Arg_parser reads the arguments in 'argv' and creates a number of - option codes, option arguments, and non-option arguments. +/* Arg_parser reads the arguments in 'argv' and creates a number of + option codes, option arguments, and non-option arguments. - In case of error, 'ap_error' returns a non-null pointer to an error - message. + In case of error, 'ap_error' returns a non-null pointer to an error + message. - 'options' is an array of 'struct ap_Option' terminated by an element - containing a code which is zero. A null name means a short-only - option. A code value outside the unsigned char range means a - long-only option. + 'options' is an array of 'struct ap_Option' terminated by an element + containing a code which is zero. A null name means a short-only + option. A code value outside the unsigned char range means a + long-only option. - Arg_parser normally makes it appear as if all the option arguments - were specified before all the non-option arguments for the purposes - of parsing, even if the user of your program intermixed option and - non-option arguments. If you want the arguments in the exact order - the user typed them, call 'ap_init' with 'in_order' = true. + Arg_parser normally makes it appear as if all the option arguments + were specified before all the non-option arguments for the purposes + of parsing, even if the user of your program intermixed option and + non-option arguments. If you want the arguments in the exact order + the user typed them, call 'ap_init' with 'in_order' = true. - The argument '--' terminates all options; any following arguments are - treated as non-option arguments, even if they begin with a hyphen. + The argument '--' terminates all options; any following arguments are + treated as non-option arguments, even if they begin with a hyphen. - The syntax for optional option arguments is '-' - (without whitespace), or '--='. + The syntax for optional option arguments is '-' + (without whitespace), or '--='. */ #ifdef __cplusplus @@ -79,11 +79,11 @@ void ap_free( struct Arg_parser * const ap ); const char * ap_error( const struct Arg_parser * const ap ); - /* The number of arguments parsed (may be different from argc) */ +/* The number of arguments parsed. May be different from argc. */ int ap_arguments( const struct Arg_parser * const ap ); - /* If ap_code( i ) is 0, ap_argument( i ) is a non-option. - Else ap_argument( i ) is the option's argument (or empty). */ +/* If ap_code( i ) is 0, ap_argument( i ) is a non-option. + Else ap_argument( i ) is the option's argument (or empty). */ int ap_code( const struct Arg_parser * const ap, const int i ); const char * ap_argument( const struct Arg_parser * const ap, const int i ); diff --git a/configure b/configure index 94a1ec9..da06771 100755 --- a/configure +++ b/configure @@ -6,7 +6,7 @@ # to copy, distribute, and modify it. pkgname=xlunzip -pkgversion=0.5 +pkgversion=0.6 progname=xlunzip srctrigger=doc/${progname}.1 @@ -26,11 +26,7 @@ CFLAGS='-Wall -W -O2' LDFLAGS= # checking whether we are using GNU C. -/bin/sh -c "${CC} --version" > /dev/null 2>&1 || - { - CC=cc - CFLAGS=-O2 - } +/bin/sh -c "${CC} --version" > /dev/null 2>&1 || { CC=cc ; CFLAGS=-O2 ; } # Loop over all args args= @@ -42,7 +38,8 @@ while [ $# != 0 ] ; do shift # Add the argument quoted to args - args="${args} \"${option}\"" + if [ -z "${args}" ] ; then args="\"${option}\"" + else args="${args} \"${option}\"" ; fi # Split out the argument for options that take them case ${option} in diff --git a/doc/xlunzip.1 b/doc/xlunzip.1 index ea83510..2f7ccb1 100644 --- a/doc/xlunzip.1 +++ b/doc/xlunzip.1 @@ -1,27 +1,26 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH XLUNZIP "1" "April 2020" "xlunzip 0.5" "User Commands" +.TH XLUNZIP "1" "June 2020" "xlunzip 0.6" "User Commands" .SH NAME xlunzip \- test tool for the lzip_decompress linux module .SH SYNOPSIS .B xlunzip [\fI\,options\/\fR] [\fI\,files\/\fR] .SH DESCRIPTION -Xlunzip is a test tool for the lzip decompression code of my lzip patch -for linux. Xlunzip is similar to lunzip, but it uses the lzip_decompress -linux module as a backend. Xlunzip tests the module for stream, -buffer\-to\-buffer and mixed decompression modes, including in\-place -decompression (using the same buffer for input and output). You can use -xlunzip to verify that the module produces correct results when -decompressing single member files, multimember files, or the -concatenation of two or more compressed files. Xlunzip can be used with -unzcrash to test the robustness of the module to the decompression of -corrupted data. +Xlunzip is a test tool for the lzip decompression code of my lzip patch for +linux. Xlunzip is similar to lunzip, but it uses the lzip_decompress linux +module as a backend. Xlunzip tests the module for stream, buffer\-to\-buffer, +and mixed decompression modes, including in\-place decompression (using the +same buffer for input and output). You can use xlunzip to verify that the +module produces correct results when decompressing single member files, +multimember files, or the concatenation of two or more compressed files. +Xlunzip can be used with unzcrash to test the robustness of the module to +the decompression of corrupted data. .PP -Note that the in\-place decompression of concatenated files can't be -guaranteed to work because an arbitrarily low compression ratio of the -last part of the data can be achieved by appending enough empty -compressed members to a file, masking a high compression ratio at the -beginning of the data. +The distributed index feature of the lzip format allows xlunzip to +decompress concatenated files in place. This can't be guaranteed to work +with formats like gzip or bzip2 because they can't detect whether a high +compression ratio in the first members of the multimember data is being +masked by a low compression ratio in the last members. .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR @@ -46,7 +45,7 @@ decompress or test using only one buffer keep (don't delete) input files .TP \fB\-o\fR, \fB\-\-output=\fR -if reading standard input, write to +write to , keep input files .TP \fB\-q\fR, \fB\-\-quiet\fR suppress all messages diff --git a/in_place.c b/in_place.c index 1ada24e..7f00933 100644 --- a/in_place.c +++ b/in_place.c @@ -1,18 +1,18 @@ -/* Xlunzip - Test tool for the lzip_decompress linux module - Copyright (C) 2016-2020 Antonio Diaz Diaz. +/* Xlunzip - Test tool for the lzip_decompress linux module + Copyright (C) 2016-2020 Antonio Diaz Diaz. - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ #define _FILE_OFFSET_BITS 64 @@ -58,8 +58,7 @@ static uint8_t * read_file( const int infd, long * const buffer_sizep, { long buffer_size = 1 << 20; uint8_t * buffer = (uint8_t *)malloc( buffer_size ); - if( !buffer ) - { show_file_error( filename, "Not enough memory.", 0 ); return 0; } + if( !buffer ) { show_file_error( filename, mem_msg, 0 ); return 0; } long file_size = readblock( infd, buffer, buffer_size ); while( file_size >= buffer_size && !errno ) @@ -70,8 +69,7 @@ static uint8_t * read_file( const int infd, long * const buffer_sizep, buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX; uint8_t * const tmp = (uint8_t *)realloc( buffer, buffer_size ); if( !tmp ) - { show_file_error( filename, "Not enough memory.", 0 ); free( buffer ); - return 0; } + { show_file_error( filename, mem_msg, 0 ); free( buffer ); return 0; } buffer = tmp; file_size += readblock( infd, buffer + file_size, buffer_size - file_size ); } @@ -88,6 +86,7 @@ struct File_sizes { unsigned long long csize; unsigned long long dsize; + long members; long trailing; }; @@ -102,7 +101,8 @@ static const char * set_file_sizes( struct File_sizes * const file_sizes, if( !Lh_verify_version( *header ) ) return "Version of lzip member format not supported."; - file_sizes->csize = file_sizes->dsize = file_sizes->trailing = 0; + file_sizes->csize = file_sizes->dsize = 0; + file_sizes->members = file_sizes->trailing = 0; unsigned long pos = file_size; /* always points to a header or to EOF */ while( pos >= min_member_size ) { @@ -137,6 +137,7 @@ static const char * set_file_sizes( struct File_sizes * const file_sizes, pos -= member_size; file_sizes->csize += member_size; file_sizes->dsize += Lt_get_data_size( *trailer ); + ++file_sizes->members; } if( pos != 0 || file_sizes->csize == 0 ) return "Can't get file sizes."; if( file_sizes->csize + file_sizes->trailing != (unsigned long)file_size ) @@ -154,13 +155,15 @@ static void error(char *x) { show_file_error( global_name, x, 0 ); } /* * Load the compressed file at the end of the buffer used to hold the * decompressed data. Verify that the in-place decompression does not - * overwrite the compressed data. + * overwrite the compressed data. The buffer must be large enough to contain + * after the decompressed data extra space for a marker, a trailer, the + * maximum possible data expansion, and (if multimember) N-1 empty members. * - * |----- compressed data ------| - * V V - * |---------------|-------------------|--------| - * ^ ^ - * |------- decompressed data ---------| + * |------ compressed data ------| + * V V + * |----------------|-------------------|---------| + * ^ ^ extra + * |-------- decompressed data ---------| */ int decompress_in_place( const int infd, struct Pretty_print * const pp, @@ -175,19 +178,22 @@ int decompress_in_place( const int infd, struct Pretty_print * const pp, const long csize = file_sizes.csize; const long dsize = file_sizes.dsize; - /* const long trailing = file_sizes.trailing; */ - /* ( (csize-36+63) >> 6 ) + 36 never failed with single member */ - const long rextra = ( csize >> 5 ) + 72; - if( buffer_size < dsize + rextra ) /* avoid realloc if big enough */ +/* const long extra_bytes = ( dsize >> 8 ) + 65536; wrong linux formula */ + const long extra_bytes = ( csize >> 6 ) + file_sizes.members * 36; + const long long target_buffer_size = max( dsize, csize ) + extra_bytes; + if( target_buffer_size > LONG_MAX ) + { show_file_error( pp->name, "Buffer is larger than LONG_MAX.", 0 ); + return 1; } + if( buffer_size < target_buffer_size ) /* avoid realloc if big enough */ { - buffer_size = dsize + rextra; - buffer = (uint8_t *)realloc( buffer, buffer_size ); - if( !buffer ) - { show_file_error( pp->name, "Not enough memory.", 0 ); return 1; } + buffer = (uint8_t *)realloc( buffer, target_buffer_size ); + if( !buffer ) { show_file_error( pp->name, mem_msg, 0 ); return 1; } } - else buffer_size = max( dsize + rextra, csize ); - const long cbegin = buffer_size - csize; + buffer_size = target_buffer_size; + const long cbegin = buffer_size - csize; /* overwrite trailing data */ if( cbegin > 0 ) memmove( buffer + cbegin, buffer, csize ); +/*fprintf( stderr, "buffer_size = %ld, cbegin = %ld, extra_bytes = %ld\n", + buffer_size, cbegin, extra_bytes );*/ long in_pos, out_pos; int retval; diff --git a/lzip.h b/lzip.h index 0840e35..f06c084 100644 --- a/lzip.h +++ b/lzip.h @@ -1,18 +1,18 @@ -/* Xlunzip - Test tool for the lzip_decompress linux module - Copyright (C) 2016-2020 Antonio Diaz Diaz. +/* Xlunzip - Test tool for the lzip_decompress linux module + Copyright (C) 2016-2020 Antonio Diaz Diaz. - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ #ifndef max @@ -75,6 +75,8 @@ static inline unsigned long long Lt_get_member_size( const Lzip_trailer data ) } +static const char * const mem_msg = "Not enough memory."; + /* defined in in_place.c */ int decompress_in_place( const int infd, struct Pretty_print * const pp, const bool testing ); diff --git a/main.c b/main.c index f267c15..0dc02c7 100644 --- a/main.c +++ b/main.c @@ -1,24 +1,24 @@ -/* Xlunzip - Test tool for the lzip_decompress linux module - Copyright (C) 2016-2020 Antonio Diaz Diaz. +/* Xlunzip - Test tool for the lzip_decompress linux module + Copyright (C) 2016-2020 Antonio Diaz Diaz. - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see . + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ /* - Exit status: 0 for a normal exit, 1 for environmental problems - (file not found, invalid flags, I/O errors, etc), 2 to indicate a - corrupt or invalid input file, 3 for an internal consistency error - (eg, bug) which caused xlunzip to panic. + Exit status: 0 for a normal exit, 1 for environmental problems + (file not found, invalid flags, I/O errors, etc), 2 to indicate a + corrupt or invalid input file, 3 for an internal consistency error + (eg, bug) which caused xlunzip to panic. */ #define _FILE_OFFSET_BITS 64 @@ -94,21 +94,20 @@ static bool delete_output_on_interrupt = false; static void show_help( void ) { - printf( "Xlunzip is a test tool for the lzip decompression code of my lzip patch\n" - "for linux. Xlunzip is similar to lunzip, but it uses the lzip_decompress\n" - "linux module as a backend. Xlunzip tests the module for stream,\n" - "buffer-to-buffer and mixed decompression modes, including in-place\n" - "decompression (using the same buffer for input and output). You can use\n" - "xlunzip to verify that the module produces correct results when\n" - "decompressing single member files, multimember files, or the\n" - "concatenation of two or more compressed files. Xlunzip can be used with\n" - "unzcrash to test the robustness of the module to the decompression of\n" - "corrupted data.\n" - "\nNote that the in-place decompression of concatenated files can't be\n" - "guaranteed to work because an arbitrarily low compression ratio of the\n" - "last part of the data can be achieved by appending enough empty\n" - "compressed members to a file, masking a high compression ratio at the\n" - "beginning of the data.\n" + printf( "Xlunzip is a test tool for the lzip decompression code of my lzip patch for\n" + "linux. Xlunzip is similar to lunzip, but it uses the lzip_decompress linux\n" + "module as a backend. Xlunzip tests the module for stream, buffer-to-buffer,\n" + "and mixed decompression modes, including in-place decompression (using the\n" + "same buffer for input and output). You can use xlunzip to verify that the\n" + "module produces correct results when decompressing single member files,\n" + "multimember files, or the concatenation of two or more compressed files.\n" + "Xlunzip can be used with unzcrash to test the robustness of the module to\n" + "the decompression of corrupted data.\n" + "\nThe distributed index feature of the lzip format allows xlunzip to\n" + "decompress concatenated files in place. This can't be guaranteed to work\n" + "with formats like gzip or bzip2 because they can't detect whether a high\n" + "compression ratio in the first members of the multimember data is being\n" + "masked by a low compression ratio in the last members.\n" "\nUsage: %s [options] [files]\n", invocation_name ); printf( "\nOptions:\n" " -h, --help display this help and exit\n" @@ -118,7 +117,7 @@ static void show_help( void ) " -f, --force overwrite existing output files\n" " -I, --in-place decompress or test using only one buffer\n" " -k, --keep keep (don't delete) input files\n" - " -o, --output= if reading standard input, write to \n" + " -o, --output= write to , keep input files\n" " -q, --quiet suppress all messages\n" " -t, --test test compressed file integrity\n" " -v, --verbose be verbose (a 2nd -v gives more)\n" @@ -157,11 +156,7 @@ static void * resize_buffer( void * buf, const unsigned min_size ) { if( buf ) buf = realloc( buf, min_size ); else buf = malloc( min_size ); - if( !buf ) - { - show_error( "Not enough memory.", 0, false ); - cleanup_and_fail( 1 ); - } + if( !buf ) { show_error( mem_msg, 0, false ); cleanup_and_fail( 1 ); } return buf; } @@ -316,7 +311,7 @@ static void set_d_outname( const char * const name, const int eindex ) static int open_instream( const char * const name, struct stat * const in_statsp, - const bool no_ofile ) + const bool one_to_one ) { int infd = open( name, O_RDONLY | O_BINARY ); if( infd < 0 ) @@ -328,13 +323,12 @@ static int open_instream( const char * const name, struct stat * const in_statsp const bool can_read = ( i == 0 && ( S_ISBLK( mode ) || S_ISCHR( mode ) || S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); - if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || !no_ofile ) ) ) + if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) ) { if( verbosity >= 0 ) fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", - program_name, name, - ( can_read && !no_ofile ) ? - ",\n and '--stdout' was not specified" : "" ); + program_name, name, ( can_read && one_to_one ) ? + ",\n and neither '-c' nor '-o' were specified" : "" ); close( infd ); infd = -1; } @@ -343,11 +337,11 @@ static int open_instream( const char * const name, struct stat * const in_statsp } -static bool open_outstream( const bool force, const bool from_stdin ) +static bool open_outstream( const bool force, const bool protect ) { const mode_t usr_rw = S_IRUSR | S_IWUSR; const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; - const mode_t outfd_mode = from_stdin ? all_rw : usr_rw; + const mode_t outfd_mode = protect ? usr_rw : all_rw; int flags = O_APPEND | O_CREAT | O_RDWR | O_BINARY; if( force ) flags |= O_TRUNC; else flags |= O_EXCL; @@ -399,7 +393,7 @@ static void signal_handler( int sig ) } - /* Set permissions, owner, and times. */ +/* Set permissions, owner, and times. */ static void close_and_set_permissions( const struct stat * const in_statsp ) { bool warning = false; @@ -499,7 +493,7 @@ static int decompress( struct Pretty_print * const pp, const long cl_insize, long in_pos, out_pos; int retval; if( ( in_len > 0 && !inbuf ) || ( out_size > 0 && !outbuf ) ) - { show_error( "Not enough memory.", 0, false ); return 1; } + { show_error( mem_msg, 0, false ); return 1; } if( inbuf ) { @@ -544,6 +538,9 @@ void show_results( struct Pretty_print * const pp, const long in_pos, } +static inline void set_retval( int * retval, const int new_val ) + { if( *retval < new_val ) *retval = new_val; } + static void show_error( const char * const msg, const int errcode, const bool help ) { @@ -623,7 +620,7 @@ int main( const int argc, const char * const argv[] ) if( argc > 0 ) invocation_name = argv[0]; if( !ap_init( &parser, argc, argv, options, 0 ) ) - { show_error( "Not enough memory.", 0, false ); return 1; } + { show_error( mem_msg, 0, false ); return 1; } if( ap_error( &parser ) ) /* bad option */ { show_error( ap_error( &parser ), 0, true ); return 1; } @@ -641,7 +638,8 @@ int main( const int argc, const char * const argv[] ) case 'I': in_place = true; break; case 'k': keep_input_files = true; break; case 'n': break; - case 'o': default_output_filename = arg; break; + case 'o': if( strcmp( arg, "-" ) == 0 ) to_stdout = true; + else { default_output_filename = arg; } break; case 'q': verbosity = -1; break; case 't': testing = true; break; case 'v': if( verbosity < 4 ) ++verbosity; break; @@ -672,64 +670,44 @@ int main( const int argc, const char * const argv[] ) if( strcmp( filenames[i], "-" ) != 0 ) filenames_given = true; } - if( testing ) - outfd = -1; + if( testing ) to_stdout = false; /* apply overrides */ + if( testing || to_stdout ) default_output_filename = ""; - if( !to_stdout && !testing && - ( filenames_given || default_output_filename[0] ) ) + output_filename = resize_buffer( output_filename, 1 ); + output_filename[0] = 0; + if( to_stdout && !testing ) outfd = STDOUT_FILENO; + else outfd = -1; + + const bool to_file = !to_stdout && !testing && default_output_filename[0]; + if( !to_stdout && !testing && ( filenames_given || to_file ) ) set_signals( signal_handler ); Pp_init( &pp, filenames, num_filenames ); - output_filename = resize_buffer( output_filename, 1 ); + const bool one_to_one = !to_stdout && !testing && !to_file; for( i = 0; i < num_filenames; ++i ) { const char * input_filename = ""; int tmp; struct stat in_stats; const struct stat * in_statsp; - output_filename[0] = 0; if( strcmp( filenames[i], "-" ) == 0 ) { if( stdin_used ) continue; else stdin_used = true; infd = STDIN_FILENO; - if( !testing ) - { - if( to_stdout || !default_output_filename[0] ) - outfd = STDOUT_FILENO; - else - { - output_filename = resize_buffer( output_filename, - strlen( default_output_filename ) + 1 ); - strcpy( output_filename, default_output_filename ); - if( !open_outstream( force, true ) ) - { - if( retval < 1 ) retval = 1; - close( infd ); infd = -1; - continue; - } - } - } + if( one_to_one ) { outfd = STDOUT_FILENO; output_filename[0] = 0; } } else { input_filename = filenames[i]; - infd = open_instream( input_filename, &in_stats, to_stdout || testing ); - if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } - if( !testing ) + infd = open_instream( input_filename, &in_stats, one_to_one ); + if( infd < 0 ) { set_retval( &retval, 1 ); continue; } + if( one_to_one ) { - if( to_stdout ) outfd = STDOUT_FILENO; - else - { - set_d_outname( input_filename, extension_index( input_filename ) ); - if( !open_outstream( force, false ) ) - { - if( retval < 1 ) retval = 1; - close( infd ); infd = -1; - continue; - } - } + set_d_outname( input_filename, extension_index( input_filename ) ); + if( !open_outstream( force, true ) ) + { set_retval( &retval, 1 ); close( infd ); infd = -1; continue; } } } @@ -738,37 +716,43 @@ int main( const int argc, const char * const argv[] ) { show_file_error( pp.name, "I won't read compressed data from a terminal.", 0 ); - if( retval < 1 ) retval = 1; + set_retval( &retval, 1 ); if( testing ) { close( infd ); infd = -1; continue; } cleanup_and_fail( retval ); } - in_statsp = input_filename[0] ? &in_stats : 0; + if( to_file && outfd < 0 ) /* open outfd after verifying infd */ + { + output_filename = resize_buffer( output_filename, + strlen( default_output_filename ) + 1 ); + strcpy( output_filename, default_output_filename ); + if( !open_outstream( force, false ) ) return 1; + } + + in_statsp = ( input_filename[0] && one_to_one ) ? &in_stats : 0; if( in_place ) tmp = decompress_in_place( infd, &pp, testing ); else tmp = decompress( &pp, cl_insize, cl_outsize, nofill, noflush, testing ); if( close( infd ) != 0 ) - { - show_error( input_filename[0] ? "Error closing input file" : - "Error closing stdin", errno, false ); - if( tmp < 1 ) tmp = 1; - } + { show_file_error( pp.name, "Error closing input file", errno ); + set_retval( &tmp, 1 ); } infd = -1; - if( tmp > retval ) retval = tmp; + set_retval( &retval, tmp ); if( tmp ) { if( !testing ) cleanup_and_fail( retval ); else ++failed_tests; } - if( delete_output_on_interrupt ) + if( delete_output_on_interrupt && one_to_one ) close_and_set_permissions( in_statsp ); - if( input_filename[0] && !keep_input_files && !to_stdout && !testing ) + if( input_filename[0] && !keep_input_files && one_to_one ) remove( input_filename ); } - if( outfd >= 0 && close( outfd ) != 0 ) + if( delete_output_on_interrupt ) close_and_set_permissions( 0 ); /* -o */ + else if( outfd >= 0 && close( outfd ) != 0 ) /* -c */ { show_error( "Error closing stdout", errno, false ); - if( retval < 1 ) retval = 1; + set_retval( &retval, 1 ); } if( failed_tests > 0 && verbosity >= 1 && num_filenames > 1 ) fprintf( stderr, "%s: warning: %d %s failed the test.\n", diff --git a/testsuite/check.sh b/testsuite/check.sh index 853deda..5625ac2 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -48,8 +48,13 @@ printf "testing xlunzip-%s..." "$2" [ $? = 2 ] || test_failed $LINENO "${LZIP}" -dq -o in < "${in_lz}" [ $? = 1 ] || test_failed $LINENO +"${LZIP}" -dq -o in "${in_lz}" +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" -dq -o out nx_file.lz +[ $? = 1 ] || test_failed $LINENO +[ ! -e out ] || test_failed $LINENO # these are for code coverage -"${LZIP}" -t -- nx_file 2> /dev/null +"${LZIP}" -t -- nx_file.lz 2> /dev/null [ $? = 1 ] || test_failed $LINENO "${LZIP}" -t "" < /dev/null 2> /dev/null [ $? = 1 ] || test_failed $LINENO @@ -77,11 +82,17 @@ printf "\ntesting decompression..." for i in "${in_lz}" "${in_em}" ; do "${LZIP}" -t "$i" || test_failed $LINENO "$i" + "${LZIP}" -d "$i" -o copy || test_failed $LINENO "$i" + cmp in copy || test_failed $LINENO "$i" "${LZIP}" -cd "$i" > copy || test_failed $LINENO "$i" cmp in copy || test_failed $LINENO "$i" + "${LZIP}" -d "$i" -o - > copy || test_failed $LINENO "$i" + cmp in copy || test_failed $LINENO "$i" + "${LZIP}" -d < "$i" > copy || test_failed $LINENO "$i" + cmp in copy || test_failed $LINENO "$i" + rm -f copy || framework_failure done -rm -f copy || framework_failure cat "${in_lz}" > copy.lz || framework_failure "${LZIP}" -dk copy.lz || test_failed $LINENO cmp in copy || test_failed $LINENO @@ -95,10 +106,16 @@ cmp in copy || test_failed $LINENO printf "to be overwritten" > copy || framework_failure "${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO cmp in copy || test_failed $LINENO +rm -f out copy || framework_failure +"${LZIP}" -d -o ./- "${in_lz}" || test_failed $LINENO +cmp in ./- || test_failed $LINENO +rm -f ./- || framework_failure +"${LZIP}" -d -o ./- < "${in_lz}" || test_failed $LINENO +cmp in ./- || test_failed $LINENO +rm -f ./- || framework_failure -rm -f copy || framework_failure cat "${in_lz}" > anyothername || framework_failure -"${LZIP}" -dv --output copy - anyothername - < "${in_lz}" 2> /dev/null || +"${LZIP}" -dv - anyothername - < "${in_lz}" > copy 2> /dev/null || test_failed $LINENO cmp in copy || test_failed $LINENO cmp in anyothername.out || test_failed $LINENO @@ -128,12 +145,16 @@ cat "${in_lz}" > copy.lz || framework_failure cmp in copy || test_failed $LINENO cat in in > in2 || framework_failure -cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure -"${LZIP}" -t in2.lz || test_failed $LINENO -"${LZIP}" -cd in2.lz > copy2 || test_failed $LINENO +"${LZIP}" -t "${in_lz}" "${in_lz}" || test_failed $LINENO +"${LZIP}" -cd "${in_lz}" "${in_lz}" -o out > copy2 || test_failed $LINENO +[ ! -e out ] || test_failed $LINENO # override -o +cmp in2 copy2 || test_failed $LINENO +rm -f copy2 || framework_failure +"${LZIP}" -d "${in_lz}" "${in_lz}" -o copy2 || test_failed $LINENO cmp in2 copy2 || test_failed $LINENO +rm -f copy2 || framework_failure -cat in2.lz > copy2.lz || framework_failure +cat "${in_lz}" "${in_lz}" > copy2.lz || framework_failure printf "\ngarbage" >> copy2.lz || framework_failure "${LZIP}" -tvvvv copy2.lz 2> /dev/null || test_failed $LINENO printf "to be overwritten" > copy2 || framework_failure @@ -183,6 +204,7 @@ for i in fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do done rm -f fox out || framework_failure +cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && [ -e trunc.lz ] && cmp in2.lz trunc.lz > /dev/null 2>&1 ; then @@ -283,30 +305,62 @@ cmp -s in copy && test_failed $LINENO [ $? = 1 ] || test_failed $LINENO cmp -s in copy && test_failed $LINENO +# decompress in place +rm -f copy copy2 || framework_failure "${LZIP}" -t --in-place "${in_lz}" || test_failed $LINENO +"${LZIP}" -t --in-place < "${in_lz}" || test_failed $LINENO +"${LZIP}" -d --in-place "${in_lz}" -o copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +rm -f copy || framework_failure +"${LZIP}" -d --in-place < "${in_lz}" -o copy || test_failed $LINENO +cmp in copy || test_failed $LINENO "${LZIP}" -cd --in-place "${in_lz}" > copy || test_failed $LINENO cmp in copy || test_failed $LINENO +"${LZIP}" -cd --in-place < "${in_lz}" > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +"${LZIP}" -t --in-place "${in_lz}" "${in_lz}" || test_failed $LINENO +"${LZIP}" -d --in-place "${in_lz}" "${in_lz}" -o copy2 || test_failed $LINENO +cmp in2 copy2 || test_failed $LINENO +"${LZIP}" -cd --in-place "${in_lz}" "${in_lz}" > copy2 || test_failed $LINENO +cmp in2 copy2 || test_failed $LINENO + +# decompress multimember in place cat in in in > in3 || framework_failure for i in in2 in3 ; do "${LZIP}" -t --in-place $i.lz || test_failed $LINENO + "${LZIP}" -t --in-place < $i.lz || test_failed $LINENO + rm -f copy || framework_failure + "${LZIP}" -d --in-place $i.lz -o copy || test_failed $LINENO + cmp $i copy || test_failed $LINENO + rm -f copy || framework_failure + "${LZIP}" -d --in-place < $i.lz -o copy || test_failed $LINENO + cmp $i copy || test_failed $LINENO "${LZIP}" -cd --in-place $i.lz > copy || test_failed $LINENO cmp $i copy || test_failed $LINENO + "${LZIP}" -cd --in-place < $i.lz > copy || test_failed $LINENO + cmp $i copy || test_failed $LINENO done "${LZIP}" -tq --in-place ingin.lz [ $? = 2 ] || test_failed $LINENO +rm -f copy copy2 ingin.lz in2 in2.lz in3 in3.lz || framework_failure + +cat "${in_lz}" > inz.lz || framework_failure +counter=0 +while [ ${counter} -lt 20 ] ; do + cat "${zero_lz}" >> inz.lz || framework_failure + "${LZIP}" -t --in-place inz.lz || test_failed $LINENO + "${LZIP}" -cd --in-place inz.lz > copy || test_failed $LINENO + cmp in copy || test_failed $LINENO + counter=$((counter+1)) +done +rm -f copy inz.lz || framework_failure -cat "${in_lz}" "${zero_lz}" "${zero_lz}" "${zero_lz}" "${zero_lz}" \ - "${zero_lz}" "${zero_lz}" "${zero_lz}" > inz.lz || framework_failure -"${LZIP}" -t --in-place inz.lz || test_failed $LINENO -"${LZIP}" -cd --in-place inz.lz > copy || test_failed $LINENO +# decompress with trailing data in place +cat "${in_lz}" in in in in > int.lz || framework_failure +"${LZIP}" -t --in-place int.lz || test_failed $LINENO +"${LZIP}" -cd --in-place int.lz > copy || test_failed $LINENO cmp in copy || test_failed $LINENO -cat "${zero_lz}" >> inz.lz || framework_failure -"${LZIP}" -tq --in-place inz.lz -[ $? = 2 ] || test_failed $LINENO -"${LZIP}" -cdq --in-place inz.lz > copy -[ $? = 2 ] || test_failed $LINENO -cmp -s in copy && test_failed $LINENO -rm -f copy ingin.lz in2 in2.lz in3 in3.lz || framework_failure +rm -f copy int.lz || framework_failure echo if [ ${fail} = 0 ] ; then -- cgit v1.2.3