diff options
Diffstat (limited to 'ztest.cc')
-rw-r--r-- | ztest.cc | 369 |
1 files changed, 369 insertions, 0 deletions
diff --git a/ztest.cc b/ztest.cc new file mode 100644 index 0000000..204fee8 --- /dev/null +++ b/ztest.cc @@ -0,0 +1,369 @@ +/* Ztest - verify the integrity of compressed files + Copyright (C) 2010-2023 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <list> +#include <string> +#include <vector> +#include <dirent.h> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <sys/stat.h> +#if defined __MSVCRT__ || defined __OS2__ +#include <io.h> +#endif + +#include "arg_parser.h" +#include "rc.h" +#include "zutils.h" + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + +namespace { + +#include "recursive.cc" + +void show_help() + { + std::printf( "ztest verifies the integrity of the compressed files specified. It\n" + "also warns if an uncompressed file has a compressed file name extension, or\n" + "if a compressed file has a wrong compressed extension. Uncompressed files\n" + "are otherwise ignored. If a file is specified as '-', the integrity of\n" + "compressed data read from standard input is verified. Data read from\n" + "standard input must be all in the same compressed format. If a file fails to\n" + "decompress, does not exist, can't be opened, or is a terminal, ztest\n" + "continues verifying the rest of the files. A final diagnostic is shown at\n" + "verbosity level 1 or higher if any file fails the test when testing multiple\n" + "files.\n" + "\nIf no files are specified, recursive searches examine the current\n" + "working directory, and nonrecursive searches read standard input.\n" + "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n" + "\nNote that error detection in the xz format is broken. First, some xz\n" + "files lack integrity information. Second, not all xz decompressors can\n" + "verify the integrity of all xz files. Third, section 2.1.1.2 'Stream\n" + "Flags' of the xz format specification allows xz decompressors to produce\n" + "garbage output without issuing any warning. Therefore, xz files can't\n" + "always be verified as reliably as files in the other formats can.\n" + "\nUsage: ztest [options] [files]\n" + "\nExit status is 0 if all compressed files verify OK, 1 if environmental\n" + "problems (file not found, invalid command line options, I/O errors, etc),\n" + "2 if any compressed file is corrupt or invalid, or if any file has an\n" + "incorrect file name extension.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -M, --format=<list> process only the formats in <list>\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -O, --force-format=<fmt> force the input format\n" + " -q, --quiet suppress all messages\n" + " -r, --recursive operate recursively on directories\n" + " -R, --dereference-recursive recursively follow symbolic links\n" + " -v, --verbose be verbose (a 2nd -v gives more)\n" + " --bz2=<command> set compressor and options for bzip2 format\n" + " --gz=<command> set compressor and options for gzip format\n" + " --lz=<command> set compressor and options for lzip format\n" + " --xz=<command> set compressor and options for xz format\n" + " --zst=<command> set compressor and options for zstd format\n" + "\nValid formats for options '-M' and '-O' are 'bz2', 'gz', 'lz', 'xz', and 'zst'.\n" ); + show_help_addr(); + } + + +int open_instream( const std::string & input_filename ) + { + const int infd = open( input_filename.c_str(), O_RDONLY | O_BINARY ); + if( infd < 0 ) + show_file_error( input_filename.c_str(), "Can't open input file", errno ); + return infd; + } + + +int ztest_stdin( const int infd, int format_index, + const std::vector< const char * > & ztest_args ) + { + uint8_t magic_data[magic_buf_size]; + int magic_size = 0; + if( format_index < 0 ) + format_index = test_format( infd, magic_data, &magic_size ); + const char * const compressor_name = get_compressor_name( format_index ); + if( !compressor_name ) + { show_error( "Unknown data format read from stdin." ); return 2; } + int fda[2]; // pipe from feeder + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return 1; } + + const pid_t pid = fork(); + if( pid == 0 ) // child1 (compressor feeder) + { + if( close( fda[0] ) != 0 || + !feed_data( "-", infd, fda[1], magic_data, magic_size ) ) _exit( 1 ); + if( close( fda[1] ) != 0 ) { show_close_error(); _exit( 1 ); } + _exit( 0 ); + } + if( pid < 0 ) // parent + { show_fork_error( "data feeder" ); return 1; } + + const pid_t pid2 = fork(); + if( pid2 == 0 ) // child2 (compressor) + { + if( dup2( fda[0], STDIN_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 ) + { + const std::vector< std::string > & compressor_args = + get_compressor_args( format_index ); + const int size = compressor_args.size(); + const int size2 = ztest_args.size(); + const char ** const argv = new const char *[size+size2+3]; + argv[0] = compressor_name; + for( int i = 0; i < size; ++i ) + argv[i+1] = compressor_args[i].c_str(); + for( int i = 0; i < size2; ++i ) + argv[i+size+1] = ztest_args[i]; + argv[size+size2+1] = "-t"; + argv[size+size2+2] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( compressor_name ); + _exit( 1 ); + } + if( pid2 < 0 ) // parent + { show_fork_error( compressor_name ); return 1; } + + close( fda[0] ); close( fda[1] ); + const bool isgzxz = ( format_index == fmt_gz || format_index == fmt_xz ); + int retval = wait_for_child( pid2, compressor_name, 1, isgzxz ); + if( retval == 0 && wait_for_child( pid, "data feeder" ) != 0 ) + retval = 1; + return retval; + } + + +int ztest_file( const int infd, int format_index, + const std::string & input_filename, + const std::vector< const char * > & ztest_args ) + { + // bzip2, gzip, and lzip are the primary formats. xz and zstd are optional. + static int disable_xz = -1; // tri-state bool + static int disable_zst = -1; // tri-state bool + uint8_t magic_data[magic_buf_size]; + int magic_size = 0; + const int format_index_e = test_extension( input_filename ); + if( format_index < 0 ) + format_index = test_format( infd, magic_data, &magic_size ); + const char * const compressor_name = get_compressor_name( format_index ); + if( !compressor_name ) + { + if( format_index < 0 && format_index_e >= 0 ) + { show_file_error( input_filename.c_str(), + "Uncompressed file has compressed extension." ); return 2; } + return 0; // ignore this file + } + if( format_index == fmt_xz ) + { + if( disable_xz < 0 ) + { + std::string command( compressor_name ); command += " -V > /dev/null 2>&1"; + disable_xz = ( std::system( command.c_str() ) != 0 ); + if( disable_xz && verbosity >= 2 ) + std::fprintf( stderr, "%s: '%s' not found. Ignoring xz files.\n", + program_name, compressor_name ); + } + if( disable_xz ) return 0; // ignore this file if no xz installed + } + else if( format_index == fmt_zst ) + { + if( disable_zst < 0 ) + { + std::string command( compressor_name ); command += " -V > /dev/null 2>&1"; + disable_zst = ( std::system( command.c_str() ) != 0 ); + if( disable_zst && verbosity >= 2 ) + std::fprintf( stderr, "%s: '%s' not found. Ignoring zstd files.\n", + program_name, compressor_name ); + } + if( disable_zst ) return 0; // ignore this file if no zstd installed + } + + const pid_t pid = fork(); + + if( pid == 0 ) // child (compressor) + { + const std::vector< std::string > & compressor_args = + get_compressor_args( format_index ); + const int size = compressor_args.size(); + const int size2 = ztest_args.size(); + const char ** const argv = new const char *[size+size2+5]; + argv[0] = compressor_name; + for( int i = 0; i < size; ++i ) + argv[i+1] = compressor_args[i].c_str(); + for( int i = 0; i < size2; ++i ) + argv[i+size+1] = ztest_args[i]; + argv[size+size2+1] = "-t"; + argv[size+size2+2] = "--"; + argv[size+size2+3] = input_filename.c_str(); + argv[size+size2+4] = 0; + execvp( argv[0], (char **)argv ); + show_exec_error( compressor_name ); + _exit( 1 ); + } + if( pid < 0 ) // parent + { show_fork_error( compressor_name ); return 1; } + + const bool isgzxz = ( format_index == fmt_gz || format_index == fmt_xz ); + int retval = wait_for_child( pid, compressor_name, 1, isgzxz ); + if( retval == 0 && format_index >= 0 && format_index_e >= 0 && + format_index != format_index_e ) + { show_file_error( input_filename.c_str(), + "Compressed file has wrong compressed extension." ); retval = 2; } + return retval; + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt, zst_opt }; + int format_index = -1; // undefined + int recursive = 0; // 1 = '-r', 2 = '-R' + std::list< std::string > filenames; + std::vector< const char * > ztest_args; // args to ztest, maybe empty + program_name = "ztest"; + invocation_name = ( argc > 0 ) ? argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { 'h', "help", Arg_parser::no }, + { 'M', "format", Arg_parser::yes }, + { 'N', "no-rcfile", Arg_parser::no }, + { 'O', "force-format", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 'r', "recursive", Arg_parser::no }, + { 'R', "dereference-recursive", Arg_parser::no }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { zst_opt, "zst", Arg_parser::yes }, + { 0, 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 1; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const char * const pn = parser.parsed_name( argind ).c_str(); + const std::string & arg = parser.argument( argind ); + switch( code ) + { + case 'h': show_help(); return 0; + case 'M': parse_format_list( arg, pn ); break; + case 'N': break; + case 'O': format_index = parse_format_type( arg, pn, false ); break; + case 'q': verbosity = -1; ztest_args.push_back( "-q" ); break; + case 'r': recursive = 1; break; + case 'R': recursive = 2; break; + case 'v': if( verbosity < 4 ) ++verbosity; + ztest_args.push_back( "-v" ); break; + case 'V': show_version(); return 0; + case bz2_opt: parse_compressor( arg, pn, fmt_bz2, 1 ); break; + case gz_opt: parse_compressor( arg, pn, fmt_gz, 1 ); break; + case lz_opt: parse_compressor( arg, pn, fmt_lz, 1 ); break; + case xz_opt: parse_compressor( arg, pn, fmt_xz, 1 ); break; + case zst_opt: parse_compressor( arg, pn, fmt_zst, 1 ); break; + default : internal_error( "uncaught option." ); + } + } // end process options + +#if defined __MSVCRT__ || defined __OS2__ + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + for( ; argind < parser.arguments(); ++argind ) + filenames.push_back( parser.argument( argind ) ); + + if( filenames.empty() ) filenames.push_back( recursive ? "." : "-" ); + + std::string input_filename; + int files_tested = 0, failed_tests = 0; + int retval = 0; + bool error = false; + bool stdin_used = false; + while( next_filename( filenames, input_filename, error, recursive ) ) + { + int infd; + if( input_filename == "." ) + { + if( stdin_used ) continue; else stdin_used = true; + infd = STDIN_FILENO; input_filename = "-"; + } + else + { + infd = open_instream( input_filename ); + if( infd < 0 ) { error = true; continue; } + } + + if( isatty( infd ) ) // for example /dev/tty + { + show_file_error( name_or_stdin( input_filename.c_str() ), + "I won't read compressed data from a terminal." ); + close( infd ); error = true; continue; + } + + int tmp; + if( infd == STDIN_FILENO ) + tmp = ztest_stdin( infd, format_index, ztest_args ); + else tmp = ztest_file( infd, format_index, input_filename, ztest_args ); + if( tmp > retval ) retval = tmp; + ++files_tested; if( tmp ) ++failed_tests; + + if( close( infd ) != 0 ) + { show_file_error( input_filename.c_str(), "Error closing input file", + errno ); error = true; } + } + + if( std::fclose( stdout ) != 0 ) // in case decompressor writes to stdout + { + show_error( "Error closing stdout", errno ); + error = true; + } + if( error && retval == 0 ) retval = 1; + if( failed_tests > 0 && verbosity >= 1 && files_tested > 1 ) + std::fprintf( stderr, "%s: warning: %d %s failed the test.\n", + program_name, failed_tests, + ( failed_tests == 1 ) ? "file" : "files" ); + return retval; + } |