summary | refs | log | tree | commit | diff | stats
path: root/ztest.cc
diff options
context:
space:
mode:
Diffstat (limited to 'ztest.cc')
-rw-r--r-- ztest.cc 369
1 files changed, 369 insertions, 0 deletions
diff --git a/ztest.cc b/ztest.cc
new file mode 100644
index 0000000..5f74c20
--- /dev/null
+++ b/ztest.cc
@@ -0,0 +1,369 @@
+/* Ztest - check the integrity of compressed files
+ Copyright (C) 2010-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <cerrno>
+#include <climits>
+#include <csignal>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <list>
+#include <string>
+#include <vector>
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#if defined __MSVCRT__ || defined __OS2__
+#include <io.h>
+#endif
+
+#include "arg_parser.h"
+#include "rc.h"
+#include "zutils.h"
+
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+
+namespace {
+
+#include "recursive.cc"
+
+// Writes the ztest usage text to standard output and then calls
+// show_help_addr().
+void show_help()
+  {
+  // The text contains no '%' conversions, so it is written verbatim.
+  static const char * const help_text =
+    "ztest checks the integrity of the compressed files specified. It\n"
+    "also warns if an uncompressed file has a compressed file name extension, or\n"
+    "if a compressed file has a wrong compressed extension. Uncompressed files\n"
+    "are otherwise ignored. If a file is specified as '-', the integrity of\n"
+    "compressed data read from standard input is checked. Data read from\n"
+    "standard input must be all in the same compressed format. If a file fails to\n"
+    "decompress, does not exist, can't be opened, or is a terminal, ztest\n"
+    "continues testing the rest of the files. A final diagnostic is shown at\n"
+    "verbosity level 1 or higher if any file fails the test when testing multiple\n"
+    "files.\n"
+    "\nIf no files are specified, recursive searches examine the current\n"
+    "working directory, and nonrecursive searches read standard input.\n"
+    "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n"
+    "\nNote that error detection in the xz format is broken. First, some xz files\n"
+    "lack integrity information. Second, not all xz decompressors can check the\n"
+    "integrity of all xz files. Third, section 2.1.1.2 'Stream Flags' of the\n"
+    "xz format specification allows xz decompressors to produce garbage output\n"
+    "without issuing any warning. Therefore, xz files can't always be checked as\n"
+    "reliably as files in the other formats can.\n"
+    "\nUsage: ztest [options] [files]\n"
+    "\nExit status is 0 if all compressed files check OK, 1 if environmental\n"
+    "problems (file not found, invalid command-line options, I/O errors, etc),\n"
+    "2 if any compressed file is corrupt or invalid, or if any file has an\n"
+    "incorrect file name extension.\n"
+    "\nOptions:\n"
+    " -h, --help display this help and exit\n"
+    " -V, --version output version information and exit\n"
+    " -M, --format=<list> process only the formats in <list>\n"
+    " -N, --no-rcfile don't read runtime configuration file\n"
+    " -O, --force-format=<fmt> force the input format\n"
+    " -q, --quiet suppress all messages\n"
+    " -r, --recursive operate recursively on directories\n"
+    " -R, --dereference-recursive recursively follow symbolic links\n"
+    " -v, --verbose be verbose (a 2nd -v gives more)\n"
+    " --bz2=<command> set compressor and options for bzip2 format\n"
+    " --gz=<command> set compressor and options for gzip format\n"
+    " --lz=<command> set compressor and options for lzip format\n"
+    " --xz=<command> set compressor and options for xz format\n"
+    " --zst=<command> set compressor and options for zstd format\n"
+    "\nValid formats for options '-M' and '-O' are 'bz2', 'gz', 'lz', 'xz', and 'zst'.\n";
+
+  std::fputs( help_text, stdout );
+  show_help_addr();
+  }
+
+
+// Opens 'input_filename' for reading (O_RDONLY | O_BINARY) and returns the
+// value returned by open(). A negative result is reported through
+// show_file_error together with errno before being returned to the caller.
+int open_instream( const std::string & input_filename )
+  {
+  const char * const path = input_filename.c_str();
+  const int fd = open( path, O_RDONLY | O_BINARY );
+  if( fd >= 0 ) return fd;
+  show_file_error( path, "Can't open input file", errno );
+  return fd;
+  }
+
+
+/* Tests the compressed data available on 'infd' (standard input).
+
+   If 'format_index' is negative the format is detected with test_format,
+   which may consume up to magic_buf_size bytes into 'magic_data'. Two
+   children are then forked: a feeder that calls feed_data to copy the
+   input (together with the bytes held in 'magic_data') into a pipe, and
+   the compressor, run as "<compressor> <configured args> <ztest_args> -t"
+   with the read end of the pipe as its standard input.
+
+   Returns 2 if no compressor is known for the format, 1 if the pipe or a
+   child can't be created, else the status reported by wait_for_child for
+   the compressor; a compressor status of 0 becomes 1 if the feeder failed.
+*/
+int ztest_stdin( const int infd, int format_index,
+                 const std::vector< const char * > & ztest_args )
+  {
+  uint8_t magic_data[magic_buf_size];
+  int magic_size = 0;
+  if( format_index < 0 )
+    format_index = test_format( infd, magic_data, &magic_size );
+  const char * const compressor_name = get_compressor_name( format_index );
+  if( !compressor_name )
+    { show_error( "Unknown data format read from stdin." ); return 2; }
+  int fda[2];				// pipe from feeder
+  if( pipe( fda ) < 0 )
+    { show_error( "Can't create pipe", errno ); return 1; }
+
+  const pid_t pid = fork();
+  if( pid == 0 )			// child1 (compressor feeder)
+    {
+    if( close( fda[0] ) != 0 ||
+        !feed_data( "-", infd, fda[1], magic_data, magic_size ) ) _exit( 1 );
+    if( close( fda[1] ) != 0 ) { show_close_error(); _exit( 1 ); }
+    _exit( 0 );
+    }
+  if( pid < 0 )				// parent
+    {
+    show_fork_error( "data feeder" );	// report first; close may alter errno
+    close( fda[0] ); close( fda[1] );	// don't leak the pipe descriptors
+    return 1;
+    }
+
+  const pid_t pid2 = fork();
+  if( pid2 == 0 )			// child2 (compressor)
+    {
+    if( dup2( fda[0], STDIN_FILENO ) >= 0 &&
+        close( fda[0] ) == 0 && close( fda[1] ) == 0 )
+      {
+      const std::vector< std::string > & compressor_args =
+        get_compressor_args( format_index );
+      const int size = compressor_args.size();
+      const int size2 = ztest_args.size();
+      // name + configured args + ztest args + "-t" + terminating null
+      const char ** const argv = new const char *[size+size2+3];
+      argv[0] = compressor_name;
+      for( int i = 0; i < size; ++i )
+        argv[i+1] = compressor_args[i].c_str();
+      for( int i = 0; i < size2; ++i )
+        argv[i+size+1] = ztest_args[i];
+      argv[size+size2+1] = "-t";
+      argv[size+size2+2] = 0;
+      execvp( argv[0], (char **)argv );
+      }
+    // reached only if the descriptor setup or execvp failed
+    show_exec_error( compressor_name );
+    _exit( 1 );
+    }
+  if( pid2 < 0 )			// parent
+    {
+    show_fork_error( compressor_name );
+    // Release both ends of the pipe. With no reader left, the feeder's
+    // writes fail instead of blocking on a pipe nobody will ever drain.
+    close( fda[0] ); close( fda[1] );
+    return 1;
+    }
+
+  // The parent uses neither end; closing them lets the compressor see EOF
+  // once the feeder is done.
+  close( fda[0] ); close( fda[1] );
+  const bool isgzxz = ( format_index == fmt_gz || format_index == fmt_xz );
+  int retval = wait_for_child( pid2, compressor_name, 1, isgzxz );
+  // The feeder is only waited for when the compressor reported success.
+  if( retval == 0 && wait_for_child( pid, "data feeder" ) != 0 )
+    retval = 1;
+  return retval;
+  }
+
+
+// Runs "<compressor_name> -V" through the shell with its output discarded.
+// Returns true if std::system reports a status of 0.
+bool compressor_responds( const char * const compressor_name )
+  {
+  const std::string probe =
+    std::string( compressor_name ) + " -V > /dev/null 2>&1";
+  return std::system( probe.c_str() ) == 0;
+  }
+
+
+/* Tests the file 'input_filename', already open on 'infd'.
+
+   If 'format_index' is negative the format is detected from the data with
+   test_format. The compressor is run in a child process as
+   "<compressor> <configured args> <ztest_args> -t -- <input_filename>".
+
+   Returns 0 if the file is ignored (no compressor for its format, or an
+   xz/zstd file whose compressor does not respond to '-V') or tests OK,
+   1 if the child can't be forked, 2 if an undetected format comes with a
+   compressed extension or a file that tested OK has an extension of
+   another format, else the status reported by wait_for_child.
+*/
+int ztest_file( const int infd, int format_index,
+                const std::string & input_filename,
+                const std::vector< const char * > & ztest_args )
+  {
+  // bzip2, gzip, and lzip are the primary formats. xz and zstd are optional.
+  // Each flag is -1 until the compressor is probed, then 0 or 1.
+  static int disable_xz = -1;
+  static int disable_zst = -1;
+
+  uint8_t magic_data[magic_buf_size];
+  int magic_size = 0;
+  const int format_index_e = test_extension( input_filename );
+  if( format_index < 0 )
+    format_index = test_format( infd, magic_data, &magic_size );
+  const char * const compressor_name = get_compressor_name( format_index );
+  if( !compressor_name )
+    {
+    if( format_index >= 0 || format_index_e < 0 ) return 0;	// ignore file
+    show_file_error( input_filename.c_str(),
+                     "Uncompressed file has compressed extension." );
+    return 2;
+    }
+
+  if( format_index == fmt_xz )
+    {
+    if( disable_xz < 0 )		// first xz file seen; probe once
+      {
+      disable_xz = !compressor_responds( compressor_name );
+      if( disable_xz && verbosity >= 2 )
+        std::fprintf( stderr, "%s: '%s' not found. Ignoring xz files.\n",
+                      program_name, compressor_name );
+      }
+    if( disable_xz ) return 0;	// ignore this file if no xz installed
+    }
+  else if( format_index == fmt_zst )
+    {
+    if( disable_zst < 0 )		// first zstd file seen; probe once
+      {
+      disable_zst = !compressor_responds( compressor_name );
+      if( disable_zst && verbosity >= 2 )
+        std::fprintf( stderr, "%s: '%s' not found. Ignoring zstd files.\n",
+                      program_name, compressor_name );
+      }
+    if( disable_zst ) return 0;	// ignore this file if no zstd installed
+    }
+
+  const pid_t pid = fork();
+  if( pid == 0 )			// child (compressor)
+    {
+    const std::vector< std::string > & compressor_args =
+      get_compressor_args( format_index );
+    std::vector< const char * > child_argv;
+    child_argv.push_back( compressor_name );
+    for( unsigned i = 0; i < compressor_args.size(); ++i )
+      child_argv.push_back( compressor_args[i].c_str() );
+    for( unsigned i = 0; i < ztest_args.size(); ++i )
+      child_argv.push_back( ztest_args[i] );
+    child_argv.push_back( "-t" );
+    child_argv.push_back( "--" );	// the file name is never an option
+    child_argv.push_back( input_filename.c_str() );
+    child_argv.push_back( static_cast< const char * >( 0 ) );
+    execvp( child_argv[0], const_cast< char ** >( &child_argv[0] ) );
+    show_exec_error( compressor_name );	// reached only if execvp failed
+    _exit( 1 );
+    }
+  if( pid < 0 )				// parent
+    { show_fork_error( compressor_name ); return 1; }
+
+  const bool isgzxz = ( format_index == fmt_gz || format_index == fmt_xz );
+  const int status = wait_for_child( pid, compressor_name, 1, isgzxz );
+  if( status != 0 ) return status;
+
+  const bool extension_mismatch = format_index >= 0 && format_index_e >= 0 &&
+                                  format_index != format_index_e;
+  if( !extension_mismatch ) return 0;
+  show_file_error( input_filename.c_str(),
+                   "Compressed file has wrong compressed extension." );
+  return 2;
+  }
+
+} // end namespace
+
+
+/* Entry point of ztest.
+
+   Parses the command line, then tests every name delivered by
+   next_filename: the name "." delivered by next_filename is treated as
+   standard input and tested with ztest_stdin (at most once); every other
+   name is opened and tested with ztest_file. Testing continues after a
+   failure.
+
+   Returns the highest status returned by any test, or 1 if only
+   environmental errors (open, close, terminal input) happened.
+*/
+int main( const int argc, const char * const argv[] )
+  {
+  enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt, zst_opt };
+  int format_index = -1;		// undefined
+  int recursive = 0;			// 1 = '-r', 2 = '-R'
+  std::list< std::string > filenames;
+  std::vector< const char * > ztest_args;	// args to ztest, maybe empty
+  program_name = "ztest";
+  invocation_name = ( argc > 0 ) ? argv[0] : program_name;
+
+  const Arg_parser::Option options[] =
+    {
+    { 'h', "help", Arg_parser::no },
+    { 'M', "format", Arg_parser::yes },
+    { 'N', "no-rcfile", Arg_parser::no },
+    { 'O', "force-format", Arg_parser::yes },
+    { 'q', "quiet", Arg_parser::no },
+    { 'r', "recursive", Arg_parser::no },
+    { 'R', "dereference-recursive", Arg_parser::no },
+    { 'v', "verbose", Arg_parser::no },
+    { 'V', "version", Arg_parser::no },
+    { bz2_opt, "bz2", Arg_parser::yes },
+    { gz_opt, "gz", Arg_parser::yes },
+    { lz_opt, "lz", Arg_parser::yes },
+    { xz_opt, "xz", Arg_parser::yes },
+    { zst_opt, "zst", Arg_parser::yes },
+    { 0, 0, Arg_parser::no } };
+
+  const Arg_parser parser( argc, argv, options );
+  if( parser.error().size() )				// bad option
+    { show_error( parser.error().c_str(), 0, true ); return 1; }
+
+  maybe_process_config_file( parser );
+
+  int argind = 0;
+  for( ; argind < parser.arguments(); ++argind )
+    {
+    const int code = parser.code( argind );
+    if( !code ) break;					// no more options
+    const char * const pn = parser.parsed_name( argind ).c_str();
+    const std::string & arg = parser.argument( argind );
+    switch( code )
+      {
+      case 'h': show_help(); return 0;
+      case 'M': parse_format_list( arg, pn ); break;
+      case 'N': break;		// nothing to do here for '--no-rcfile'
+      case 'O': format_index = parse_format_type( arg, pn, false ); break;
+      // '-q' and '-v' are also forwarded to the compressor
+      case 'q': verbosity = -1; ztest_args.push_back( "-q" ); break;
+      case 'r': recursive = 1; break;
+      case 'R': recursive = 2; break;
+      case 'v': if( verbosity < 4 ) ++verbosity;
+                ztest_args.push_back( "-v" ); break;
+      case 'V': show_version(); return 0;
+      case bz2_opt: parse_compressor( arg, pn, fmt_bz2, 1 ); break;
+      case gz_opt: parse_compressor( arg, pn, fmt_gz, 1 ); break;
+      case lz_opt: parse_compressor( arg, pn, fmt_lz, 1 ); break;
+      case xz_opt: parse_compressor( arg, pn, fmt_xz, 1 ); break;
+      case zst_opt: parse_compressor( arg, pn, fmt_zst, 1 ); break;
+      default: internal_error( "uncaught option." );
+      }
+    } // end process options
+
+#if defined __MSVCRT__ || defined __OS2__
+  setmode( STDIN_FILENO, O_BINARY );
+  setmode( STDOUT_FILENO, O_BINARY );
+#endif
+
+  for( ; argind < parser.arguments(); ++argind )
+    filenames.push_back( parser.argument( argind ) );
+
+  if( filenames.empty() ) filenames.push_back( recursive ? "." : "-" );
+
+  std::string input_filename;
+  int files_tested = 0, failed_tests = 0;
+  int retval = 0;
+  bool error = false;
+  bool stdin_used = false;
+  while( next_filename( filenames, input_filename, error, recursive ) )
+    {
+    int infd;
+    // "." is treated here as the name of standard input.
+    const bool from_stdin = ( input_filename == "." );
+    if( from_stdin )
+      {
+      if( stdin_used ) continue; else stdin_used = true;
+      infd = STDIN_FILENO; input_filename = "-";
+      }
+    else
+      {
+      infd = open_instream( input_filename );
+      if( infd < 0 ) { error = true; continue; }
+      }
+
+    if( isatty( infd ) )			// for example /dev/tty
+      {
+      show_file_error( name_or_stdin( input_filename.c_str() ),
+                       "I won't read compressed data from a terminal." );
+      close( infd ); error = true; continue;
+      }
+
+    // Dispatch on the origin of the descriptor, not on its number. Once
+    // standard input has been tested and closed below, open() hands
+    // descriptor 0 to the next regular file, which must still be tested
+    // as a file.
+    int tmp;
+    if( from_stdin )
+      tmp = ztest_stdin( infd, format_index, ztest_args );
+    else tmp = ztest_file( infd, format_index, input_filename, ztest_args );
+    if( tmp > retval ) retval = tmp;
+    ++files_tested; if( tmp ) ++failed_tests;
+
+    if( close( infd ) != 0 )
+      { show_file_error( input_filename.c_str(), "Error closing input file",
+                         errno ); error = true; }
+    }
+
+  if( std::fclose( stdout ) != 0 )	// in case decompressor writes to stdout
+    {
+    show_error( "Error closing stdout", errno );
+    error = true;
+    }
+  if( error && retval == 0 ) retval = 1;
+  if( failed_tests > 0 && verbosity >= 1 && files_tested > 1 )
+    std::fprintf( stderr, "%s: warning: %d %s failed the test.\n",
+                  program_name, failed_tests,
+                  ( failed_tests == 1 ) ? "file" : "files" );
+  return retval;
+  }