diff options
Diffstat (limited to 'zdiff.cc')
-rw-r--r-- | zdiff.cc | 446 |
1 files changed, 446 insertions, 0 deletions
diff --git a/zdiff.cc b/zdiff.cc new file mode 100644 index 0000000..a601459 --- /dev/null +++ b/zdiff.cc @@ -0,0 +1,446 @@ +/* Zdiff - decompress and compare two files line by line + Copyright (C) 2010-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <algorithm> +#include <cctype> +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <string> +#include <vector> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <sys/stat.h> +#if defined __MSVCRT__ || defined __OS2__ +#include <io.h> +#endif + +#include "arg_parser.h" +#include "rc.h" +#include "zutils.h" + + +namespace { + +std::string fifonames[2]; // names of the two fifos passed to diff + +#include "zcmpdiff.cc" + +void show_help() + { + std::printf( "zdiff compares two files and, if they differ, writes to standard output the\n" + "differences line by line. A hyphen '-' used as a file argument means standard\n" + "input. If any file given is compressed, its decompressed content is used.\n" + "zdiff is a front end to the program diff and has the limitation that messages\n" + "from diff refer to temporary file names instead of those specified.\n" + "\n'zdiff -v -V' prints the version of the diff program used.\n" + "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n" + "\nUsage: zdiff [options] file1 [file2]\n" + "\nzdiff compares file1 to file2. The standard input is used only if file1 or\n" + "file2 refers to standard input. If file2 is omitted zdiff tries to compare\n" + "file1 with the corresponding uncompressed file (if file1 is compressed), and\n" + "then with the corresponding compressed files of the remaining formats until\n" + "one is found.\n" + "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" + "Some options only work if the diff program used supports them.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -a, --text treat all files as text\n" + " -b, --ignore-space-change ignore changes in the amount of white space\n" + " -B, --ignore-blank-lines ignore changes whose lines are all blank\n" + " -c use the context output format\n" + " -C, --context=<n> same as -c but use <n> lines of context\n" + " -d, --minimal try hard to find a smaller set of changes\n" + " -E, --ignore-tab-expansion ignore changes due to tab expansion\n" + " -i, --ignore-case ignore case differences\n" + " -M, --format=<list> process only the formats in <list>\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -O, --force-format=[<f1>][,<f2>] force one or both input formats\n" + " -p, --show-c-function show which C function each change is in\n" + " -q, --brief output only whether files differ\n" + " -s, --report-identical-files report when two files are identical\n" + " -t, --expand-tabs expand tabs to spaces in output\n" + " -T, --initial-tab make tabs line up by prepending a tab\n" + " -u use the unified output format\n" + " -U, --unified=<n> same as -u but use <n> lines of context\n" + " -v, --verbose verbose mode (for --version)\n" + " -w, --ignore-all-space ignore all white space\n" + " -W, --width=<n> output at most <n> print columns (for -y)\n" + " -y, --side-by-side output in two columns\n" + " --bz2=<command> set compressor and options for bzip2 format\n" + " --gz=<command> set compressor and options for gzip format\n" + " --lz=<command> set compressor and options for lzip format\n" + " --xz=<command> set compressor and options for xz format\n" + " --zst=<command> set compressor and options for zstd format\n" + "\nValid formats for options '-M' and '-O' are 'bz2', 'gz', 'lz', 'xz', 'zst',\n" + "and 'un' for uncompressed.\n" ); + show_help_addr(); + } + + +const char * my_basename( const char * filename ) + { + const char * c = filename; + while( *c ) { if( *c == '/' ) { filename = c + 1; } ++c; } + return filename; + } + + +extern "C" void remove_fifos() + { + if( fifonames[0].size() ) + { std::remove( fifonames[0].c_str() ); fifonames[0].clear(); } + if( fifonames[1].size() ) + { std::remove( fifonames[1].c_str() ); fifonames[1].clear(); } + } + + +/* Set fifonames[i] to "${TMPDIR}/<coded_pid>[_-]<basename(filenames[i])>" + and create FIFOs. The pid is coded in little endian order. +*/ +bool set_fifonames( const std::string filenames[2] ) + { + enum { num_codes = 36 }; + const char * const codes = "0123456789abcdefghijklmnopqrstuvwxyz"; + const char * p = std::getenv( "TMPDIR" ); + + if( p ) { fifonames[0] = p; fifonames[0] += '/'; } + else fifonames[0] = "/tmp/"; + unsigned n = getpid(); + do fifonames[0] += codes[n % num_codes]; while( n /= num_codes ); + const unsigned pos = fifonames[0].size(); + fifonames[0] += '_'; + fifonames[1] = fifonames[0]; + fifonames[0] += my_basename( filenames[0].c_str() ); + fifonames[1] += my_basename( filenames[1].c_str() ); + if( fifonames[1] == fifonames[0] ) fifonames[1][pos] = '-'; + + for( int i = 0; i < 2; ++i ) + if( mkfifo( fifonames[i].c_str(), S_IRUSR | S_IWUSR ) != 0 ) + { + if( errno == EEXIST ) + { + std::remove( fifonames[i].c_str() ); + if( mkfifo( fifonames[i].c_str(), S_IRUSR | S_IWUSR ) == 0 ) + continue; + } + show_file_error( fifonames[i].c_str(), "Can't create FIFO", errno ); + return false; + } + return true; + } + + +bool set_data_feeder( const std::string & filename, + const std::string & fifoname, const int infd, + Children & children, int format_index ) + { + uint8_t magic_data[magic_buf_size]; + int magic_size = 0; + if( format_index < 0 ) + format_index = test_format( infd, magic_data, &magic_size ); + children.compressor_name = get_compressor_name( format_index ); + + if( children.compressor_name ) // compressed + { + int fda[2]; // pipe from feeder to compressor + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return false; } + const pid_t pid = fork(); + if( pid == 0 ) // child 1 (compressor feeder) + { + if( close( fda[0] ) != 0 || + !feed_data( filename, infd, fda[1], magic_data, magic_size ) ) + _exit( 2 ); + if( close( fda[1] ) != 0 ) + { show_close_error(); _exit( 2 ); } + _exit( 0 ); + } + if( pid < 0 ) // parent + { show_fork_error( "data feeder" ); return false; } + + const pid_t pid2 = fork(); + if( pid2 == 0 ) // child 2 (compressor) + { + const int outfd = open( fifoname.c_str(), O_WRONLY | O_BINARY ); + if( outfd < 0 ) + { show_file_error( fifoname.c_str(), "Can't open FIFO for writing", + errno ); _exit( 2 ); } + if( dup2( fda[0], STDIN_FILENO ) >= 0 && + dup2( outfd, STDOUT_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 && + close( outfd ) == 0 ) + { + const std::vector< std::string > & compressor_args = + get_compressor_args( format_index ); + const int size = compressor_args.size(); + const char ** const argv = new const char *[size+3]; + argv[0] = children.compressor_name; + for( int i = 0; i < size; ++i ) + argv[i+1] = compressor_args[i].c_str(); + argv[size+1] = ( verbosity >= 0 ) ? "-d" : "-dq"; + argv[size+2] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( children.compressor_name ); + _exit( 2 ); + } + if( pid2 < 0 ) // parent + { show_fork_error( children.compressor_name ); return false; } + + close( fda[0] ); close( fda[1] ); + children.pid[0] = pid; + children.pid[1] = pid2; + } + else // uncompressed + { + const pid_t pid = fork(); + if( pid == 0 ) // child (feeder) + { + const int outfd = open( fifoname.c_str(), O_WRONLY | O_BINARY ); + if( outfd < 0 ) + { show_file_error( fifoname.c_str(), "Can't open FIFO for writing", + errno ); _exit( 2 ); } + if( !feed_data( filename, infd, outfd, magic_data, magic_size ) ) + _exit( 2 ); + if( close( outfd ) != 0 ) + { show_close_error(); _exit( 2 ); } + _exit( 0 ); + } + if( pid < 0 ) // parent + { show_fork_error( "data feeder" ); return false; } + children.pid[0] = pid; + children.pid[1] = 0; + } + return true; + } + + +extern "C" void signal_handler( int sig ) + { + remove_fifos(); + std::signal( sig, SIG_DFL ); + std::raise( sig ); + } + + +void set_signals() + { + std::signal( SIGHUP, signal_handler ); + std::signal( SIGINT, signal_handler ); + std::signal( SIGTERM, signal_handler ); + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt, zst_opt }; + std::vector< const char * > diff_args; // args to diff, maybe empty + int format_types[2] = { -1, -1 }; // < 0 means undefined + program_name = "zdiff"; + invocation_name = ( argc > 0 ) ? argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { 'a', "text", Arg_parser::no }, + { 'b', "ignore-space-change", Arg_parser::no }, + { 'B', "ignore-blank-lines", Arg_parser::no }, + { 'c', 0, Arg_parser::no }, + { 'C', "context", Arg_parser::yes }, + { 'd', "minimal", Arg_parser::no }, + { 'E', "ignore-tab-expansion", Arg_parser::no }, + { 'h', "help", Arg_parser::no }, + { 'i', "ignore-case", Arg_parser::no }, + { 'M', "format", Arg_parser::yes }, + { 'N', "no-rcfile", Arg_parser::no }, + { 'O', "force-format", Arg_parser::yes }, + { 'p', "show-c-function", Arg_parser::no }, + { 'q', "brief", Arg_parser::no }, + { 's', "report-identical-files", Arg_parser::no }, + { 't', "expand-tabs", Arg_parser::no }, + { 'T', "initial-tab", Arg_parser::no }, + { 'u', 0, Arg_parser::no }, + { 'U', "unified", Arg_parser::yes }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { 'w', "ignore-all-space", Arg_parser::no }, + { 'W', "width", Arg_parser::yes }, + { 'y', "side-by-side", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { zst_opt, "zst", Arg_parser::yes }, + { 0, 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 2; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const char * const pn = parser.parsed_name( argind ).c_str(); + const std::string & sarg = parser.argument( argind ); + const char * const arg = sarg.c_str(); + switch( code ) + { + case 'a': diff_args.push_back( "-a" ); break; + case 'b': diff_args.push_back( "-b" ); break; + case 'B': diff_args.push_back( "-B" ); break; + case 'c': diff_args.push_back( "-c" ); break; + case 'C': diff_args.push_back( "-C" ); diff_args.push_back( arg ); break; + case 'd': diff_args.push_back( "-d" ); break; + case 'E': diff_args.push_back( "-E" ); break; + case 'h': show_help(); return 0; + case 'i': diff_args.push_back( "-i" ); break; + case 'M': parse_format_list( sarg, pn ); break; + case 'N': break; + case 'O': parse_format_types2( sarg, pn, format_types ); break; + case 'p': diff_args.push_back( "-p" ); break; + case 'q': diff_args.push_back( "-q" ); break; + case 's': diff_args.push_back( "-s" ); break; + case 't': diff_args.push_back( "-t" ); break; + case 'T': diff_args.push_back( "-T" ); break; + case 'u': diff_args.push_back( "-u" ); break; + case 'U': diff_args.push_back( "-U" ); diff_args.push_back( arg ); break; + case 'v': if( verbosity < 4 ) ++verbosity; break; + case 'V': show_version( DIFF " --version" ); return 0; + case 'w': diff_args.push_back( "-w" ); break; + case 'W': diff_args.push_back( "-W" ); diff_args.push_back( arg ); break; + case 'y': diff_args.push_back( "-y" ); break; + case bz2_opt: parse_compressor( sarg, pn, fmt_bz2 ); break; + case gz_opt: parse_compressor( sarg, pn, fmt_gz ); break; + case lz_opt: parse_compressor( sarg, pn, fmt_lz ); break; + case xz_opt: parse_compressor( sarg, pn, fmt_xz ); break; + case zst_opt: parse_compressor( sarg, pn, fmt_zst ); break; + default: internal_error( "uncaught option." ); + } + } // end process options + +#if defined __MSVCRT__ || defined __OS2__ + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + const int files = parser.arguments() - argind; + if( files < 1 ) { show_error( "No files given.", 0, true ); return 2; } + if( files > 2 ) { show_error( "Too many files.", 0, true ); return 2; } + + std::string filenames[2]; // file names of the two input files + filenames[0] = parser.argument( argind ); + if( files == 2 ) filenames[1] = parser.argument( argind + 1 ); + + int infd[2]; // file descriptors of the two files + infd[0] = ( filenames[0] == "-" ) ? + STDIN_FILENO : open_instream( filenames[0] ); + if( infd[0] < 0 ) return 2; + + if( files == 2 ) + { + if( check_identical( filenames[0].c_str(), filenames[1].c_str() ) ) + return 0; + infd[1] = ( filenames[1] == "-" ) ? + STDIN_FILENO : open_instream( filenames[1] ); + if( infd[1] < 0 ) return 2; + } + else + { + if( filenames[0] == "-" ) + { show_error( "Missing operand after '-'.", 0, true ); return 2; } + if( format_types[0] >= 0 || format_types[1] >= 0 ) + { show_error( "Two files must be given when format is specified.", 0, true ); + return 2; } + filenames[1] = filenames[0]; + infd[1] = open_other_instream( filenames[1] ); + if( infd[1] < 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't find file to compare with '%s'.\n", + program_name, filenames[0].c_str() ); + show_error( 0, 0, true ); return 2; + } + } + + std::atexit( remove_fifos ); + set_signals(); + if( !set_fifonames( filenames ) ) return 2; + + Children children[2]; + if( !set_data_feeder( filenames[0], fifonames[0], infd[0], children[0], + format_types[0] ) || + !set_data_feeder( filenames[1], fifonames[1], infd[1], children[1], + format_types[1] ) ) + return 2; + + const pid_t diff_pid = fork(); + if( diff_pid == 0 ) // child (diff) + { + const char ** const argv = new const char *[diff_args.size()+5]; + argv[0] = DIFF; + for( unsigned i = 0; i < diff_args.size(); ++i ) + argv[i+1] = diff_args[i]; + argv[diff_args.size()+1] = "--"; + argv[diff_args.size()+2] = fifonames[0].c_str(); + argv[diff_args.size()+3] = fifonames[1].c_str(); + argv[diff_args.size()+4] = 0; + execvp( argv[0], (char **)argv ); + show_exec_error( DIFF ); + _exit( 2 ); + } + if( diff_pid < 0 ) // parent + { show_fork_error( DIFF ); return 2; } + + int retval = wait_for_child( diff_pid, DIFF ); + + for( int i = 0; i < 2; ++i ) + { + int infd; // fifo from decompressor + do infd = open( fifonames[i].c_str(), O_RDONLY | O_NONBLOCK | O_BINARY ); + while( infd < 0 && errno == EINTR ); + bool finished = false; // set to true if fifo is empty and at EOF + if( infd >= 0 ) + { + uint8_t b; + if( readblock( infd, &b, 1 ) <= 0 && errno == 0 ) finished = true; + close( infd ); + } + if( !good_status( children[i], finished ) ) retval = 2; + } + + for( int i = 0; i < 2; ++i ) + if( filenames[i] != "-" && close( infd[i] ) != 0 ) + { + show_file_error( filenames[i].c_str(), "Error closing input file", errno ); + retval = 2; + } + + return retval; + } |