diff options
Diffstat (limited to 'zgrep.cc')
-rw-r--r-- | zgrep.cc | 401 |
1 files changed, 401 insertions, 0 deletions
diff --git a/zgrep.cc b/zgrep.cc new file mode 100644 index 0000000..1454e77 --- /dev/null +++ b/zgrep.cc @@ -0,0 +1,401 @@ +/* Zgrep - search compressed files for a regular expression + Copyright (C) 2010-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <list> +#include <string> +#include <vector> +#include <dirent.h> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <sys/stat.h> +#if defined(__MSVCRT__) || defined(__OS2__) +#include <io.h> +#endif + +#include "arg_parser.h" +#include "rc.h" +#include "zutils.h" + + +namespace { + +#include "recursive.cc" +#include "zcatgrep.cc" + +void show_help() + { + std::printf( "zgrep is a front end to the program grep that allows transparent search\n" + "on any combination of compressed and uncompressed files. If any file\n" + "given is compressed, its decompressed content is used. If a file given\n" + "does not exist, and its name does not end with one of the known\n" + "extensions, zgrep tries the compressed file names corresponding to the\n" + "formats supported. If a file fails to decompress, zgrep continues\n" + "searching the rest of the files.\n" + "\nIf a file is specified as '-', data are read from standard input,\n" + "decompressed if needed, and fed to grep. Data read from standard input\n" + "must be of the same type; all uncompressed or all in the same\n" + "compressed format.\n" + "\nIf no files are specified, recursive searches examine the current\n" + "working directory, and nonrecursive searches read standard input.\n" + "\nThe formats supported are bzip2, gzip, lzip, and xz.\n" + "\nUsage: zgrep [options] <pattern> [files]\n" + "\nExit status is 0 if match, 1 if no match, 2 if trouble.\n" + "Some options only work if the grep program used supports them.\n" + "\nOptions:\n" + " --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -a, --text treat all files as text\n" + " -A, --after-context=<n> print <n> lines of trailing context\n" + " -b, --byte-offset print the byte offset of each line\n" + " -B, --before-context=<n> print <n> lines of leading context\n" + " -c, --count only print a count of matching lines per file\n" + " -C, --context=<n> print <n> lines of output context\n" + " --color[=<when>] show matched strings in color\n" + " -e, --regexp=<pattern> use <pattern> as the pattern to match\n" + " -E, --extended-regexp <pattern> is an extended regular expression\n" + " -f, --file=<file> obtain patterns from <file>\n" + " -F, --fixed-strings <pattern> is a set of newline-separated strings\n" + " -h, --no-filename suppress the prefixing filename on output\n" + " -H, --with-filename print the filename for each match\n" + " -i, --ignore-case ignore case distinctions\n" + " -I ignore binary files\n" + " -l, --files-with-matches only print names of files containing matches\n" + " -L, --files-without-match only print names of files containing no matches\n" + " -m, --max-count=<n> stop after <n> matches\n" + " -M, --format=<list> process only the formats in <list>\n" + " -n, --line-number print the line number of each line\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -o, --only-matching show only the part of a line matching <pattern>\n" + " -O, --force-format=<fmt> force the format given (bz2, gz, lz, xz)\n" + " -q, --quiet suppress all messages\n" + " -r, --recursive operate recursively on directories\n" + " -R, --dereference-recursive recursively follow symbolic links\n" + " -s, --no-messages suppress error messages\n" + " -v, --invert-match select non-matching lines\n" + " --verbose verbose mode (show error messages)\n" + " -w, --word-regexp match only whole words\n" + " -x, --line-regexp match only whole lines\n" + " --bz2=<command> set compressor and options for bzip2 format\n" + " --gz=<command> set compressor and options for gzip format\n" + " --lz=<command> set compressor and options for lzip format\n" + " --xz=<command> set compressor and options for xz format\n" + "\nNumbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" + "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); + show_help_addr(); + } + + +int zgrep_stdin( int infd, const int format_index, + const std::vector< const char * > & grep_args ) + { + Children children; + if( !set_data_feeder( "", &infd, children, format_index ) ) return 2; + const pid_t grep_pid = fork(); + if( grep_pid == 0 ) // child (grep) + { + if( dup2( infd, STDIN_FILENO ) >= 0 && close( infd ) == 0 ) + { + const char ** const argv = new const char *[grep_args.size()+2]; + argv[0] = GREP; + for( unsigned i = 0; i < grep_args.size(); ++i ) + argv[i+1] = grep_args[i]; + argv[grep_args.size()+1] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( GREP ); + _exit( 2 ); + } + if( grep_pid < 0 ) // parent + { show_fork_error( GREP ); return 2; } + + int retval = wait_for_child( grep_pid, GREP ); + + if( !good_status( children, retval == 1 ) ) retval = 2; + + if( close( infd ) != 0 ) + { show_close_error(); return 2; } + return retval; + } + + +int zgrep_file( int infd, const int format_index, + const std::string & input_filename, + const std::vector< const char * > & grep_args, + const int list_mode, const bool show_name ) + { + Children children; + if( !set_data_feeder( input_filename, &infd, children, format_index ) ) + return 2; + int fda[2]; // pipe from grep + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return 2; } + const pid_t grep_pid = fork(); + if( grep_pid == 0 ) // child (grep) + { + if( dup2( infd, STDIN_FILENO ) >= 0 && + dup2( fda[1], STDOUT_FILENO ) >= 0 && + close( infd ) == 0 && close( fda[0] ) == 0 && close( fda[1] ) == 0 ) + { + const char ** const argv = new const char *[grep_args.size()+2]; + argv[0] = GREP; + for( unsigned i = 0; i < grep_args.size(); ++i ) + argv[i+1] = grep_args[i]; + argv[grep_args.size()+1] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( GREP ); + _exit( 2 ); + } + if( grep_pid < 0 ) // parent + { show_fork_error( GREP ); return 2; } + + close( fda[1] ); + enum { buffer_size = 256 }; + uint8_t buffer[buffer_size]; + bool line_begin = true; + while( true ) + { + const int size = readblock( fda[0], buffer, buffer_size ); + if( size != buffer_size && errno ) + { show_error( "Read error", errno ); return 2; } + if( size > 0 && !list_mode ) + { + if( show_name ) + for( int i = 0; i < size; ++i ) + { + if( line_begin ) + { line_begin = false; std::printf( "%s:", input_filename.c_str() ); } + if( buffer[i] == '\n' ) line_begin = true; + putchar( buffer[i] ); + } + else if( std::fwrite( buffer, 1, size, stdout ) != (unsigned)size ) + { std::fflush( stdout ); show_error( "Write error", errno ); return 2; } + std::fflush( stdout ); + } + if( size < buffer_size ) break; // end of grep's output + } + + int retval = wait_for_child( grep_pid, GREP ); + + if( !good_status( children, retval == 1 ) ) retval = 2; + + if( list_mode && (retval == 0) == (list_mode == 1) ) + { std::printf( "%s\n", input_filename.c_str() ); std::fflush( stdout ); } + if( close( infd ) != 0 ) + { show_close_error(); return 2; } + if( close( fda[0] ) != 0 ) + { show_close_error( GREP ); return 2; } + return retval; + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { help_opt = 256, verbose_opt, color_opt, + bz2_opt, gz_opt, lz_opt, xz_opt }; + int format_index = -1; + int list_mode = 0; // 1 = list matches, -1 = list non-matches + int recursive = 0; // 1 = '-r', 2 = '-R' + int show_name = -1; // tri-state bool + bool no_messages = false; + std::list< std::string > filenames; + std::vector< const char * > grep_args; // args to grep, maybe empty + std::string color_option; // needed because of optional arg + program_name = "zgrep"; + invocation_name = ( argc > 0 ) ? argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { 'a', "text", Arg_parser::no }, // grep GNU + { 'A', "after-context", Arg_parser::yes }, // grep GNU + { 'b', "byte-offset", Arg_parser::no }, // grep GNU + { 'B', "before-context", Arg_parser::yes }, // grep GNU + { 'c', "count", Arg_parser::no }, // grep + { 'C', "context", Arg_parser::yes }, // grep GNU + { 'e', "regexp", Arg_parser::yes }, // grep + { 'E', "extended-regexp", Arg_parser::no }, // grep + { 'f', "file ", Arg_parser::yes }, // grep + { 'F', "fixed-strings", Arg_parser::no }, // grep + { 'h', "no-filename", Arg_parser::no }, // grep GNU + { 'H', "with-filename", Arg_parser::no }, // grep GNU + { 'i', "ignore-case", Arg_parser::no }, // grep + { 'I', 0, Arg_parser::no }, // grep GNU + { 'l', "files-with-matches", Arg_parser::no }, // grep + { 'L', "files-without-match", Arg_parser::no }, // grep GNU + { 'm', "max-count", Arg_parser::yes }, // grep GNU + { 'M', "format", Arg_parser::yes }, + { 'n', "line-number", Arg_parser::no }, // grep + { 'N', "no-rcfile", Arg_parser::no }, + { 'o', "only-matching", Arg_parser::no }, // grep + { 'O', "force-format", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 'r', "recursive", Arg_parser::no }, + { 'R', "dereference-recursive", Arg_parser::no }, + { 's', "no-messages", Arg_parser::no }, // grep + { 'v', "invert-match", Arg_parser::no }, // grep + { 'V', "version", Arg_parser::no }, + { 'w', "word-regexp", Arg_parser::no }, // grep GNU + { 'x', "line-regexp", Arg_parser::no }, // grep + { help_opt, "help", Arg_parser::no }, + { verbose_opt, "verbose", Arg_parser::no }, + { color_opt, "color", Arg_parser::maybe }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { 0 , 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 2; } + + maybe_process_config_file( parser ); + + int argind = 0; + bool pattern_found = false; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const std::string & arg = parser.argument( argind ); + switch( code ) + { + case 'a': grep_args.push_back( "-a" ); break; + case 'A': grep_args.push_back( "-A" ); + grep_args.push_back( arg.c_str() ); break; + case 'b': grep_args.push_back( "-b" ); break; + case 'B': grep_args.push_back( "-B" ); + grep_args.push_back( arg.c_str() ); break; + case 'c': grep_args.push_back( "-c" ); break; + case 'C': grep_args.push_back( "-C" ); + grep_args.push_back( arg.c_str() ); break; + case 'e': grep_args.push_back( "-e" ); + grep_args.push_back( arg.c_str() ); pattern_found = true; break; + case 'E': grep_args.push_back( "-E" ); break; + case 'f': grep_args.push_back( "-f" ); + grep_args.push_back( arg.c_str() ); pattern_found = true; break; + case 'F': grep_args.push_back( "-F" ); break; + case 'h': show_name = false; break; + case 'H': show_name = true; break; + case 'i': grep_args.push_back( "-i" ); break; + case 'I': grep_args.push_back( "-I" ); break; + case 'l': grep_args.push_back( "-l" ); list_mode = 1; break; + case 'L': grep_args.push_back( "-L" ); list_mode = -1; break; + case 'm': grep_args.push_back( "-m" ); + grep_args.push_back( arg.c_str() ); break; + case 'M': parse_format_list( arg ); break; + case 'n': grep_args.push_back( "-n" ); break; + case 'N': break; + case 'o': grep_args.push_back( "-o" ); break; + case 'O': format_index = parse_format_type( arg ); break; + case 'q': grep_args.push_back( "-q" ); verbosity = -1; break; + case 'r': recursive = 1; break; + case 'R': recursive = 2; break; + case 's': grep_args.push_back( "-s" ); no_messages = true; break; + case 'v': grep_args.push_back( "-v" ); break; + case 'V': show_version(); return 0; + case 'w': grep_args.push_back( "-w" ); break; + case 'x': grep_args.push_back( "-x" ); break; + case help_opt : show_help(); return 0; + case verbose_opt: if( verbosity < 4 ) ++verbosity; + no_messages = false; break; + case color_opt: color_option = "--color"; + if( !arg.empty() ) { color_option += '='; color_option += arg; } + break; + case bz2_opt: parse_compressor( arg, fmt_bz2 ); break; + case gz_opt: parse_compressor( arg, fmt_gz ); break; + case lz_opt: parse_compressor( arg, fmt_lz ); break; + case xz_opt: parse_compressor( arg, fmt_xz ); break; + default : internal_error( "uncaught option." ); + } + } // end process options + + if( !color_option.empty() ) // push the last value set + grep_args.push_back( color_option.c_str() ); + +#if defined(__MSVCRT__) || defined(__OS2__) + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + if( !pattern_found ) + { + if( argind >= parser.arguments() ) + { show_error( "Pattern not found." ); return 2; } + const std::string & arg = parser.argument( argind++ ); + if( arg.size() && arg[0] == '-' ) grep_args.push_back( "-e" ); + grep_args.push_back( arg.c_str() ); + } + + for( ; argind < parser.arguments(); ++argind ) + filenames.push_back( parser.argument( argind ) ); + + if( filenames.empty() ) filenames.push_back( recursive ? "." : "-" ); + + if( show_name < 0 ) show_name = ( filenames.size() != 1 || recursive ); + + std::string input_filename; + int retval = 1; + bool error = false; + bool stdin_used = false; + while( next_filename( filenames, input_filename, error, recursive, + false, no_messages ) ) + { + int infd; + if( input_filename == "." ) + { + if( stdin_used ) continue; else stdin_used = true; + infd = STDIN_FILENO; input_filename = "-"; + } + else + { + infd = open_instream( input_filename, format_index < 0, no_messages ); + if( infd < 0 ) { error = true; continue; } + } + + int tmp; + if( infd == STDIN_FILENO ) + tmp = zgrep_stdin( infd, format_index, grep_args ); + else tmp = zgrep_file( infd, format_index, input_filename, grep_args, + list_mode, show_name ); + if( tmp == 0 || ( tmp == 2 && retval == 1 ) ) retval = tmp; + + if( close( infd ) != 0 ) + { show_file_error( input_filename.c_str(), "Error closing input file", + errno ); error = true; } + if( retval == 0 && verbosity < 0 ) break; + } + + if( std::fclose( stdout ) != 0 ) + { + show_error( "Error closing stdout", errno ); + error = true; + } + if( error && ( retval != 0 || verbosity >= 0 ) ) retval = 2; + return retval; + } |