Adding upstream version 1.10. (upstream/1.10, upstream)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-27 13:35:06 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-27 13:35:06 +0000
commit: f9be52fa859528b0439964589d03d85796275cdb (patch)
tree: 174763c6a2c37083bf3e81c8a9aca0b2eb40c9cc /zgrep.cc
parent: Initial commit. (diff)
download: zutils-f9be52fa859528b0439964589d03d85796275cdb.tar.xz
zutils-f9be52fa859528b0439964589d03d85796275cdb.zip
1 files changed, 401 insertions, 0 deletions
diff --git a/zgrep.cc b/zgrep.cc
new file mode 100644
index 0000000..1454e77
--- /dev/null
+++ b/zgrep.cc
@@ -0,0 +1,401 @@
+/* Zgrep - search compressed files for a regular expression
+   Copyright (C) 2010-2021 Antonio Diaz Diaz.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <cerrno>
+#include <climits>
+#include <csignal>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <list>
+#include <string>
+#include <vector>
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#if defined(__MSVCRT__) || defined(__OS2__)
+#include <io.h>
+#endif
+
+#include "arg_parser.h"
+#include "rc.h"
+#include "zutils.h"
+
+
+namespace {
+
+#include "recursive.cc"
+#include "zcatgrep.cc"
+
+// Write the zgrep usage text (description, usage line, exit status and
+// option summary) to standard output, then call show_help_addr().
+void show_help()
+  {
+  static const char * const help_text =
+    "zgrep is a front end to the program grep that allows transparent search\n"
+    "on any combination of compressed and uncompressed files. If any file\n"
+    "given is compressed, its decompressed content is used. If a file given\n"
+    "does not exist, and its name does not end with one of the known\n"
+    "extensions, zgrep tries the compressed file names corresponding to the\n"
+    "formats supported. If a file fails to decompress, zgrep continues\n"
+    "searching the rest of the files.\n"
+    "\nIf a file is specified as '-', data are read from standard input,\n"
+    "decompressed if needed, and fed to grep. Data read from standard input\n"
+    "must be of the same type; all uncompressed or all in the same\n"
+    "compressed format.\n"
+    "\nIf no files are specified, recursive searches examine the current\n"
+    "working directory, and nonrecursive searches read standard input.\n"
+    "\nThe formats supported are bzip2, gzip, lzip, and xz.\n"
+    "\nUsage: zgrep [options] <pattern> [files]\n"
+    "\nExit status is 0 if match, 1 if no match, 2 if trouble.\n"
+    "Some options only work if the grep program used supports them.\n"
+    "\nOptions:\n"
+    "      --help                   display this help and exit\n"
+    "  -V, --version                output version information and exit\n"
+    "  -a, --text                   treat all files as text\n"
+    "  -A, --after-context=<n>      print <n> lines of trailing context\n"
+    "  -b, --byte-offset            print the byte offset of each line\n"
+    "  -B, --before-context=<n>     print <n> lines of leading context\n"
+    "  -c, --count                  only print a count of matching lines per file\n"
+    "  -C, --context=<n>            print <n> lines of output context\n"
+    "      --color[=<when>]         show matched strings in color\n"
+    "  -e, --regexp=<pattern>       use <pattern> as the pattern to match\n"
+    "  -E, --extended-regexp        <pattern> is an extended regular expression\n"
+    "  -f, --file=<file>            obtain patterns from <file>\n"
+    "  -F, --fixed-strings          <pattern> is a set of newline-separated strings\n"
+    "  -h, --no-filename            suppress the prefixing filename on output\n"
+    "  -H, --with-filename          print the filename for each match\n"
+    "  -i, --ignore-case            ignore case distinctions\n"
+    "  -I                           ignore binary files\n"
+    "  -l, --files-with-matches     only print names of files containing matches\n"
+    "  -L, --files-without-match    only print names of files containing no matches\n"
+    "  -m, --max-count=<n>          stop after <n> matches\n"
+    "  -M, --format=<list>          process only the formats in <list>\n"
+    "  -n, --line-number            print the line number of each line\n"
+    "  -N, --no-rcfile              don't read runtime configuration file\n"
+    "  -o, --only-matching          show only the part of a line matching <pattern>\n"
+    "  -O, --force-format=<fmt>     force the format given (bz2, gz, lz, xz)\n"
+    "  -q, --quiet                  suppress all messages\n"
+    "  -r, --recursive              operate recursively on directories\n"
+    "  -R, --dereference-recursive  recursively follow symbolic links\n"
+    "  -s, --no-messages            suppress error messages\n"
+    "  -v, --invert-match           select non-matching lines\n"
+    "      --verbose                verbose mode (show error messages)\n"
+    "  -w, --word-regexp            match only whole words\n"
+    "  -x, --line-regexp            match only whole lines\n"
+    "      --bz2=<command>          set compressor and options for bzip2 format\n"
+    "      --gz=<command>           set compressor and options for gzip format\n"
+    "      --lz=<command>           set compressor and options for lzip format\n"
+    "      --xz=<command>           set compressor and options for xz format\n"
+    "\nNumbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
+    "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n";
+
+  // The text contains no conversion specifications, so it is written verbatim.
+  std::fputs( help_text, stdout );
+  show_help_addr();
+  }
+
+
+/* Run grep on the data available from 'infd'.
+   'infd' is first handed to set_data_feeder(), which receives its address
+   and may therefore substitute another descriptor for it. grep is then
+   started as a child process with that descriptor as its standard input
+   and with 'grep_args' as its arguments (after the program name GREP).
+   grep's standard output is inherited, so it writes directly to the
+   standard output of zgrep.
+   Returns the value obtained from wait_for_child() for grep, or 2 if
+   set_data_feeder() fails, fork() fails, good_status() reports a failure,
+   or the descriptor can't be closed. */
+int zgrep_stdin( int infd, const int format_index,
+                 const std::vector< const char * > & grep_args )
+  {
+  Children children;
+  if( !set_data_feeder( "", &infd, children, format_index ) ) return 2;
+  const pid_t grep_pid = fork();
+  if( grep_pid == 0 )			// child (grep)
+    {
+    if( dup2( infd, STDIN_FILENO ) >= 0 && close( infd ) == 0 )
+      {
+      // argv = { GREP, grep_args..., 0 }
+      const char ** const argv = new const char *[grep_args.size()+2];
+      argv[0] = GREP;
+      for( unsigned i = 0; i < grep_args.size(); ++i )
+        argv[i+1] = grep_args[i];
+      argv[grep_args.size()+1] = 0;
+      execvp( argv[0], const_cast< char ** >( argv ) );
+      }
+    // reached only if dup2, close, or execvp failed
+    show_exec_error( GREP );
+    _exit( 2 );
+    }
+  if( grep_pid < 0 )			// parent, fork failed
+    {
+    show_fork_error( GREP );		// report first; close may change errno
+    close( infd );			// release the descriptor like the normal path does
+    return 2;
+    }
+
+  int retval = wait_for_child( grep_pid, GREP );
+
+  if( !good_status( children, retval == 1 ) ) retval = 2;
+
+  if( close( infd ) != 0 )
+    { show_close_error(); return 2; }
+  return retval;
+  }
+
+
+/* Run grep on the data of one file and relay grep's output.
+   'infd' is first handed to set_data_feeder(), which receives its address
+   and may therefore substitute another descriptor for it. grep is started
+   as a child process reading from that descriptor and writing to a pipe
+   that this function reads.
+   If 'list_mode' is 0, grep's output is copied to standard output; when
+   'show_name' is true every output line is prefixed with
+   "<input_filename>:". If 'list_mode' is nonzero, grep's output is
+   discarded and 'input_filename' is printed when
+   (retval == 0) == (list_mode == 1).
+   Returns the value obtained from wait_for_child() for grep, or 2 on any
+   failure (feeder setup, pipe, fork, read, write, child status, close).
+   Every descriptor owned by this function is closed on all return paths.
+   NOTE(review): child processes are not waited for on the early error
+   returns. */
+int zgrep_file( int infd, const int format_index,
+                const std::string & input_filename,
+                const std::vector< const char * > & grep_args,
+                const int list_mode, const bool show_name )
+  {
+  Children children;
+  if( !set_data_feeder( input_filename, &infd, children, format_index ) )
+    return 2;
+  int fda[2];				// pipe from grep
+  if( pipe( fda ) < 0 )
+    { show_error( "Can't create pipe", errno ); close( infd ); return 2; }
+  const pid_t grep_pid = fork();
+  if( grep_pid == 0 )			// child (grep)
+    {
+    if( dup2( infd, STDIN_FILENO ) >= 0 &&
+        dup2( fda[1], STDOUT_FILENO ) >= 0 &&
+        close( infd ) == 0 && close( fda[0] ) == 0 && close( fda[1] ) == 0 )
+      {
+      // argv = { GREP, grep_args..., 0 }
+      const char ** const argv = new const char *[grep_args.size()+2];
+      argv[0] = GREP;
+      for( unsigned i = 0; i < grep_args.size(); ++i )
+        argv[i+1] = grep_args[i];
+      argv[grep_args.size()+1] = 0;
+      execvp( argv[0], const_cast< char ** >( argv ) );
+      }
+    // reached only if dup2, close, or execvp failed
+    show_exec_error( GREP );
+    _exit( 2 );
+    }
+  if( grep_pid < 0 )			// parent, fork failed
+    {
+    show_fork_error( GREP );		// report first; close may change errno
+    close( fda[0] ); close( fda[1] ); close( infd );
+    return 2;
+    }
+
+  // The parent only reads from the pipe. Keeping the write end open here
+  // would prevent end-of-file from being seen after grep exits.
+  close( fda[1] );
+  enum { buffer_size = 256 };
+  uint8_t buffer[buffer_size];
+  bool line_begin = true;		// next byte starts a new output line
+  while( true )
+    {
+    const int size = readblock( fda[0], buffer, buffer_size );
+    if( size != buffer_size && errno )
+      {
+      show_error( "Read error", errno );
+      close( fda[0] ); close( infd );
+      return 2;
+      }
+    if( size > 0 && !list_mode )
+      {
+      if( show_name )
+        for( int i = 0; i < size; ++i )
+          {
+          if( line_begin )
+            { line_begin = false; std::printf( "%s:", input_filename.c_str() ); }
+          if( buffer[i] == '\n' ) line_begin = true;
+          std::putchar( buffer[i] );
+          }
+      else if( std::fwrite( buffer, 1, size, stdout ) !=
+               static_cast< unsigned >( size ) )
+        {
+        std::fflush( stdout );
+        show_error( "Write error", errno );
+        close( fda[0] ); close( infd );
+        return 2;
+        }
+      std::fflush( stdout );
+      }
+    if( size < buffer_size ) break;		// end of grep's output
+    }
+
+  int retval = wait_for_child( grep_pid, GREP );
+
+  if( !good_status( children, retval == 1 ) ) retval = 2;
+
+  // list_mode 1 prints the name when retval is 0; -1 prints it otherwise
+  if( list_mode && (retval == 0) == (list_mode == 1) )
+    { std::printf( "%s\n", input_filename.c_str() ); std::fflush( stdout ); }
+  if( close( infd ) != 0 )
+    { show_close_error(); close( fda[0] ); return 2; }
+  if( close( fda[0] ) != 0 )
+    { show_close_error( GREP ); return 2; }
+  return retval;
+  }
+
+} // end namespace
+
+
+/* Entry point of zgrep.
+   Parses the command line, collects the options to be passed on to grep
+   in 'grep_args', takes the first non-option argument as the pattern
+   unless '-e' or '-f' was given, and treats the remaining arguments as
+   file names. Each name produced by next_filename() is searched with
+   zgrep_stdin() (standard input) or zgrep_file() (any other file).
+   Returns 0 if some file matched, 1 if none matched, and 2 on error;
+   errors do not override a match when verbosity is negative ('-q'). */
+int main( const int argc, const char * const argv[] )
+  {
+  enum { help_opt = 256, verbose_opt, color_opt,
+         bz2_opt, gz_opt, lz_opt, xz_opt };
+  int format_index = -1;
+  int list_mode = 0;		// 1 = list matches, -1 = list non-matches
+  int recursive = 0;		// 1 = '-r', 2 = '-R'
+  int show_name = -1;		// tri-state bool
+  bool no_messages = false;
+  std::list< std::string > filenames;
+  std::vector< const char * > grep_args;	// args to grep, maybe empty
+  std::string color_option;		// needed because of optional arg
+  program_name = "zgrep";
+  invocation_name = ( argc > 0 ) ? argv[0] : program_name;
+
+  const Arg_parser::Option options[] =
+    {
+    { 'a', "text",                  Arg_parser::no  },	// grep GNU
+    { 'A', "after-context",         Arg_parser::yes },	// grep GNU
+    { 'b', "byte-offset",           Arg_parser::no  },	// grep GNU
+    { 'B', "before-context",        Arg_parser::yes },	// grep GNU
+    { 'c', "count",                 Arg_parser::no  },	// grep
+    { 'C', "context",               Arg_parser::yes },	// grep GNU
+    { 'e', "regexp",                Arg_parser::yes },	// grep
+    { 'E', "extended-regexp",       Arg_parser::no  },	// grep
+    { 'f', "file",                  Arg_parser::yes },	// grep
+    { 'F', "fixed-strings",         Arg_parser::no  },	// grep
+    { 'h', "no-filename",           Arg_parser::no  },	// grep GNU
+    { 'H', "with-filename",         Arg_parser::no  },	// grep GNU
+    { 'i', "ignore-case",           Arg_parser::no  },	// grep
+    { 'I',  0,                      Arg_parser::no  },	// grep GNU
+    { 'l', "files-with-matches",    Arg_parser::no  },	// grep
+    { 'L', "files-without-match",   Arg_parser::no  },	// grep GNU
+    { 'm', "max-count",             Arg_parser::yes },	// grep GNU
+    { 'M', "format",                Arg_parser::yes },
+    { 'n', "line-number",           Arg_parser::no  },	// grep
+    { 'N', "no-rcfile",             Arg_parser::no  },
+    { 'o', "only-matching",         Arg_parser::no  },	// grep
+    { 'O', "force-format",          Arg_parser::yes },
+    { 'q', "quiet",                 Arg_parser::no  },
+    { 'r', "recursive",             Arg_parser::no  },
+    { 'R', "dereference-recursive", Arg_parser::no  },
+    { 's', "no-messages",           Arg_parser::no  },	// grep
+    { 'v', "invert-match",          Arg_parser::no  },	// grep
+    { 'V', "version",               Arg_parser::no  },
+    { 'w', "word-regexp",           Arg_parser::no  },	// grep GNU
+    { 'x', "line-regexp",           Arg_parser::no  },	// grep
+    { help_opt,    "help",          Arg_parser::no  },
+    { verbose_opt, "verbose",       Arg_parser::no  },
+    { color_opt,   "color",         Arg_parser::maybe },
+    { bz2_opt,     "bz2",           Arg_parser::yes },
+    { gz_opt,      "gz",            Arg_parser::yes },
+    { lz_opt,      "lz",            Arg_parser::yes },
+    { xz_opt,      "xz",            Arg_parser::yes },
+    {  0 ,  0,                      Arg_parser::no  } };
+
+  const Arg_parser parser( argc, argv, options );
+  if( parser.error().size() )				// bad option
+    { show_error( parser.error().c_str(), 0, true ); return 2; }
+
+  maybe_process_config_file( parser );
+
+  int argind = 0;
+  bool pattern_found = false;		// true if '-e' or '-f' was given
+  for( ; argind < parser.arguments(); ++argind )
+    {
+    const int code = parser.code( argind );
+    if( !code ) break;					// no more options
+    // 'arg' refers to a string held by 'parser', so the pointers stored in
+    // 'grep_args' stay valid for as long as 'parser' exists
+    const std::string & arg = parser.argument( argind );
+    switch( code )
+      {
+      case 'a': grep_args.push_back( "-a" ); break;
+      case 'A': grep_args.push_back( "-A" );
+                grep_args.push_back( arg.c_str() ); break;
+      case 'b': grep_args.push_back( "-b" ); break;
+      case 'B': grep_args.push_back( "-B" );
+                grep_args.push_back( arg.c_str() ); break;
+      case 'c': grep_args.push_back( "-c" ); break;
+      case 'C': grep_args.push_back( "-C" );
+                grep_args.push_back( arg.c_str() ); break;
+      case 'e': grep_args.push_back( "-e" );
+                grep_args.push_back( arg.c_str() ); pattern_found = true; break;
+      case 'E': grep_args.push_back( "-E" ); break;
+      case 'f': grep_args.push_back( "-f" );
+                grep_args.push_back( arg.c_str() ); pattern_found = true; break;
+      case 'F': grep_args.push_back( "-F" ); break;
+      case 'h': show_name = false; break;
+      case 'H': show_name = true; break;
+      case 'i': grep_args.push_back( "-i" ); break;
+      case 'I': grep_args.push_back( "-I" ); break;
+      case 'l': grep_args.push_back( "-l" ); list_mode = 1; break;
+      case 'L': grep_args.push_back( "-L" ); list_mode = -1; break;
+      case 'm': grep_args.push_back( "-m" );
+                grep_args.push_back( arg.c_str() ); break;
+      case 'M': parse_format_list( arg ); break;
+      case 'n': grep_args.push_back( "-n" ); break;
+      case 'N': break;
+      case 'o': grep_args.push_back( "-o" ); break;
+      case 'O': format_index = parse_format_type( arg ); break;
+      case 'q': grep_args.push_back( "-q" ); verbosity = -1; break;
+      case 'r': recursive = 1; break;
+      case 'R': recursive = 2; break;
+      case 's': grep_args.push_back( "-s" ); no_messages = true; break;
+      case 'v': grep_args.push_back( "-v" ); break;
+      case 'V': show_version(); return 0;
+      case 'w': grep_args.push_back( "-w" ); break;
+      case 'x': grep_args.push_back( "-x" ); break;
+      case help_opt   : show_help(); return 0;
+      case verbose_opt: if( verbosity < 4 ) ++verbosity;
+                        no_messages = false; break;
+      case color_opt: color_option = "--color";
+        if( !arg.empty() ) { color_option += '='; color_option += arg; }
+        break;
+      case bz2_opt: parse_compressor( arg, fmt_bz2 ); break;
+      case gz_opt: parse_compressor( arg, fmt_gz ); break;
+      case lz_opt: parse_compressor( arg, fmt_lz ); break;
+      case xz_opt: parse_compressor( arg, fmt_xz ); break;
+      default : internal_error( "uncaught option." );
+      }
+    } // end process options
+
+  if( !color_option.empty() )		// push the last value set
+    grep_args.push_back( color_option.c_str() );
+
+#if defined(__MSVCRT__) || defined(__OS2__)
+  setmode( STDIN_FILENO, O_BINARY );
+  setmode( STDOUT_FILENO, O_BINARY );
+#endif
+
+  if( !pattern_found )			// first non-option is the pattern
+    {
+    if( argind >= parser.arguments() )
+      { show_error( "Pattern not found." ); return 2; }
+    const std::string & arg = parser.argument( argind++ );
+    // a pattern starting with '-' is passed with '-e' so that grep does
+    // not take it for an option
+    if( arg.size() && arg[0] == '-' ) grep_args.push_back( "-e" );
+    grep_args.push_back( arg.c_str() );
+    }
+
+  for( ; argind < parser.arguments(); ++argind )
+    filenames.push_back( parser.argument( argind ) );
+
+  if( filenames.empty() ) filenames.push_back( recursive ? "." : "-" );
+
+  // unless '-h' or '-H' was given, show names for several files or recursion
+  if( show_name < 0 ) show_name = ( filenames.size() != 1 || recursive );
+
+  std::string input_filename;
+  int retval = 1;			// no match found yet
+  bool error = false;
+  bool stdin_used = false;
+  while( next_filename( filenames, input_filename, error, recursive,
+                        false, no_messages ) )
+    {
+    int infd;
+    if( input_filename == "." )		// read standard input, at most once
+      {
+      if( stdin_used ) continue; else stdin_used = true;
+      infd = STDIN_FILENO; input_filename = "-";
+      }
+    else
+      {
+      infd = open_instream( input_filename, format_index < 0, no_messages );
+      if( infd < 0 ) { error = true; continue; }
+      }
+
+    int tmp;
+    if( infd == STDIN_FILENO )
+      tmp = zgrep_stdin( infd, format_index, grep_args );
+    else tmp = zgrep_file( infd, format_index, input_filename, grep_args,
+                           list_mode, show_name );
+    // a match (0) always wins; trouble (2) only replaces "no match" (1)
+    if( tmp == 0 || ( tmp == 2 && retval == 1 ) ) retval = tmp;
+
+    if( close( infd ) != 0 )
+      { show_file_error( input_filename.c_str(), "Error closing input file",
+                         errno ); error = true; }
+    if( retval == 0 && verbosity < 0 ) break;	// quiet: first match is enough
+    }
+
+  if( std::fclose( stdout ) != 0 )
+    {
+    show_error( "Error closing stdout", errno );
+    error = true;
+    }
+  if( error && ( retval != 0 || verbosity >= 0 ) ) retval = 2;
+  return retval;
+  }
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-27 13:35:06 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-27 13:35:06 +0000
commit	f9be52fa859528b0439964589d03d85796275cdb (patch)
tree	174763c6a2c37083bf3e81c8a9aca0b2eb40c9cc /zgrep.cc
parent	Initial commit. (diff)
download	zutils-f9be52fa859528b0439964589d03d85796275cdb.tar.xz zutils-f9be52fa859528b0439964589d03d85796275cdb.zip