summaryrefslogtreecommitdiffstats
path: root/zgrep.cc
blob: c642da7fd847dc9e6a43bff33d4f61953546e8f1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
/*  Zgrep - search compressed files for a regular expression
    Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

// Write the zgrep usage message (description, supported formats, usage
// line, exit status and option list) to standard output, then call
// show_help_addr().
void show_zgrep_help()
  {
  static const char * const description =
    "Zgrep is a front end to the grep program that allows transparent search\n"
    "on any combination of compressed and non-compressed files. If any given\n"
    "file is compressed, its uncompressed content is used. If a given file\n"
    "does not exist, and its name does not end with one of the known\n"
    "extensions, zgrep tries the compressed file names corresponding to the\n"
    "supported formats. If no files are specified, data is read from\n"
    "standard input, decompressed if needed, and fed to grep. Data read from\n"
    "standard input must be of the same type; all uncompressed or all\n"
    "in the same compression format.\n";

  static const char * const usage =
    "\nThe supported formats are bzip2, gzip, lzip and xz.\n"
    "\nUsage: zgrep [options] <pattern> [files]\n"
    "\nExit status is 0 if match, 1 if no match, 2 if trouble.\n"
    "\nOptions:\n";

  static const char * const option_list =
    "      --help                 display this help and exit\n"
    "  -V, --version              output version information and exit\n"
    "  -a, --text                 treat all files as text\n"
    "  -A, --after-context=<n>    print <n> lines of trailing context\n"
    "  -b, --byte-offset          print the byte offset of each line\n"
    "  -B, --before-context=<n>   print <n> lines of leading context\n"
    "  -c, --count                only print a count of matching lines per file\n"
    "  -C, --context=<n>          print <n> lines of output context\n"
    "  -e, --regexp=<pattern>     use <pattern> as the pattern to match\n"
    "  -E, --extended-regexp      <pattern> is an extended regular expression\n"
    "  -f, --file=<file>          obtain patterns from <file>\n"
    "  -F, --fixed-strings        <pattern> is a set of newline-separated strings\n"
    "      --format=<fmt>         force given format (bz2, gz, lz, xz)\n"
    "  -h, --no-filename          suppress the prefixing filename on output\n"
    "  -H, --with-filename        print the filename for each match\n"
    "  -i, --ignore-case          ignore case distinctions\n"
    "  -I                         ignore binary files\n"
    "  -l, --files-with-matches   only print names of files containing matches\n"
    "  -L, --files-without-match  only print names of files containing no matches\n"
    "  -m, --max-count=<n>        stop after <n> matches\n"
    "  -n, --line-number          print the line number of each line\n"
    "  -o, --only-matching        show only the part of a line matching <pattern>\n"
    "  -q, --quiet                suppress all messages\n"
    "  -r, --recursive            operate recursively on directories\n"
    "  -s, --no-messages          suppress error messages\n"
    "  -v, --invert-match         select non-matching lines\n"
    "      --verbose              verbose mode (show error messages)\n"
    "  -w, --word-regexp          match only whole words\n"
    "  -x, --line-regexp          match only whole lines\n";

  // The three pieces are emitted back to back, so the output is one
  // continuous message.
  std::fputs( description, stdout );
  std::fputs( usage, stdout );
  std::fputs( option_list, stdout );
  show_help_addr();
  }


/* Run grep on the data readable from 'infd', letting grep write directly
   to this process' standard output.

   infd         descriptor to read from. It is handed to set_data_feeder,
                which may replace it, and is closed before returning once
                the feeder has been set up.
   format_type  forwarded unchanged to set_data_feeder.
   grep_args    arguments passed to grep after the program name.

   Returns the status obtained from wait_for_child for grep (per the help
   text: 0 if match, 1 if no match, 2 if trouble), or 2 if the feeder can't
   be set up, grep can't be forked, the feeder reports failure after grep
   found no match, or 'infd' can't be closed.
*/
int zgrep_stdin( int infd, const int format_type,
                 const std::vector< const char * > & grep_args )
  {
  pid_t pid;
  if( !set_data_feeder( &infd, &pid, format_type ) ) return 2;
  const pid_t grep_pid = fork();
  if( grep_pid == 0 )			// child (grep)
    {
    // If infd already is standard input, dup2 would be a no-op and the
    // close would leave grep without input, so skip both in that case.
    // NOTE(review): whether set_data_feeder can leave infd unchanged is
    // not visible here; the guard is harmless either way.
    if( infd == STDIN_FILENO ||
        ( dup2( infd, STDIN_FILENO ) >= 0 && close( infd ) == 0 ) )
      {
      // argv = { GREP, grep_args..., 0 }. Never released explicitly: the
      // child either becomes grep or calls _exit just below.
      std::vector< const char * > argv;
      argv.reserve( grep_args.size() + 2 );
      argv.push_back( GREP );
      argv.insert( argv.end(), grep_args.begin(), grep_args.end() );
      argv.push_back( static_cast< const char * >( 0 ) );
      execvp( argv[0], const_cast< char ** >( &argv[0] ) );
      }
    show_exec_error( GREP );
    _exit( 2 );
    }
					// parent
  if( grep_pid < 0 )
    {
    show_fork_error( GREP );
    // Nothing will consume the feeder's output: stop it, reap it, and
    // release the descriptor just as the normal path does.
    // (assumes wait_for_child blocks until the child ends -- TODO confirm)
    if( pid ) { kill( pid, SIGTERM ); wait_for_child( pid, "data feeder" ); }
    close( infd );
    return 2;
    }

  int retval = wait_for_child( grep_pid, GREP );
  if( pid )
    {
    if( retval != 1 )
      {
      // grep may have stopped before consuming all the data; terminate the
      // feeder and reap it so that it does not linger as a zombie.
      kill( pid, SIGTERM );
      wait_for_child( pid, "data feeder" );
      }
    else if( wait_for_child( pid, "data feeder" ) != 0 )
      retval = 2;			// "no match" is unreliable if feeder failed
    }
  if( close( infd ) != 0 )
    { show_close_error( "data feeder" ); return 2; }
  return retval;
  }


int zgrep_file( int infd, const int format_type,
                const std::string & input_filename,
                const std::vector< const char * > & grep_args,
                const bool grep_list, const bool grep_show_name )
  {
  pid_t pid;
  if( !set_data_feeder( &infd, &pid, format_type ) ) return 2;
  int fda[2];				// pipe from grep
  if( pipe( fda ) < 0 )
    { show_error( "Can't create pipe", errno ); return 2; }
  const pid_t grep_pid = fork();
  if( grep_pid == 0 )			// child (grep)
    {
    if( dup2( infd, STDIN_FILENO ) >= 0 &&
        dup2( fda[1], STDOUT_FILENO ) >= 0 &&
        close( infd ) == 0 && close( fda[0] ) == 0 && close( fda[1] ) == 0 )
      {
      const char ** const argv = new const char *[grep_args.size()+2];
      argv[0] = GREP;
      for( unsigned i = 0; i < grep_args.size(); ++i )
        argv[i+1] = grep_args[i];
      argv[grep_args.size()+1] = 0;
      execvp( argv[0], (char **)argv );
      }
    show_exec_error( GREP );
    _exit( 2 );
    }
					// parent
  close( fda[1] );
  if( grep_pid < 0 )
    { show_fork_error( GREP ); return 2; }
  enum { buffer_size = 256 };
  uint8_t buffer[buffer_size];
  bool line_begin = true;
  while( true )
    {
    const int size = readblock( fda[0], buffer, buffer_size );
    if( size != buffer_size && errno )
      { show_error( "Read error", errno ); return 2; }
    if( size > 0 && !grep_list )
      {
      if( grep_show_name )
        for( int i = 0; i < size; ++i )
          {
          if( line_begin )
            { line_begin = false; std::printf( "%s:", input_filename.c_str() ); }
          if( buffer[i] == '\n' ) line_begin = true;
          putchar( buffer[i] );
          }
      else if( std::fwrite( buffer, 1, size, stdout ) != (unsigned)size )
        { show_error( "Write error", errno ); return 2; }
      }
    if( size < buffer_size ) break;
    }

  int retval = wait_for_child( grep_pid, GREP );
  if( retval != 1 )
    { if( pid ) kill( pid, SIGTERM ); }
  else
    if( pid && wait_for_child( pid, "data feeder" ) != 0 ) retval = 2;
  if( grep_list && retval == 0 )
    std::printf( "%s\n", input_filename.c_str() );
  if( close( infd ) != 0 )
    { show_close_error( "data feeder" ); return 2; }
  if( close( fda[0] ) != 0 )
    { show_close_error( GREP ); return 2; }
  return retval;
  }