summary refs log tree commit diff stats
path: root/zcat.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- zcat.cc 392
1 files changed, 392 insertions, 0 deletions
diff --git a/zcat.cc b/zcat.cc
new file mode 100644
index 0000000..e055fea
--- /dev/null
+++ b/zcat.cc
@@ -0,0 +1,392 @@
+/* Zcat - decompress and concatenate files to standard output
+ Copyright (C) 2010-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <cerrno>
+#include <climits>
+#include <csignal>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <list>
+#include <string>
+#include <vector>
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#if defined __MSVCRT__ || defined __OS2__
+#include <io.h>
+#endif
+
+#include "arg_parser.h"
+#include "rc.h"
+#include "zutils.h"
+
+
+namespace {
+
+#include "recursive.cc"
+#include "zcatgrep.cc"
+
+// Output-formatting switches selected by the cat-like command-line options.
+struct Cat_options
+  {
+  int number_lines;		// line numbering: 0 = off, 1 = nonblank lines, 2 = all lines
+  bool show_ends;		// mark the end of each line with '$'
+  bool show_nonprinting;	// quote bytes using '^' and 'M-' notation
+  bool show_tabs;		// show TAB as '^I'
+  bool squeeze_blank;		// drop repeated blank lines
+
+  // Every transformation starts disabled.
+  Cat_options()
+    {
+    number_lines = 0;
+    show_ends = false;
+    show_nonprinting = false;
+    show_tabs = false;
+    squeeze_blank = false;
+    }
+  };
+
+
+/* Unlimited-size decimal line counter kept as ready-to-print text.
+   'str' holds the number right-justified in a field at least 'min_width'
+   characters wide, followed by a TAB. 'first_digit_pos' is the index in
+   'str' of the most significant digit; the positions before it (if any)
+   hold padding spaces. When the padding is used up the field grows to the
+   left, so the counter never overflows. */
+class Line_number
+  {
+  enum { min_width = 6 };		// narrowest number field, not counting the TAB
+  std::string str;
+  unsigned first_digit_pos;
+
+public:
+  /* Start at 0 (the first call to next() yields 1).
+     The padding is built with a fill count instead of a literal run of
+     spaces so that 'first_digit_pos' can never get out of step with the
+     real length of 'str'; a too-short literal would make the first carry
+     in next() write past the end of the string. */
+  Line_number()
+    : str( min_width - 1, ' ' ), first_digit_pos( min_width - 1 )
+    { str += "0\t"; }
+
+  // Advance the counter by one.
+  void next()
+    {
+    // Add one to the last digit (just before the TAB), propagating the
+    // carry toward the most significant digit.
+    for( unsigned i = str.size() - 1; i > first_digit_pos; )
+      {
+      if( str[--i] < '9' ) { ++str[i]; return; }
+      str[i] = '0';
+      }
+    // Every digit was '9' and is now '0': a new leading '1' is needed.
+    if( first_digit_pos > 0 ) str[--first_digit_pos] = '1';	// reuse a padding space
+    else str.insert( str.begin(), '1' );			// widen the field
+    }
+
+  /* Copy the current text (padding, digits and TAB, without a terminating
+     0) into 'buf', which must have room for it.
+     Returns the number of bytes copied. */
+  int sprint( uint8_t * const buf ) const
+    {
+    std::memcpy( buf, str.data(), str.size() );
+    return str.size();
+    }
+  };
+
+
+// The single counter used by do_cat. It is never reset.
+Line_number line_number;
+
+
+/* Print the description, usage and option summary of zcat to standard
+   output, then call show_help_addr (defined elsewhere).
+   The text contains no conversion specifiers, so it is written verbatim. */
+void show_help()
+  {
+  const char * const help_text =
+    "zcat copies each file argument to standard output in sequence. If any\n"
+    "file given is compressed, its decompressed content is copied. If a file\n"
+    "given does not exist, and its name does not end with one of the known\n"
+    "extensions, zcat tries the compressed file names corresponding to the\n"
+    "formats supported until one is found. If a file fails to decompress, zcat\n"
+    "continues copying the rest of the files.\n"
+    "\nIf a file is specified as '-', data are read from standard input,\n"
+    "decompressed if needed, and sent to standard output. Data read from\n"
+    "standard input must be of the same type; all uncompressed or all in the\n"
+    "same compressed format.\n"
+    "\nIf no files are specified, recursive searches examine the current\n"
+    "working directory, and nonrecursive searches read standard input.\n"
+    "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n"
+    "\nUsage: zcat [options] [files]\n"
+    "\nExit status is 0 if no errors occurred, 1 otherwise.\n"
+    "\nOptions:\n"
+    " -h, --help display this help and exit\n"
+    " -V, --version output version information and exit\n"
+    " -A, --show-all equivalent to '-vET'\n"
+    " -b, --number-nonblank number nonblank output lines\n"
+    " -e equivalent to '-vE'\n"
+    " -E, --show-ends display '$' at end of each line\n"
+    " -M, --format=<list> process only the formats in <list>\n"
+    " -n, --number number all output lines\n"
+    " -N, --no-rcfile don't read runtime configuration file\n"
+    " -O, --force-format=<fmt> force the input format\n"
+    " -q, --quiet suppress all messages\n"
+    " -r, --recursive operate recursively on directories\n"
+    " -R, --dereference-recursive recursively follow symbolic links\n"
+    " -s, --squeeze-blank never more than one single blank line\n"
+    " -t equivalent to '-vT'\n"
+    " -T, --show-tabs display TAB characters as '^I'\n"
+    " -v, --show-nonprinting use '^' and 'M-' notation, except for LF and TAB\n"
+    " --verbose verbose mode (show error messages)\n"
+    " --bz2=<command> set compressor and options for bzip2 format\n"
+    " --gz=<command> set compressor and options for gzip format\n"
+    " --lz=<command> set compressor and options for lzip format\n"
+    " --xz=<command> set compressor and options for xz format\n"
+    " --zst=<command> set compressor and options for zstd format\n"
+    "\nValid formats for options '-M' and '-O' are 'bz2', 'gz', 'lz', 'xz', 'zst',\n"
+    "and 'un' for uncompressed.\n";
+
+  std::fputs( help_text, stdout );
+  show_help_addr();
+  }
+
+
+/* Copy the data read from 'infd' to standard output, applying the
+   transformations selected in 'cat_options': line numbering, '$' at end of
+   line, '^' and 'M-' quoting, '^I' for TAB, and squeezing of blank lines.
+
+   'inbuf' must hold at least buffer_size + 1 bytes; the extra byte stores a
+   sentinel newline written just after the data of each block read, so the
+   copy loops need to test only for '\n'.
+   'outbuf' is flushed only at the top of the inner do-loop (that is, after
+   a real or sentinel newline, once it holds at least buffer_size bytes) and
+   at end of input, so it must be large enough to absorb everything produced
+   between two such points.
+   'input_filename' is used only in the read error message.
+
+   The begin-of-line state ('at_bol') is static and the line counter is the
+   file-scope 'line_number', so both carry over from one call to the next.
+
+   Returns true at end of input if all the output was written, false after
+   reporting a read or write error. */
+bool do_cat( const int infd, const int buffer_size,
+ uint8_t * const inbuf, uint8_t * const outbuf,
+ const std::string & input_filename,
+ const Cat_options & cat_options )
+ {
+ static int at_bol = 1; // at begin of line. 0 = false, 1 = true,
+ // 2 = at begin of second blank line.
+ int inpos = 0; // positions in buffers
+ int outpos = 0;
+ int rd = -1; // bytes read by the last readblock
+ unsigned char c;
+
+ while( true )
+ {
+ // This loop handles newlines (real or sentinel) and exits with the first
+ // byte of a nonblank line in 'c'.
+ do {
+ if( outpos >= buffer_size )
+ {
+ if( writeblock( STDOUT_FILENO, outbuf, outpos ) != outpos )
+ { show_error( "Write error", errno ); return false; }
+ outpos = 0;
+ }
+ // True on entry (rd == -1) and just after the sentinel at inbuf[rd] has
+ // been consumed (inpos == rd + 1).
+ if( inpos > rd ) // inbuf is empty
+ {
+ rd = readblock( infd, inbuf, buffer_size );
+ // NOTE(review): a short read is treated as an error only if errno is
+ // nonzero; this assumes readblock leaves errno at 0 on plain end of
+ // file - confirm in readblock.
+ if( rd != buffer_size && errno )
+ {
+ show_file_error( input_filename.c_str(), "Read error", errno );
+ return false;
+ }
+ if( rd == 0 ) // end of input; flush what remains and finish
+ {
+ if( writeblock( STDOUT_FILENO, outbuf, outpos ) != outpos )
+ { show_error( "Write error", errno ); return false; }
+ outpos = 0;
+ return true;
+ }
+ inpos = 0;
+ inbuf[rd] = '\n'; // sentinel newline
+ }
+ else // a real newline was found
+ {
+ if( at_bol > 1 )
+ {
+ // One blank line has already been output. When squeezing, drop this
+ // one; 'continue' jumps to the 'while( c == '\n' )' test below.
+ if( cat_options.squeeze_blank ) { c = inbuf[inpos++]; continue; }
+ }
+ else ++at_bol; // 0 -> 1: end of a nonblank line; 1 -> 2: blank line
+ // Here at_bol > 1 means the newline being output ends a blank line.
+ // Blank lines are numbered only when numbering all lines (2).
+ if( at_bol > 1 && cat_options.number_lines == 2 )
+ {
+ line_number.next();
+ outpos += line_number.sprint( &outbuf[outpos] );
+ }
+ if( cat_options.show_ends ) outbuf[outpos++] = '$';
+ outbuf[outpos++] = '\n'; // output the newline
+ }
+ c = inbuf[inpos++];
+ }
+ while( c == '\n' );
+
+ // 'c' is not a newline. If it starts a line, number the line (done for
+ // both numbering modes, as the line is nonblank).
+ if( at_bol > 0 && cat_options.number_lines )
+ {
+ line_number.next();
+ outpos += line_number.sprint( &outbuf[outpos] );
+ }
+ at_bol = 0;
+
+ // the loops below continue until a newline (real or sentinel) is found
+
+ if( cat_options.show_nonprinting )
+ while( true )
+ {
+ if( c < 32 || c >= 127 )
+ {
+ if( c == '\n' ) break;
+ // TAB is quoted only if show_tabs is set; else it is copied as is
+ if( c != '\t' || cat_options.show_tabs )
+ {
+ // bytes with the high bit set get the prefix "M-" and are then
+ // quoted as their 7-bit value
+ if( c >= 128 )
+ { c -= 128; outbuf[outpos++] = 'M'; outbuf[outpos++] = '-'; }
+ // control characters become '^' plus the character 64 above them;
+ // DEL (127) becomes "^?"
+ if( c < 32 ) { c += 64; outbuf[outpos++] = '^'; }
+ else if( c == 127 ) { c = '?'; outbuf[outpos++] = '^'; }
+ }
+ }
+ outbuf[outpos++] = c;
+ c = inbuf[inpos++];
+ }
+ else // not quoting
+ while( c != '\n' )
+ {
+ // TAB (9) + 64 is 'I', giving "^I"
+ if( c == '\t' && cat_options.show_tabs )
+ { c += 64; outbuf[outpos++] = '^'; }
+ outbuf[outpos++] = c;
+ c = inbuf[inpos++];
+ }
+ }
+ }
+
+
+/* Copy one input file to standard output through do_cat.
+   'infd' is passed by address to set_data_feeder, which may change it
+   (presumably to the descriptor the decoded data are read from - confirm in
+   set_data_feeder); the resulting descriptor is closed here only if
+   everything else succeeded.
+   Allocates the input and output buffers for do_cat and checks a canary
+   byte at the end of the output buffer after the copy.
+   Returns true if the file was copied without errors. */
+bool cat( int infd, const int format_index, const std::string & input_filename,
+          const Cat_options & cat_options )
+  {
+  /* The output buffer has space for character quoting, a 255-digit line
+     number, worst case flushing respect to inbuf, and a canary byte. */
+  enum { buffer_size = 4096,
+         canary_pos = (5 * buffer_size) + 256,
+         outbuf_size = canary_pos + 1 };
+  // one extra byte in the input buffer for the sentinel newline
+  uint8_t * const inbuf = new uint8_t[buffer_size+1];
+  uint8_t * const outbuf = new uint8_t[outbuf_size];
+  outbuf[canary_pos] = 0;		// quoting does not print 0
+  Children children;
+
+  // do_cat runs only if the data feeder could be set up
+  bool ok = set_data_feeder( input_filename, &infd, children, format_index );
+  if( ok )
+    ok = do_cat( infd, buffer_size, inbuf, outbuf, input_filename,
+                 cat_options );
+  if( !good_status( children, ok ) ) ok = false;
+  if( ok && close( infd ) != 0 ) { show_close_error(); ok = false; }
+  // a changed canary means that do_cat wrote past the end of outbuf
+  if( outbuf[canary_pos] != 0 ) internal_error( "buffer overflow." );
+  delete[] outbuf;
+  delete[] inbuf;
+  return ok;
+  }
+
+} // end namespace
+
+
+/* Entry point of zcat.
+   Parses the command line, builds the list of input names (defaulting to
+   "." for recursive operation or "-" otherwise), and passes each input
+   obtained from next_filename through cat().
+   Returns 0 if no errors occurred, 1 otherwise. */
+int main( const int argc, const char * const argv[] )
+  {
+  enum { verbose_opt = 256, bz2_opt, gz_opt, lz_opt, xz_opt, zst_opt };
+  int format_index = -1;		// format forced with '-O'; -1 = undefined
+  int recursive = 0;			// 0 = no, 1 = '-r', 2 = '-R'
+  Cat_options cat_options;
+  std::list< std::string > filenames;
+  program_name = "zcat";
+  invocation_name = ( argc > 0 ) ? argv[0] : program_name;
+
+  // options marked "cat" or "gzip" exist for compatibility with those tools
+  const Arg_parser::Option options[] =
+    {
+    { 'A', "show-all",              Arg_parser::no  },	// cat
+    { 'b', "number-nonblank",       Arg_parser::no  },	// cat
+    { 'c', "stdout",                Arg_parser::no  },	// gzip
+    { 'd', "decompress",            Arg_parser::no  },	// gzip
+    { 'e', 0,                       Arg_parser::no  },	// cat
+    { 'E', "show-ends",             Arg_parser::no  },	// cat
+    { 'f', "force",                 Arg_parser::no  },	// gzip
+    { 'h', "help",                  Arg_parser::no  },
+    { 'l', "list",                  Arg_parser::no  },	// gzip
+    { 'L', "license",               Arg_parser::no  },	// gzip
+    { 'M', "format",                Arg_parser::yes },
+    { 'n', "number",                Arg_parser::no  },	// cat
+    { 'N', "no-rcfile",             Arg_parser::no  },
+    { 'O', "force-format",          Arg_parser::yes },
+    { 'q', "quiet",                 Arg_parser::no  },
+    { 'r', "recursive",             Arg_parser::no  },
+    { 'R', "dereference-recursive", Arg_parser::no  },
+    { 's', "squeeze-blank",         Arg_parser::no  },	// cat
+    { 't', 0,                       Arg_parser::no  },	// cat
+    { 'T', "show-tabs",             Arg_parser::no  },	// cat
+    { 'v', "show-nonprinting",      Arg_parser::no  },	// cat
+    { 'V', "version",               Arg_parser::no  },
+    { verbose_opt, "verbose",       Arg_parser::no  },
+    { bz2_opt,     "bz2",           Arg_parser::yes },
+    { gz_opt,      "gz",            Arg_parser::yes },
+    { lz_opt,      "lz",            Arg_parser::yes },
+    { xz_opt,      "xz",            Arg_parser::yes },
+    { zst_opt,     "zst",           Arg_parser::yes },
+    { 0, 0,                         Arg_parser::no  } };
+
+  const Arg_parser parser( argc, argv, options );
+  if( parser.error().size() )				// bad option
+    { show_error( parser.error().c_str(), 0, true ); return 1; }
+
+  maybe_process_config_file( parser );
+
+  // Process the options; they end at the first argument whose code is 0.
+  int idx = 0;
+  while( idx < parser.arguments() )
+    {
+    const int opt = parser.code( idx );
+    if( opt == 0 ) break;				// no more options
+    const char * const opt_name = parser.parsed_name( idx ).c_str();
+    const std::string & opt_arg = parser.argument( idx );
+    switch( opt )
+      {
+      // accepted but with no effect in this loop
+      // NOTE(review): '-N' is presumably acted upon by
+      // maybe_process_config_file - confirm
+      case 'c': case 'd': case 'f':
+      case 'l': case 'L': case 'N': break;
+
+      case 'A': cat_options.show_nonprinting = true;
+                cat_options.show_ends = true;
+                cat_options.show_tabs = true;
+                break;
+      case 'b': cat_options.number_lines = 1; break;
+      case 'e': cat_options.show_nonprinting = true;
+                cat_options.show_ends = true;
+                break;
+      case 'E': cat_options.show_ends = true; break;
+      case 'h': show_help(); return 0;
+      case 'M': parse_format_list( opt_arg, opt_name ); break;
+      // '-n' does not override a numbering mode already chosen
+      case 'n': if( cat_options.number_lines == 0 )
+                  cat_options.number_lines = 2;
+                break;
+      case 'O': format_index = parse_format_type( opt_arg, opt_name ); break;
+      case 'q': verbosity = -1; break;
+      case 'r': recursive = 1; break;
+      case 'R': recursive = 2; break;
+      case 's': cat_options.squeeze_blank = true; break;
+      case 't': cat_options.show_nonprinting = true;
+                cat_options.show_tabs = true;
+                break;
+      case 'T': cat_options.show_tabs = true; break;
+      case 'v': cat_options.show_nonprinting = true; break;
+      case 'V': show_version(); return 0;
+      case verbose_opt: if( verbosity < 4 ) ++verbosity; break;
+      case bz2_opt: parse_compressor( opt_arg, opt_name, fmt_bz2, 1 ); break;
+      case gz_opt:  parse_compressor( opt_arg, opt_name, fmt_gz, 1 ); break;
+      case lz_opt:  parse_compressor( opt_arg, opt_name, fmt_lz, 1 ); break;
+      case xz_opt:  parse_compressor( opt_arg, opt_name, fmt_xz, 1 ); break;
+      case zst_opt: parse_compressor( opt_arg, opt_name, fmt_zst, 1 ); break;
+      default: internal_error( "uncaught option." );
+      }
+    ++idx;
+    } // end process options
+
+#if defined __MSVCRT__ || defined __OS2__
+  setmode( STDIN_FILENO, O_BINARY );
+  setmode( STDOUT_FILENO, O_BINARY );
+#endif
+
+  // The remaining arguments are the names of the inputs.
+  while( idx < parser.arguments() )
+    { filenames.push_back( parser.argument( idx ) ); ++idx; }
+
+  if( filenames.empty() )
+    {
+    if( recursive ) filenames.push_back( "." );
+    else filenames.push_back( "-" );
+    }
+
+  std::string input_filename;
+  bool failed = false;
+  bool stdin_seen = false;
+  while( next_filename( filenames, input_filename, failed, recursive ) )
+    {
+    int infd;
+    // NOTE(review): a name of "." here selects standard input; presumably
+    // next_filename reports "-" that way - confirm in recursive.cc
+    if( input_filename != "." )
+      {
+      infd = open_instream( input_filename, format_index < 0 );
+      if( infd < 0 ) { failed = true; continue; }
+      }
+    else
+      {
+      if( stdin_seen ) continue;	// standard input is read only once
+      stdin_seen = true;
+      infd = STDIN_FILENO;
+      input_filename = "-";
+      }
+
+    if( !cat( infd, format_index, input_filename, cat_options ) )
+      failed = true;
+
+    if( close( infd ) != 0 )
+      {
+      show_file_error( input_filename.c_str(), "Error closing input file",
+                       errno );
+      failed = true;
+      }
+    }
+
+  if( std::fclose( stdout ) != 0 )
+    { show_error( "Error closing stdout", errno ); failed = true; }
+  return failed;
+  }