diff options
Diffstat (limited to 'zcat.cc')
-rw-r--r-- | zcat.cc | 392 |
1 files changed, 392 insertions, 0 deletions
@@ -0,0 +1,392 @@ +/* Zcat - decompress and concatenate files to standard output + Copyright (C) 2010-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <list> +#include <string> +#include <vector> +#include <dirent.h> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <sys/stat.h> +#if defined __MSVCRT__ || defined __OS2__ +#include <io.h> +#endif + +#include "arg_parser.h" +#include "rc.h" +#include "zutils.h" + + +namespace { + +#include "recursive.cc" +#include "zcatgrep.cc" + +struct Cat_options + { + int number_lines; // 0 = no, 1 = nonblank, 2 = all + bool show_ends; + bool show_nonprinting; + bool show_tabs; + bool squeeze_blank; + + Cat_options() + : number_lines( 0 ), show_ends( false ), show_nonprinting( false ), + show_tabs( false ), squeeze_blank( false ) {} + }; + + +class Line_number // unlimited size line counter + { + std::string str; + unsigned first_digit_pos; + +public: + Line_number() : str( " 0\t" ), first_digit_pos( 5 ) {} + + void next() + { + for( unsigned i = str.size() - 1; i > first_digit_pos; ) + { + if( str[--i] < '9' ) { ++str[i]; return; } + str[i] = '0'; + } + if( first_digit_pos > 0 ) str[--first_digit_pos] = '1'; + else str.insert( str.begin() + first_digit_pos, '1' ); + } + + int sprint( uint8_t * const buf ) + { + std::memcpy( buf, str.c_str(), str.size() ); + return str.size(); + } + }; + +Line_number line_number; + + +void show_help() + { + std::printf( "zcat copies each file argument to standard output in sequence. If any\n" + "file given is compressed, its decompressed content is copied. If a file\n" + "given does not exist, and its name does not end with one of the known\n" + "extensions, zcat tries the compressed file names corresponding to the\n" + "formats supported until one is found. If a file fails to decompress, zcat\n" + "continues copying the rest of the files.\n" + "\nIf a file is specified as '-', data are read from standard input,\n" + "decompressed if needed, and sent to standard output. Data read from\n" + "standard input must be of the same type; all uncompressed or all in the\n" + "same compressed format.\n" + "\nIf no files are specified, recursive searches examine the current\n" + "working directory, and nonrecursive searches read standard input.\n" + "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n" + "\nUsage: zcat [options] [files]\n" + "\nExit status is 0 if no errors occurred, 1 otherwise.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -A, --show-all equivalent to '-vET'\n" + " -b, --number-nonblank number nonblank output lines\n" + " -e equivalent to '-vE'\n" + " -E, --show-ends display '$' at end of each line\n" + " -M, --format=<list> process only the formats in <list>\n" + " -n, --number number all output lines\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -O, --force-format=<fmt> force the input format\n" + " -q, --quiet suppress all messages\n" + " -r, --recursive operate recursively on directories\n" + " -R, --dereference-recursive recursively follow symbolic links\n" + " -s, --squeeze-blank never more than one single blank line\n" + " -t equivalent to '-vT'\n" + " -T, --show-tabs display TAB characters as '^I'\n" + " -v, --show-nonprinting use '^' and 'M-' notation, except for LF and TAB\n" + " --verbose verbose mode (show error messages)\n" + " --bz2=<command> set compressor and options for bzip2 format\n" + " --gz=<command> set compressor and options for gzip format\n" + " --lz=<command> set compressor and options for lzip format\n" + " --xz=<command> set compressor and options for xz format\n" + " --zst=<command> set compressor and options for zstd format\n" + "\nValid formats for options '-M' and '-O' are 'bz2', 'gz', 'lz', 'xz', 'zst',\n" + "and 'un' for uncompressed.\n" ); + show_help_addr(); + } + + +bool do_cat( const int infd, const int buffer_size, + uint8_t * const inbuf, uint8_t * const outbuf, + const std::string & input_filename, + const Cat_options & cat_options ) + { + static int at_bol = 1; // at begin of line. 0 = false, 1 = true, + // 2 = at begin of second blank line. + int inpos = 0; // positions in buffers + int outpos = 0; + int rd = -1; // bytes read by the last readblock + unsigned char c; + + while( true ) + { + do { + if( outpos >= buffer_size ) + { + if( writeblock( STDOUT_FILENO, outbuf, outpos ) != outpos ) + { show_error( "Write error", errno ); return false; } + outpos = 0; + } + if( inpos > rd ) // inbuf is empty + { + rd = readblock( infd, inbuf, buffer_size ); + if( rd != buffer_size && errno ) + { + show_file_error( input_filename.c_str(), "Read error", errno ); + return false; + } + if( rd == 0 ) + { + if( writeblock( STDOUT_FILENO, outbuf, outpos ) != outpos ) + { show_error( "Write error", errno ); return false; } + outpos = 0; + return true; + } + inpos = 0; + inbuf[rd] = '\n'; // sentinel newline + } + else // a real newline was found + { + if( at_bol > 1 ) + { + if( cat_options.squeeze_blank ) { c = inbuf[inpos++]; continue; } + } + else ++at_bol; + if( at_bol > 1 && cat_options.number_lines == 2 ) + { + line_number.next(); + outpos += line_number.sprint( &outbuf[outpos] ); + } + if( cat_options.show_ends ) outbuf[outpos++] = '$'; + outbuf[outpos++] = '\n'; // output the newline + } + c = inbuf[inpos++]; + } + while( c == '\n' ); + + if( at_bol > 0 && cat_options.number_lines ) + { + line_number.next(); + outpos += line_number.sprint( &outbuf[outpos] ); + } + at_bol = 0; + + // the loops below continue until a newline (real or sentinel) is found + + if( cat_options.show_nonprinting ) + while( true ) + { + if( c < 32 || c >= 127 ) + { + if( c == '\n' ) break; + if( c != '\t' || cat_options.show_tabs ) + { + if( c >= 128 ) + { c -= 128; outbuf[outpos++] = 'M'; outbuf[outpos++] = '-'; } + if( c < 32 ) { c += 64; outbuf[outpos++] = '^'; } + else if( c == 127 ) { c = '?'; outbuf[outpos++] = '^'; } + } + } + outbuf[outpos++] = c; + c = inbuf[inpos++]; + } + else // not quoting + while( c != '\n' ) + { + if( c == '\t' && cat_options.show_tabs ) + { c += 64; outbuf[outpos++] = '^'; } + outbuf[outpos++] = c; + c = inbuf[inpos++]; + } + } + } + + +bool cat( int infd, const int format_index, const std::string & input_filename, + const Cat_options & cat_options ) + { + enum { buffer_size = 4096, outbuf_size = (5 * buffer_size) + 256 + 1 }; + // input buffer with space for sentinel newline at the end + uint8_t * const inbuf = new uint8_t[buffer_size+1]; + /* output buffer with space for character quoting, 255-digit line number, + worst case flushing respect to inbuf, and a canary byte. */ + uint8_t * const outbuf = new uint8_t[outbuf_size]; + outbuf[outbuf_size-1] = 0; // canary byte; quoting does not print 0 + Children children; + bool error = false; + + if( !set_data_feeder( input_filename, &infd, children, format_index ) || + !do_cat( infd, buffer_size, inbuf, outbuf, input_filename, cat_options ) ) + error = true; + if( !good_status( children, !error ) ) error = true; + if( !error && close( infd ) != 0 ) { show_close_error(); error = true; } + if( outbuf[outbuf_size-1] != 0 ) internal_error( "buffer overflow." ); + delete[] outbuf; delete[] inbuf; + return !error; + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { verbose_opt = 256, bz2_opt, gz_opt, lz_opt, xz_opt, zst_opt }; + int format_index = -1; // undefined + int recursive = 0; // 1 = '-r', 2 = '-R' + std::list< std::string > filenames; + Cat_options cat_options; + program_name = "zcat"; + invocation_name = ( argc > 0 ) ? argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { 'A', "show-all", Arg_parser::no }, // cat + { 'b', "number-nonblank", Arg_parser::no }, // cat + { 'c', "stdout", Arg_parser::no }, // gzip + { 'd', "decompress", Arg_parser::no }, // gzip + { 'e', 0, Arg_parser::no }, // cat + { 'E', "show-ends", Arg_parser::no }, // cat + { 'f', "force", Arg_parser::no }, // gzip + { 'h', "help", Arg_parser::no }, + { 'l', "list", Arg_parser::no }, // gzip + { 'L', "license", Arg_parser::no }, // gzip + { 'M', "format", Arg_parser::yes }, + { 'n', "number", Arg_parser::no }, // cat + { 'N', "no-rcfile", Arg_parser::no }, + { 'O', "force-format", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 'r', "recursive", Arg_parser::no }, + { 'R', "dereference-recursive", Arg_parser::no }, + { 's', "squeeze-blank", Arg_parser::no }, // cat + { 't', 0, Arg_parser::no }, // cat + { 'T', "show-tabs", Arg_parser::no }, // cat + { 'v', "show-nonprinting", Arg_parser::no }, // cat + { 'V', "version", Arg_parser::no }, + { verbose_opt, "verbose", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { zst_opt, "zst", Arg_parser::yes }, + { 0, 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 1; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const char * const pn = parser.parsed_name( argind ).c_str(); + const std::string & arg = parser.argument( argind ); + switch( code ) + { + case 'A': cat_options.show_ends = true; + cat_options.show_nonprinting = true; + cat_options.show_tabs = true; break; + case 'b': cat_options.number_lines = 1; break; + case 'c': break; + case 'd': break; + case 'e': cat_options.show_nonprinting = true; // fall through + case 'E': cat_options.show_ends = true; break; + case 'f': break; + case 'h': show_help(); return 0; + case 'l': break; + case 'L': break; + case 'M': parse_format_list( arg, pn ); break; + case 'n': if( cat_options.number_lines == 0 ) + { cat_options.number_lines = 2; } break; + case 'N': break; + case 'O': format_index = parse_format_type( arg, pn ); break; + case 'q': verbosity = -1; break; + case 'r': recursive = 1; break; + case 'R': recursive = 2; break; + case 's': cat_options.squeeze_blank = true; break; + case 't': cat_options.show_nonprinting = true; // fall through + case 'T': cat_options.show_tabs = true; break; + case 'v': cat_options.show_nonprinting = true; break; + case 'V': show_version(); return 0; + case verbose_opt: if( verbosity < 4 ) ++verbosity; break; + case bz2_opt: parse_compressor( arg, pn, fmt_bz2, 1 ); break; + case gz_opt: parse_compressor( arg, pn, fmt_gz, 1 ); break; + case lz_opt: parse_compressor( arg, pn, fmt_lz, 1 ); break; + case xz_opt: parse_compressor( arg, pn, fmt_xz, 1 ); break; + case zst_opt: parse_compressor( arg, pn, fmt_zst, 1 ); break; + default: internal_error( "uncaught option." ); + } + } // end process options + +#if defined __MSVCRT__ || defined __OS2__ + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + for( ; argind < parser.arguments(); ++argind ) + filenames.push_back( parser.argument( argind ) ); + + if( filenames.empty() ) filenames.push_back( recursive ? "." : "-" ); + + std::string input_filename; + bool error = false; + bool stdin_used = false; + while( next_filename( filenames, input_filename, error, recursive ) ) + { + int infd; + if( input_filename == "." ) + { + if( stdin_used ) continue; else stdin_used = true; + infd = STDIN_FILENO; input_filename = "-"; + } + else + { + infd = open_instream( input_filename, format_index < 0 ); + if( infd < 0 ) { error = true; continue; } + } + + if( !cat( infd, format_index, input_filename, cat_options ) ) error = true; + + if( close( infd ) != 0 ) + { show_file_error( input_filename.c_str(), "Error closing input file", + errno ); error = true; } + } + + if( std::fclose( stdout ) != 0 ) + { + show_error( "Error closing stdout", errno ); + error = true; + } + return error; + } |