summaryrefslogtreecommitdiffstats
path: root/src/cat.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 17:39:29 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 17:39:29 +0000
commit8ffec2a3aba6f114784e11f89ef1d57a096ae540 (patch)
treeccebcbad06203e8241a8e7249f8e6c478a3682ea /src/cat.c
parentInitial commit. (diff)
downloadcoreutils-8ffec2a3aba6f114784e11f89ef1d57a096ae540.tar.xz
coreutils-8ffec2a3aba6f114784e11f89ef1d57a096ae540.zip
Adding upstream version 8.32.upstream/8.32upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/cat.c')
-rw-r--r--src/cat.c767
1 files changed, 767 insertions, 0 deletions
diff --git a/src/cat.c b/src/cat.c
new file mode 100644
index 0000000..b132a7d
--- /dev/null
+++ b/src/cat.c
@@ -0,0 +1,767 @@
+/* cat -- concatenate files and print on the standard output.
+ Copyright (C) 1988-2020 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* Differences from the Unix cat:
+ * Always unbuffered, -u is ignored.
+ * Usually much faster than other versions of cat, the difference
+ is especially apparent when using the -v option.
+
+ By tege@sics.se, Torbjorn Granlund, advised by rms, Richard Stallman. */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+
+#if HAVE_STROPTS_H
+# include <stropts.h>
+#endif
+#include <sys/ioctl.h>
+
+#include "system.h"
+#include "ioblksize.h"
+#include "die.h"
+#include "error.h"
+#include "fadvise.h"
+#include "full-write.h"
+#include "safe-read.h"
+#include "xbinary-io.h"
+
+/* The official name of this program (e.g., no 'g' prefix). */
+#define PROGRAM_NAME "cat"
+
+#define AUTHORS \
+ proper_name ("Torbjorn Granlund"), \
+ proper_name ("Richard M. Stallman")
+
+/* Name of the input file being processed. "-" means standard input. */
+static char const *infile;
+
+/* Descriptor on which input file is open. */
+static int input_desc;
+
+/* Buffer for line numbers: right-justified digits, then a tab and a NUL.
+ An 11 digit counter may overflow within an hour on a P2/466,
+ an 18 digit counter needs about 1000 years. */
+#define LINE_COUNTER_BUF_LEN 20
+static char line_buf[LINE_COUNTER_BUF_LEN] =
+ {
+ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
+ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '0',
+ '\t', '\0'
+ };
+
+/* Position in 'line_buf' where printing starts. This will not change
+ unless the number of lines is larger than 999999. */
+static char *line_num_print = line_buf + LINE_COUNTER_BUF_LEN - 8;
+
+/* Position of the first digit in 'line_buf'; moves left as digits are added. */
+static char *line_num_start = line_buf + LINE_COUNTER_BUF_LEN - 3;
+
+/* Position of the last digit in 'line_buf'. */
+static char *line_num_end = line_buf + LINE_COUNTER_BUF_LEN - 3;
+
+/* Saves the 'cat' function's local 'newlines' on return, for its next call. */
+static int newlines2 = 0;
+
+/* Exit with STATUS.  Before exiting, print the full help text on stdout when
+   STATUS is EXIT_SUCCESS; for any other STATUS call emit_try_help ().  */
+void
+usage (int status)
+{
+  if (status == EXIT_SUCCESS)
+    {
+      printf (_("\
+Usage: %s [OPTION]... [FILE]...\n\
+"), program_name);
+      fputs (_("\
+Concatenate FILE(s) to standard output.\n\
+"), stdout);
+
+      emit_stdin_note ();
+
+      fputs (_("\
+\n\
+ -A, --show-all equivalent to -vET\n\
+ -b, --number-nonblank number nonempty output lines, overrides -n\n\
+ -e equivalent to -vE\n\
+ -E, --show-ends display $ at end of each line\n\
+ -n, --number number all output lines\n\
+ -s, --squeeze-blank suppress repeated empty output lines\n\
+"), stdout);
+      fputs (_("\
+ -t equivalent to -vT\n\
+ -T, --show-tabs display TAB characters as ^I\n\
+ -u (ignored)\n\
+ -v, --show-nonprinting use ^ and M- notation, except for LFD and TAB\n\
+"), stdout);
+      fputs (HELP_OPTION_DESCRIPTION, stdout);
+      fputs (VERSION_OPTION_DESCRIPTION, stdout);
+      printf (_("\
+\n\
+Examples:\n\
+ %s f - g Output f's contents, then standard input, then g's contents.\n\
+ %s Copy standard input to standard output.\n\
+"), program_name, program_name);
+      emit_ancillary_info (PROGRAM_NAME);
+    }
+  else
+    emit_try_help ();
+  exit (status);
+}
+
+/* Advance the decimal line number stored in 'line_buf' by one. */
+
+static void
+next_line_num (void)
+{
+  char *digit = line_num_end;
+  while (line_num_start <= digit && '9' <= *digit)
+    *digit-- = '0';		/* Carry into the next place to the left.  */
+  if (line_num_start <= digit)
+    {
+      ++*digit;
+      return;
+    }
+  if (line_buf < line_num_start)
+    *--line_num_start = '1';	/* The number gained a digit.  */
+  else
+    *line_buf = '>';		/* No room left: flag the overflow.  */
+  if (line_num_start < line_num_print)
+    line_num_print--;		/* Widen the printed field to match.  */
+}
+
+/* Plain cat.  Copy the file behind 'input_desc' to STDOUT_FILENO without
+   inspecting or altering the data.  This is the variant 'main' picks
+   when none of the formatting options is in effect.
+
+   Return true when end of input is reached.  Return false after
+   reporting a read error against 'infile'; the caller decides whether
+   to carry on with further files.  A failed or short write is handed
+   to die () with EXIT_FAILURE.  */
+
+static bool
+simple_cat (
+     /* Buffer shared by the reads and the writes.  */
+     char *buf,
+
+     /* Number of bytes requested from each read call.  */
+     size_t bufsize)
+{
+  /* Loop until the end of the file.  */
+
+  for (;;)
+    {
+      /* Fetch the next block of input.  */
+
+      size_t n_got = safe_read (input_desc, buf, bufsize);
+
+      if (n_got == SAFE_READ_ERROR)
+        {
+          error (0, errno, "%s", quotef (infile));
+          return false;
+        }
+
+      /* A zero-length read marks the end of this file.  */
+
+      if (n_got == 0)
+        return true;
+
+      /* Write the block out; every byte just read must be written.  */
+
+      size_t n_written = full_write (STDOUT_FILENO, buf, n_got);
+      if (n_written != n_got)
+        die (EXIT_FAILURE, errno, _("write error"));
+    }
+}
+
+/* Write any pending output to STDOUT_FILENO.
+   Pending is defined to be the *BPOUT - OUTBUF bytes starting at OUTBUF.
+   Then set *BPOUT to OUTBUF if it's not already that value.  */
+
+static inline void
+write_pending (char *outbuf, char **bpout)
+{
+  size_t pending = *bpout - outbuf;
+  if (pending == 0)
+    return;
+
+  if (full_write (STDOUT_FILENO, outbuf, pending) != pending)
+    die (EXIT_FAILURE, errno, _("write error"));
+  *bpout = outbuf;
+}
+
+/* Cat the file behind 'input_desc' to STDOUT_FILENO.
+ Return true if successful, false after reporting a read or ioctl error.
+ Called if any option more than -u was specified.
+
+ A newline character is always put at the end of the input buffer, to make
+ an explicit test for buffer end unnecessary. */
+
+static bool
+cat (
+ /* Pointer to the beginning of the input buffer. */
+ char *inbuf,
+
+ /* Number of characters read in each read call. */
+ size_t insize,
+
+ /* Pointer to the beginning of the output buffer. */
+ char *outbuf,
+
+ /* Number of characters written by each write call. */
+ size_t outsize,
+
+ /* Variables that have values according to the specified options. */
+ bool show_nonprinting,
+ bool show_tabs,
+ bool number,
+ bool number_nonblank,
+ bool show_ends,
+ bool squeeze_blank)
+{
+ /* Last character read from the input buffer. */
+ unsigned char ch;
+
+ /* Pointer to the next character in the input buffer. */
+ char *bpin;
+
+ /* Pointer to the first non-valid byte in the input buffer, i.e., the
+ current end of the buffer. */
+ char *eob;
+
+ /* Pointer to the position where the next character shall be written. */
+ char *bpout;
+
+ /* Number of characters read by the last read call. */
+ size_t n_read;
+
+ /* Determines how many consecutive newlines there have been in the
+ input. 0 newlines makes NEWLINES -1, 1 newline makes NEWLINES 0,
+ 2 make it 1, and so on up to a cap of 2. Starts from 'newlines2',
+ which is initially 0 to indicate that we are at the beginning of a
+ new line. The "state" of the procedure is determined by NEWLINES. */
+ int newlines = newlines2;
+
+#ifdef FIONREAD
+ /* If true, use the FIONREAD ioctl, as an optimization.
+ (On Ultrix, it is not supported on NFS file systems.) */
+ bool use_fionread = true;
+#endif
+
+ /* The inbuf pointers are initialized so that BPIN > EOB, and thereby input
+ is read immediately. */
+
+ eob = inbuf;
+ bpin = eob + 1;
+
+ bpout = outbuf;
+
+ while (true)
+ {
+ do
+ {
+ /* Write if there are at least OUTSIZE bytes in OUTBUF. */
+
+ if (outbuf + outsize <= bpout)
+ {
+ char *wp = outbuf;
+ size_t remaining_bytes;
+ do
+ {
+ if (full_write (STDOUT_FILENO, wp, outsize) != outsize)
+ die (EXIT_FAILURE, errno, _("write error"));
+ wp += outsize;
+ remaining_bytes = bpout - wp;
+ }
+ while (outsize <= remaining_bytes);
+
+ /* Move the remaining bytes to the beginning of the
+ buffer. */
+
+ memmove (outbuf, wp, remaining_bytes);
+ bpout = outbuf + remaining_bytes;
+ }
+
+ /* Is INBUF empty? */
+
+ if (bpin > eob)
+ {
+ bool input_pending = false;
+#ifdef FIONREAD
+ int n_to_read = 0;
+
+ /* Is there any input to read immediately?
+ If not, we are about to wait,
+ so write all buffered output before waiting. */
+
+ if (use_fionread
+ && ioctl (input_desc, FIONREAD, &n_to_read) < 0)
+ {
+ /* Ultrix returns EOPNOTSUPP on NFS;
+ HP-UX returns ENOTTY on pipes.
+ SunOS returns EINVAL and
+ More/BSD returns ENODEV on special files
+ like /dev/null.
+ Irix-5 returns ENOSYS on pipes. */
+ if (errno == EOPNOTSUPP || errno == ENOTTY
+ || errno == EINVAL || errno == ENODEV
+ || errno == ENOSYS)
+ use_fionread = false;
+ else
+ {
+ error (0, errno, _("cannot do ioctl on %s"),
+ quoteaf (infile));
+ newlines2 = newlines;
+ return false;
+ }
+ }
+ if (n_to_read != 0)
+ input_pending = true;
+#endif
+
+ if (!input_pending)
+ write_pending (outbuf, &bpout);
+
+ /* Read more input into INBUF. */
+
+ n_read = safe_read (input_desc, inbuf, insize);
+ if (n_read == SAFE_READ_ERROR)
+ {
+ error (0, errno, "%s", quotef (infile));
+ write_pending (outbuf, &bpout);
+ newlines2 = newlines;
+ return false;
+ }
+ if (n_read == 0)
+ {
+ write_pending (outbuf, &bpout);
+ newlines2 = newlines;
+ return true;
+ }
+
+ /* Update the pointers and insert a sentinel at the buffer
+ end. */
+
+ bpin = inbuf;
+ eob = bpin + n_read;
+ *eob = '\n';
+ }
+ else
+ {
+ /* It was a real (not a sentinel) newline. */
+
+ /* Was the last line empty?
+ (i.e., have two or more consecutive newlines been read?) */
+
+ if (++newlines > 0)
+ {
+ if (newlines >= 2)
+ {
+ /* Limit this to 2 here. Otherwise, with lots of
+ consecutive newlines, the counter could wrap
+ around at INT_MAX. */
+ newlines = 2;
+
+ /* Are multiple adjacent empty lines to be substituted
+ by single ditto (-s), and this was the second empty
+ line? */
+ if (squeeze_blank)
+ {
+ ch = *bpin++;
+ continue;
+ }
+ }
+
+ /* Are line numbers to be written at empty lines (-n)? */
+
+ if (number && !number_nonblank)
+ {
+ next_line_num ();
+ bpout = stpcpy (bpout, line_num_print);
+ }
+ }
+
+ /* Output a dollar sign if requested (-E). */
+
+ if (show_ends)
+ *bpout++ = '$';
+
+ /* Output the newline. */
+
+ *bpout++ = '\n';
+ }
+ ch = *bpin++;
+ }
+ while (ch == '\n');
+
+ /* Are we at the beginning of a line, and line numbers are requested? */
+
+ if (newlines >= 0 && number)
+ {
+ next_line_num ();
+ bpout = stpcpy (bpout, line_num_print);
+ }
+
+ /* Here CH cannot contain a newline character. */
+
+ /* The loops below continue until a newline character is found,
+ which means that the buffer is empty or that a proper newline
+ has been found. */
+
+ /* If quoting, i.e., at least one of -v, -e, or -t specified,
+ scan for chars that need conversion. */
+ if (show_nonprinting)
+ {
+ while (true)
+ {
+ if (ch >= 32)
+ {
+ if (ch < 127)
+ *bpout++ = ch;
+ else if (ch == 127)
+ {
+ *bpout++ = '^';
+ *bpout++ = '?';
+ }
+ else
+ {
+ *bpout++ = 'M';
+ *bpout++ = '-';
+ if (ch >= 128 + 32)
+ {
+ if (ch < 128 + 127)
+ *bpout++ = ch - 128;
+ else
+ {
+ *bpout++ = '^';
+ *bpout++ = '?';
+ }
+ }
+ else
+ {
+ *bpout++ = '^';
+ *bpout++ = ch - 128 + 64;
+ }
+ }
+ }
+ else if (ch == '\t' && !show_tabs)
+ *bpout++ = '\t';
+ else if (ch == '\n')
+ {
+ newlines = -1;
+ break;
+ }
+ else
+ {
+ *bpout++ = '^';
+ *bpout++ = ch + 64;
+ }
+
+ ch = *bpin++;
+ }
+ }
+ else
+ {
+ /* Not quoting, neither of -v, -e, or -t specified. */
+ while (true)
+ {
+ if (ch == '\t' && show_tabs)
+ {
+ *bpout++ = '^';
+ *bpout++ = ch + 64;
+ }
+ else if (ch != '\n')
+ *bpout++ = ch;
+ else
+ {
+ newlines = -1;
+ break;
+ }
+
+ ch = *bpin++;
+ }
+ }
+ }
+}
+
+int
+main (int argc, char **argv)
+{
+ /* Optimal size of i/o operations of output. */
+ size_t outsize;
+
+ /* Optimal size of i/o operations of input. */
+ size_t insize;
+
+ size_t page_size = getpagesize ();
+
+ /* Pointer to the input buffer. */
+ char *inbuf;
+
+ /* Pointer to the output buffer. */
+ char *outbuf;
+
+ bool ok = true;
+ int c;
+
+ /* Index in argv of the argument being processed. */
+ int argind;
+
+ /* Device number of the output (file or whatever). */
+ dev_t out_dev;
+
+ /* I-node number of the output. */
+ ino_t out_ino;
+
+ /* True if the output is a regular file. */
+ bool out_isreg;
+
+ /* True if we have ever read standard input. */
+ bool have_read_stdin = false;
+
+ struct stat stat_buf;
+
+ /* Variables that are set according to the specified options. */
+ bool number = false;
+ bool number_nonblank = false;
+ bool squeeze_blank = false;
+ bool show_ends = false;
+ bool show_nonprinting = false;
+ bool show_tabs = false;
+ int file_open_mode = O_RDONLY;
+
+ static struct option const long_options[] =
+ {
+ {"number-nonblank", no_argument, NULL, 'b'},
+ {"number", no_argument, NULL, 'n'},
+ {"squeeze-blank", no_argument, NULL, 's'},
+ {"show-nonprinting", no_argument, NULL, 'v'},
+ {"show-ends", no_argument, NULL, 'E'},
+ {"show-tabs", no_argument, NULL, 'T'},
+ {"show-all", no_argument, NULL, 'A'},
+ {GETOPT_HELP_OPTION_DECL},
+ {GETOPT_VERSION_OPTION_DECL},
+ {NULL, 0, NULL, 0}
+ };
+
+ initialize_main (&argc, &argv);
+ set_program_name (argv[0]);
+ setlocale (LC_ALL, "");
+ bindtextdomain (PACKAGE, LOCALEDIR);
+ textdomain (PACKAGE);
+
+ /* Arrange to close stdout if we exit via the
+ case_GETOPT_HELP_CHAR or case_GETOPT_VERSION_CHAR code.
+ Normally STDOUT_FILENO is used rather than stdout, so
+ close_stdout does nothing. */
+ atexit (close_stdout);
+
+ /* Parse command line options. */
+
+ while ((c = getopt_long (argc, argv, "benstuvAET", long_options, NULL))
+ != -1)
+ {
+ switch (c)
+ {
+ case 'b':
+ number = true;
+ number_nonblank = true;
+ break;
+
+ case 'e':
+ show_ends = true;
+ show_nonprinting = true;
+ break;
+
+ case 'n':
+ number = true;
+ break;
+
+ case 's':
+ squeeze_blank = true;
+ break;
+
+ case 't':
+ show_tabs = true;
+ show_nonprinting = true;
+ break;
+
+ case 'u':
+ /* We provide the -u feature unconditionally. */
+ break;
+
+ case 'v':
+ show_nonprinting = true;
+ break;
+
+ case 'A':
+ show_nonprinting = true;
+ show_ends = true;
+ show_tabs = true;
+ break;
+
+ case 'E':
+ show_ends = true;
+ break;
+
+ case 'T':
+ show_tabs = true;
+ break;
+
+ case_GETOPT_HELP_CHAR;
+
+ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
+
+ default:
+ usage (EXIT_FAILURE);
+ }
+ }
+
+ /* Get device, i-node number, and optimal blocksize of output. */
+
+ if (fstat (STDOUT_FILENO, &stat_buf) < 0)
+ die (EXIT_FAILURE, errno, _("standard output"));
+
+ outsize = io_blksize (stat_buf);
+ out_dev = stat_buf.st_dev;
+ out_ino = stat_buf.st_ino;
+ out_isreg = S_ISREG (stat_buf.st_mode) != 0;
+
+ if (! (number || show_ends || squeeze_blank))
+ {
+ file_open_mode |= O_BINARY;
+ xset_binary_mode (STDOUT_FILENO, O_BINARY);
+ }
+
+ /* Each input file is checked against the output file inside the loop. */
+
+ /* Main loop. */
+
+ infile = "-";
+ argind = optind;
+
+ do
+ {
+ if (argind < argc)
+ infile = argv[argind];
+
+ if (STREQ (infile, "-"))
+ {
+ have_read_stdin = true;
+ input_desc = STDIN_FILENO;
+ if (file_open_mode & O_BINARY)
+ xset_binary_mode (STDIN_FILENO, O_BINARY);
+ }
+ else
+ {
+ input_desc = open (infile, file_open_mode);
+ if (input_desc < 0)
+ {
+ error (0, errno, "%s", quotef (infile));
+ ok = false;
+ continue;
+ }
+ }
+
+ if (fstat (input_desc, &stat_buf) < 0)
+ {
+ error (0, errno, "%s", quotef (infile));
+ ok = false;
+ goto contin;
+ }
+ insize = io_blksize (stat_buf);
+
+ fdadvise (input_desc, 0, 0, FADVISE_SEQUENTIAL);
+
+ /* Don't copy a nonempty regular file to itself, as that would
+ merely exhaust the output device. It's better to catch this
+ error earlier rather than later. */
+
+ if (out_isreg
+ && stat_buf.st_dev == out_dev && stat_buf.st_ino == out_ino
+ && lseek (input_desc, 0, SEEK_CUR) < stat_buf.st_size)
+ {
+ error (0, 0, _("%s: input file is output file"), quotef (infile));
+ ok = false;
+ goto contin;
+ }
+
+ /* Select which version of 'cat' to use. If any format-oriented
+ options were given use 'cat'; otherwise use 'simple_cat'. */
+
+ if (! (number || show_ends || show_nonprinting
+ || show_tabs || squeeze_blank))
+ {
+ insize = MAX (insize, outsize);
+ inbuf = xmalloc (insize + page_size - 1);
+
+ ok &= simple_cat (ptr_align (inbuf, page_size), insize);
+ }
+ else
+ {
+ inbuf = xmalloc (insize + 1 + page_size - 1);
+
+ /* Why are
+ (OUTSIZE - 1 + INSIZE * 4 + LINE_COUNTER_BUF_LEN + PAGE_SIZE - 1)
+ bytes allocated for the output buffer?
+
+ A test whether output needs to be written is done when the input
+ buffer empties or when a newline appears in the input. After
+ output is written, at most (OUTSIZE - 1) bytes will remain in the
+ buffer. Now INSIZE bytes of input is read. Each input character
+ may grow by a factor of 4 (by the prepending of M-^). If all
+ characters do, and no newlines appear in this block of input, we
+ will have at most (OUTSIZE - 1 + INSIZE * 4) bytes in the buffer.
+ If the last character in the preceding block of input was a
+ newline, a line number may be written (according to the given
+ options) as the first thing in the output buffer. (Done after the
+ new input is read, but before processing of the input begins.)
+ A line number requires seldom more than LINE_COUNTER_BUF_LEN
+ positions.
+
+ Align the output buffer to a page size boundary, for efficiency
+ on some paging implementations, so add PAGE_SIZE - 1 bytes to the
+ request to make room for the alignment. */
+
+ outbuf = xmalloc (outsize - 1 + insize * 4 + LINE_COUNTER_BUF_LEN
+ + page_size - 1);
+
+ ok &= cat (ptr_align (inbuf, page_size), insize,
+ ptr_align (outbuf, page_size), outsize, show_nonprinting,
+ show_tabs, number, number_nonblank, show_ends,
+ squeeze_blank);
+
+ free (outbuf);
+ }
+
+ free (inbuf);
+
+ contin:
+ if (!STREQ (infile, "-") && close (input_desc) < 0)
+ {
+ error (0, errno, "%s", quotef (infile));
+ ok = false;
+ }
+ }
+ while (++argind < argc);
+
+ if (have_read_stdin && close (STDIN_FILENO) < 0)
+ die (EXIT_FAILURE, errno, _("closing standard input"));
+
+ return ok ? EXIT_SUCCESS : EXIT_FAILURE;
+}