diff options
Diffstat (limited to 'src/tac.c')
-rw-r--r-- | src/tac.c | 586 |
1 files changed, 586 insertions, 0 deletions
diff --git a/src/tac.c b/src/tac.c new file mode 100644 index 0000000..4c36558 --- /dev/null +++ b/src/tac.c @@ -0,0 +1,586 @@ +/* tac - concatenate and print files in reverse + Copyright (C) 1988-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* Written by Jay Lepreau (lepreau@cs.utah.edu). + GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */ + +/* Copy each FILE, or the standard input if none are given or when a + FILE name of "-" is encountered, to the standard output with the + order of the records reversed. The records are separated by + instances of a string, or a newline if none is given. By default, the + separator string is attached to the end of the record that it + follows in the file. + + Options: + -b, --before The separator is attached to the beginning + of the record that it precedes in the file. + -r, --regex The separator is a regular expression. + -s, --separator=separator Use SEPARATOR as the record separator. + + To reverse a file byte by byte, use (in bash, ksh, or sh): +tac -r -s '.\| +' file */ + +#include <config.h> + +#include <stdio.h> +#include <getopt.h> +#include <sys/types.h> +#include "system.h" + +#include <regex.h> + +#include "filenamecat.h" +#include "full-read.h" +#include "safe-read.h" +#include "temp-stream.h" +#include "xbinary-io.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "tac" + +#define AUTHORS \ + proper_name ("Jay Lepreau"), \ + proper_name ("David MacKenzie") + + +/* The number of bytes per atomic read. */ +#define INITIAL_READSIZE 8192 + +/* The number of bytes per atomic write. */ +#define WRITESIZE 8192 + +/* The string that separates the records of the file. */ +static char const *separator; + +/* True if we have ever read standard input. */ +static bool have_read_stdin = false; + +/* If true, print 'separator' along with the record preceding it + in the file; otherwise with the record following it. */ +static bool separator_ends_record; + +/* 0 if 'separator' is to be matched as a regular expression; + otherwise, the length of 'separator', used as a sentinel to + stop the search. */ +static size_t sentinel_length; + +/* The length of a match with 'separator'. If 'sentinel_length' is 0, + 'match_length' is computed every time a match succeeds; + otherwise, it is simply the length of 'separator'. */ +static size_t match_length; + +/* The input buffer. */ +static char *G_buffer; + +/* The number of bytes to read at once into 'buffer'. */ +static size_t read_size; + +/* The size of 'buffer'. This is read_size * 2 + sentinel_length + 2. + The extra 2 bytes allow 'past_end' to have a value beyond the + end of 'G_buffer' and 'match_start' to run off the front of 'G_buffer'. */ +static size_t G_buffer_size; + +/* The compiled regular expression representing 'separator'. */ +static struct re_pattern_buffer compiled_separator; +static char compiled_separator_fastmap[UCHAR_MAX + 1]; +static struct re_registers regs; + +static struct option const longopts[] = +{ + {"before", no_argument, nullptr, 'b'}, + {"regex", no_argument, nullptr, 'r'}, + {"separator", required_argument, nullptr, 's'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ +"), + program_name); + fputs (_("\ +Write each FILE to standard output, last line first.\n\ +"), stdout); + + emit_stdin_note (); + emit_mandatory_arg_note (); + + fputs (_("\ + -b, --before attach the separator before instead of after\n\ + -r, --regex interpret the separator as a regular expression\n\ + -s, --separator=STRING use STRING as the separator instead of newline\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Print the characters from START to PAST_END - 1. + If START is null, just flush the buffer. */ + +static void +output (char const *start, char const *past_end) +{ + static char buffer[WRITESIZE]; + static size_t bytes_in_buffer = 0; + size_t bytes_to_add = past_end - start; + size_t bytes_available = WRITESIZE - bytes_in_buffer; + + if (start == 0) + { + fwrite (buffer, 1, bytes_in_buffer, stdout); + bytes_in_buffer = 0; + return; + } + + /* Write out as many full buffers as possible. */ + while (bytes_to_add >= bytes_available) + { + memcpy (buffer + bytes_in_buffer, start, bytes_available); + bytes_to_add -= bytes_available; + start += bytes_available; + fwrite (buffer, 1, WRITESIZE, stdout); + bytes_in_buffer = 0; + bytes_available = WRITESIZE; + } + + memcpy (buffer + bytes_in_buffer, start, bytes_to_add); + bytes_in_buffer += bytes_to_add; +} + +/* Print in reverse the file open on descriptor FD for reading FILE. + The file is already positioned at FILE_POS, which should be near its end. + Return true if successful. */ + +static bool +tac_seekable (int input_fd, char const *file, off_t file_pos) +{ + /* Pointer to the location in 'G_buffer' where the search for + the next separator will begin. */ + char *match_start; + + /* Pointer to one past the rightmost character in 'G_buffer' that + has not been printed yet. */ + char *past_end; + + /* Length of the record growing in 'G_buffer'. */ + size_t saved_record_size; + + /* True if 'output' has not been called yet for any file. + Only used when the separator is attached to the preceding record. */ + bool first_time = true; + char first_char = *separator; /* Speed optimization, non-regexp. */ + char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */ + size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */ + + /* Arrange for the first read to lop off enough to leave the rest of the + file a multiple of 'read_size'. Since 'read_size' can change, this may + not always hold during the program run, but since it usually will, leave + it here for i/o efficiency (page/sector boundaries and all that). + Note: the efficiency gain has not been verified. */ + size_t remainder = file_pos % read_size; + if (remainder != 0) + { + file_pos -= remainder; + if (lseek (input_fd, file_pos, SEEK_SET) < 0) + error (0, errno, _("%s: seek failed"), quotef (file)); + } + + /* Scan backward, looking for end of file. This caters to proc-like + file systems where the file size is just an estimate. */ + while ((saved_record_size = safe_read (input_fd, G_buffer, read_size)) == 0 + && file_pos != 0) + { + off_t rsize = read_size; + if (lseek (input_fd, -rsize, SEEK_CUR) < 0) + error (0, errno, _("%s: seek failed"), quotef (file)); + file_pos -= read_size; + } + + /* Now scan forward, looking for end of file. */ + while (saved_record_size == read_size) + { + size_t nread = safe_read (input_fd, G_buffer, read_size); + if (nread == 0) + break; + saved_record_size = nread; + if (saved_record_size == SAFE_READ_ERROR) + break; + file_pos += nread; + } + + if (saved_record_size == SAFE_READ_ERROR) + { + error (0, errno, _("%s: read error"), quotef (file)); + return false; + } + + match_start = past_end = G_buffer + saved_record_size; + /* For non-regexp search, move past impossible positions for a match. */ + if (sentinel_length) + match_start -= match_length1; + + while (true) + { + /* Search backward from 'match_start' - 1 to 'G_buffer' for a match + with 'separator'; for speed, use strncmp if 'separator' contains no + metacharacters. + If the match succeeds, set 'match_start' to point to the start of + the match and 'match_length' to the length of the match. + Otherwise, make 'match_start' < 'G_buffer'. */ + if (sentinel_length == 0) + { + size_t i = match_start - G_buffer; + regoff_t ri = i; + regoff_t range = 1 - ri; + regoff_t ret; + + if (1 < range) + error (EXIT_FAILURE, 0, _("record too large")); + + if (range == 1 + || ((ret = re_search (&compiled_separator, G_buffer, + i, i - 1, range, ®s)) + == -1)) + match_start = G_buffer - 1; + else if (ret == -2) + error (EXIT_FAILURE, 0, + _("error in regular expression search")); + else + { + match_start = G_buffer + regs.start[0]; + match_length = regs.end[0] - regs.start[0]; + } + } + else + { + /* 'match_length' is constant for non-regexp boundaries. */ + while (*--match_start != first_char + || (match_length1 && !STREQ_LEN (match_start + 1, separator1, + match_length1))) + /* Do nothing. */ ; + } + + /* Check whether we backed off the front of 'G_buffer' without finding + a match for 'separator'. */ + if (match_start < G_buffer) + { + if (file_pos == 0) + { + /* Hit the beginning of the file; print the remaining record. */ + output (G_buffer, past_end); + return true; + } + + saved_record_size = past_end - G_buffer; + if (saved_record_size > read_size) + { + /* 'G_buffer_size' is about twice 'read_size', so since + we want to read in another 'read_size' bytes before + the data already in 'G_buffer', we need to increase + 'G_buffer_size'. */ + char *newbuffer; + size_t offset = sentinel_length ? sentinel_length : 1; + size_t old_G_buffer_size = G_buffer_size; + + read_size *= 2; + G_buffer_size = read_size * 2 + sentinel_length + 2; + if (G_buffer_size < old_G_buffer_size) + xalloc_die (); + newbuffer = xrealloc (G_buffer - offset, G_buffer_size); + newbuffer += offset; + G_buffer = newbuffer; + } + + /* Back up to the start of the next bufferfull of the file. */ + if (file_pos >= read_size) + file_pos -= read_size; + else + { + read_size = file_pos; + file_pos = 0; + } + if (lseek (input_fd, file_pos, SEEK_SET) < 0) + error (0, errno, _("%s: seek failed"), quotef (file)); + + /* Shift the pending record data right to make room for the new. + The source and destination regions probably overlap. */ + memmove (G_buffer + read_size, G_buffer, saved_record_size); + past_end = G_buffer + read_size + saved_record_size; + /* For non-regexp searches, avoid unnecessary scanning. */ + if (sentinel_length) + match_start = G_buffer + read_size; + else + match_start = past_end; + + if (full_read (input_fd, G_buffer, read_size) != read_size) + { + error (0, errno, _("%s: read error"), quotef (file)); + return false; + } + } + else + { + /* Found a match of 'separator'. */ + if (separator_ends_record) + { + char *match_end = match_start + match_length; + + /* If this match of 'separator' isn't at the end of the + file, print the record. */ + if (!first_time || match_end != past_end) + output (match_end, past_end); + past_end = match_end; + first_time = false; + } + else + { + output (match_start, past_end); + past_end = match_start; + } + + /* For non-regex matching, we can back up. */ + if (sentinel_length > 0) + match_start -= match_length - 1; + } + } +} + +/* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to + a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream + and file name. Return the number of bytes copied, or -1 on error. */ + +static off_t +copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file) +{ + FILE *fp; + char *file_name; + uintmax_t bytes_copied = 0; + if (!temp_stream (&fp, &file_name)) + return -1; + + while (true) + { + size_t bytes_read = safe_read (input_fd, G_buffer, read_size); + if (bytes_read == 0) + break; + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, _("%s: read error"), quotef (file)); + return -1; + } + + if (fwrite (G_buffer, 1, bytes_read, fp) != bytes_read) + { + error (0, errno, _("%s: write error"), quotef (file_name)); + return -1; + } + + /* Implicitly <= OFF_T_MAX due to preceding fwrite(), + but unsigned type used to avoid compiler warnings + not aware of this fact. */ + bytes_copied += bytes_read; + } + + if (fflush (fp) != 0) + { + error (0, errno, _("%s: write error"), quotef (file_name)); + return -1; + } + + *g_tmp = fp; + *g_tempfile = file_name; + return bytes_copied; +} + +/* Copy INPUT_FD to a temporary, then tac that file. + Return true if successful. */ + +static bool +tac_nonseekable (int input_fd, char const *file) +{ + FILE *tmp_stream; + char *tmp_file; + off_t bytes_copied = copy_to_temp (&tmp_stream, &tmp_file, input_fd, file); + if (bytes_copied < 0) + return false; + + bool ok = tac_seekable (fileno (tmp_stream), tmp_file, bytes_copied); + return ok; +} + +/* Print FILE in reverse, copying it to a temporary + file first if it is not seekable. + Return true if successful. */ + +static bool +tac_file (char const *filename) +{ + bool ok; + off_t file_size; + int fd; + bool is_stdin = STREQ (filename, "-"); + + if (is_stdin) + { + have_read_stdin = true; + fd = STDIN_FILENO; + filename = _("standard input"); + xset_binary_mode (STDIN_FILENO, O_BINARY); + } + else + { + fd = open (filename, O_RDONLY | O_BINARY); + if (fd < 0) + { + error (0, errno, _("failed to open %s for reading"), + quoteaf (filename)); + return false; + } + } + + file_size = lseek (fd, 0, SEEK_END); + + ok = (file_size < 0 || isatty (fd) + ? tac_nonseekable (fd, filename) + : tac_seekable (fd, filename, file_size)); + + if (!is_stdin && close (fd) != 0) + { + error (0, errno, _("%s: read error"), quotef (filename)); + ok = false; + } + return ok; +} + +int +main (int argc, char **argv) +{ + char const *error_message; /* Return value from re_compile_pattern. */ + int optc; + bool ok; + size_t half_buffer_size; + + /* Initializer for file_list if no file-arguments + were specified on the command line. */ + static char const *const default_file_list[] = {"-", nullptr}; + char const *const *file; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + separator = "\n"; + sentinel_length = 1; + separator_ends_record = true; + + while ((optc = getopt_long (argc, argv, "brs:", longopts, nullptr)) != -1) + { + switch (optc) + { + case 'b': + separator_ends_record = false; + break; + case 'r': + sentinel_length = 0; + break; + case 's': + separator = optarg; + break; + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + } + + if (sentinel_length == 0) + { + if (*separator == 0) + error (EXIT_FAILURE, 0, _("separator cannot be empty")); + + compiled_separator.buffer = nullptr; + compiled_separator.allocated = 0; + compiled_separator.fastmap = compiled_separator_fastmap; + compiled_separator.translate = nullptr; + error_message = re_compile_pattern (separator, strlen (separator), + &compiled_separator); + if (error_message) + error (EXIT_FAILURE, 0, "%s", (error_message)); + } + else + match_length = sentinel_length = *separator ? strlen (separator) : 1; + + read_size = INITIAL_READSIZE; + while (sentinel_length >= read_size / 2) + { + if (SIZE_MAX / 2 < read_size) + xalloc_die (); + read_size *= 2; + } + half_buffer_size = read_size + sentinel_length + 1; + G_buffer_size = 2 * half_buffer_size; + if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size)) + xalloc_die (); + G_buffer = xmalloc (G_buffer_size); + if (sentinel_length) + { + memcpy (G_buffer, separator, sentinel_length + 1); + G_buffer += sentinel_length; + } + else + { + ++G_buffer; + } + + file = (optind < argc + ? (char const *const *) &argv[optind] + : default_file_list); + + xset_binary_mode (STDOUT_FILENO, O_BINARY); + + { + ok = true; + for (size_t i = 0; file[i]; ++i) + ok &= tac_file (file[i]); + } + + /* Flush the output buffer. */ + output ((char *) nullptr, (char *) nullptr); + + if (have_read_stdin && close (STDIN_FILENO) < 0) + { + error (0, errno, "-"); + ok = false; + } + + main_exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); +} |