summaryrefslogtreecommitdiffstats
path: root/src/csplit.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/csplit.c')
-rw-r--r--src/csplit.c1492
1 files changed, 1492 insertions, 0 deletions
diff --git a/src/csplit.c b/src/csplit.c
new file mode 100644
index 0000000..8f21414
--- /dev/null
+++ b/src/csplit.c
@@ -0,0 +1,1492 @@
+/* csplit - split a file into sections determined by context lines
+ Copyright (C) 1991-2022 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
+ Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
+
+#include <config.h>
+
+#include <assert.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include <signal.h>
+
+#include "system.h"
+
+#include <regex.h>
+
+#include "die.h"
+#include "error.h"
+#include "fd-reopen.h"
+#include "idx.h"
+#include "quote.h"
+#include "safe-read.h"
+#include "stdio--.h"
+#include "xdectoint.h"
+#include "xstrtol.h"
+
+/* The official name of this program (e.g., no 'g' prefix). */
+#define PROGRAM_NAME "csplit"
+
+#define AUTHORS \
+ proper_name ("Stuart Kemp"), \
+ proper_name ("David MacKenzie")
+
+/* The default prefix for output file names. */
+#define DEFAULT_PREFIX "xx"
+
+/* A compiled pattern arg. */
+struct control
+{
+ intmax_t offset; /* Offset from regexp to split at. */
+ intmax_t lines_required; /* Number of lines required. */
+ intmax_t repeat; /* Repeat count. */
+ int argnum; /* ARGV index. */
+ bool repeat_forever; /* True if '*' used as a repeat count. */
+ bool ignore; /* If true, produce no output (for regexp). */
+ bool regexpr; /* True if regular expression was used. */
+ struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
+};
+
+/* Initial size of data area in buffers. */
+#define START_SIZE 8191
+
+/* Number of lines kept in each node in line list. */
+#define CTRL_SIZE 80
+
+#ifdef DEBUG
+/* Some small values to test the algorithms. */
+# define START_SIZE 200
+# define CTRL_SIZE 1
+#endif
+
+/* A string with a length count. */
+struct cstring
+{
+ idx_t len;
+ char *str;
+};
+
+/* Pointers to the beginnings of lines in the buffer area.
+ These structures are linked together if needed. */
+struct line
+{
+ idx_t used; /* Number of offsets used in this struct. */
+ idx_t insert_index; /* Next offset to use when inserting line. */
+ idx_t retrieve_index; /* Next index to use when retrieving line. */
+ struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
+ struct line *next; /* Next in linked list. */
+};
+
+/* The structure to hold the input lines.
+ Contains a pointer to the data area and a list containing
+ pointers to the individual lines. */
+struct buffer_record
+{
+ idx_t bytes_alloc; /* Size of the buffer area. */
+ idx_t bytes_used; /* Bytes used in the buffer area. */
+ intmax_t start_line; /* First line number in this buffer. */
+ intmax_t first_available; /* First line that can be retrieved. */
+ idx_t num_lines; /* Number of complete lines in this buffer. */
+ char *buffer; /* Data area. */
+ struct line *line_start; /* Head of list of pointers to lines. */
+ struct line *curr_line; /* The line start record currently in use. */
+ struct buffer_record *next;
+};
+
+static void close_output_file (void);
+static void create_output_file (void);
+static void delete_all_files (bool);
+static void save_line_to_file (const struct cstring *line);
+
+/* Start of buffer list. */
+static struct buffer_record *head = NULL;
+
+/* Partially read line. */
+static char *hold_area = NULL;
+
+/* Number of bytes in 'hold_area'. */
+static idx_t hold_count = 0;
+
+/* Number of the last line in the buffers. */
+static intmax_t last_line_number = 0;
+
+/* Number of the line currently being examined. */
+static intmax_t current_line = 0;
+
+/* If true, we have read EOF. */
+static bool have_read_eof = false;
+
+/* Name of output files. */
+static char *volatile filename_space = NULL;
+
+/* Prefix part of output file names. */
+static char const *volatile prefix = NULL;
+
+/* Suffix part of output file names. */
+static char *volatile suffix = NULL;
+
+/* Number of digits to use in output file names. */
+static int volatile digits = 2;
+
+/* Number of files created so far. */
+static int volatile files_created = 0;
+
+/* Number of bytes written to current file. */
+static intmax_t bytes_written;
+
+/* Output file pointer. */
+static FILE *output_stream = NULL;
+
+/* Output file name. */
+static char *output_filename = NULL;
+
+/* Perhaps it would be cleaner to pass arg values instead of indexes. */
+static char **global_argv;
+
+/* If true, do not print the count of bytes in each output file. */
+static bool suppress_count;
+
+/* If true, remove output files on error. */
+static bool volatile remove_files;
+
+/* If true, remove all output files which have a zero length. */
+static bool elide_empty_files;
+
+/* If true, suppress the lines that match the PATTERN */
+static bool suppress_matched;
+
+/* The compiled pattern arguments, which determine how to split
+ the input file. */
+static struct control *controls;
+
+/* Number of elements in 'controls'. */
+static idx_t control_used;
+
+/* The set of signals that are caught. */
+static sigset_t caught_signals;
+
+/* For long options that have no equivalent short option, use a
+ non-character as a pseudo short option, starting with CHAR_MAX + 1. */
+enum
+{
+ SUPPRESS_MATCHED_OPTION = CHAR_MAX + 1
+};
+
+static struct option const longopts[] =
+{
+ {"digits", required_argument, NULL, 'n'},
+ {"quiet", no_argument, NULL, 'q'},
+ {"silent", no_argument, NULL, 's'},
+ {"keep-files", no_argument, NULL, 'k'},
+ {"elide-empty-files", no_argument, NULL, 'z'},
+ {"prefix", required_argument, NULL, 'f'},
+ {"suffix-format", required_argument, NULL, 'b'},
+ {"suppress-matched", no_argument, NULL, SUPPRESS_MATCHED_OPTION},
+ {GETOPT_HELP_OPTION_DECL},
+ {GETOPT_VERSION_OPTION_DECL},
+ {NULL, 0, NULL, 0}
+};
+
+/* Optionally remove files created so far; then exit.
+ Called when an error detected. */
+
+static void
+cleanup (void)
+{
+ sigset_t oldset;
+
+ close_output_file ();
+
+ sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
+ delete_all_files (false);
+ sigprocmask (SIG_SETMASK, &oldset, NULL);
+}
+
+static _Noreturn void
+cleanup_fatal (void)
+{
+ cleanup ();
+ exit (EXIT_FAILURE);
+}
+
+extern void
+xalloc_die (void)
+{
+ error (0, 0, "%s", _("memory exhausted"));
+ cleanup_fatal ();
+}
+
+static void
+interrupt_handler (int sig)
+{
+ delete_all_files (true);
+ signal (sig, SIG_DFL);
+ /* The signal has been reset to SIG_DFL, but blocked during this
+ handler. Force the default action of this signal once the
+ handler returns and the block is removed. */
+ raise (sig);
+}
+
+/* Keep track of NUM bytes of a partial line in buffer START.
+ These bytes will be retrieved later when another large buffer is read. */
+
+static void
+save_to_hold_area (char *start, idx_t num)
+{
+ free (hold_area);
+ hold_area = start;
+ hold_count = num;
+}
+
+/* Read up to MAX_N_BYTES bytes from the input stream into DEST.
+ Return the number of bytes read. */
+
+static idx_t
+read_input (char *dest, idx_t max_n_bytes)
+{
+ idx_t bytes_read;
+
+ if (max_n_bytes == 0)
+ return 0;
+
+ bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes);
+
+ if (bytes_read == 0)
+ have_read_eof = true;
+
+ if (bytes_read == SAFE_READ_ERROR)
+ {
+ error (0, errno, _("read error"));
+ cleanup_fatal ();
+ }
+
+ return bytes_read;
+}
+
+/* Initialize existing line record P. */
+
+static void
+clear_line_control (struct line *p)
+{
+ p->used = 0;
+ p->insert_index = 0;
+ p->retrieve_index = 0;
+}
+
+/* Return a new, initialized line record. */
+
+static struct line *
+new_line_control (void)
+{
+ struct line *p = xmalloc (sizeof *p);
+
+ p->next = NULL;
+ clear_line_control (p);
+
+ return p;
+}
+
+/* Record LINE_START, which is the address of the start of a line
+ of length LINE_LEN in the large buffer, in the lines buffer of B. */
+
+static void
+keep_new_line (struct buffer_record *b, char *line_start, idx_t line_len)
+{
+ struct line *l;
+
+ /* If there is no existing area to keep line info, get some. */
+ if (b->line_start == NULL)
+ b->line_start = b->curr_line = new_line_control ();
+
+ /* If existing area for lines is full, get more. */
+ if (b->curr_line->used == CTRL_SIZE)
+ {
+ b->curr_line->next = new_line_control ();
+ b->curr_line = b->curr_line->next;
+ }
+
+ l = b->curr_line;
+
+ /* Record the start of the line, and update counters. */
+ l->starts[l->insert_index].str = line_start;
+ l->starts[l->insert_index].len = line_len;
+ l->used++;
+ l->insert_index++;
+}
+
+/* Scan the buffer in B for newline characters
+ and record the line start locations and lengths in B.
+ Return the number of lines found in this buffer.
+
+ There may be an incomplete line at the end of the buffer;
+ a pointer is kept to this area, which will be used when
+ the next buffer is filled. */
+
+static idx_t
+record_line_starts (struct buffer_record *b)
+{
+ char *line_start; /* Start of current line. */
+ idx_t lines; /* Number of lines found. */
+ idx_t line_length; /* Length of each line found. */
+
+ if (b->bytes_used == 0)
+ return 0;
+
+ lines = 0;
+ line_start = b->buffer;
+ char *buffer_end = line_start + b->bytes_used;
+ *buffer_end = '\n';
+
+ while (true)
+ {
+ char *line_end = rawmemchr (line_start, '\n');
+ if (line_end == buffer_end)
+ break;
+ line_length = line_end - line_start + 1;
+ keep_new_line (b, line_start, line_length);
+ line_start = line_end + 1;
+ lines++;
+ }
+
+ /* Check for an incomplete last line. */
+ idx_t bytes_left = buffer_end - line_start;
+ if (bytes_left)
+ {
+ if (have_read_eof)
+ {
+ keep_new_line (b, line_start, bytes_left);
+ lines++;
+ }
+ else
+ save_to_hold_area (ximemdup (line_start, bytes_left), bytes_left);
+ }
+
+ b->num_lines = lines;
+ b->first_available = b->start_line = last_line_number + 1;
+ last_line_number += lines;
+
+ return lines;
+}
+
+static void
+free_buffer (struct buffer_record *buf)
+{
+ for (struct line *l = buf->line_start; l;)
+ {
+ struct line *n = l->next;
+ free (l);
+ l = n;
+ }
+ free (buf->buffer);
+ free (buf);
+}
+
+/* Return a new buffer of at least MINSIZE bytes. */
+
+static ATTRIBUTE_DEALLOC (free_buffer, 1)
+struct buffer_record *
+get_new_buffer (idx_t min_size)
+{
+ struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
+ new_buffer->bytes_alloc = 0;
+ new_buffer->buffer = xpalloc (NULL, &new_buffer->bytes_alloc, min_size,
+ -1, 1);
+ new_buffer->bytes_used = 0;
+ new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
+ new_buffer->num_lines = 0;
+ new_buffer->line_start = new_buffer->curr_line = NULL;
+ new_buffer->next = NULL;
+
+ return new_buffer;
+}
+
+/* Append buffer BUF to the linked list of buffers that contain
+ some data yet to be processed. */
+
+static void
+save_buffer (struct buffer_record *buf)
+{
+ struct buffer_record *p;
+
+ buf->next = NULL;
+ buf->curr_line = buf->line_start;
+
+ if (head == NULL)
+ head = buf;
+ else
+ {
+ for (p = head; p->next; p = p->next)
+ /* Do nothing. */ ;
+ p->next = buf;
+ }
+}
+
+/* Fill a buffer of input.
+
+ Set the initial size of the buffer to a default.
+ Fill the buffer (from the hold area and input stream)
+ and find the individual lines.
+ If no lines are found (the buffer is too small to hold the next line),
+ release the current buffer (whose contents would have been put in the
+ hold area) and repeat the process with another large buffer until at least
+ one entire line has been read.
+
+ Return true if a new buffer was obtained, otherwise false
+ (in which case end-of-file must have been encountered). */
+
+static bool
+load_buffer (void)
+{
+ struct buffer_record *b;
+ idx_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
+ idx_t bytes_avail; /* Size of new buffer created. */
+ idx_t lines_found; /* Number of lines in this new buffer. */
+ char *p; /* Place to load into buffer. */
+
+ if (have_read_eof)
+ return false;
+
+ /* We must make the buffer at least as large as the amount of data
+ in the partial line left over from the last call,
+ plus room for a sentinel '\n'. */
+ if (bytes_wanted <= hold_count)
+ bytes_wanted = hold_count + 1;
+
+ while (true)
+ {
+ b = get_new_buffer (bytes_wanted);
+ bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
+ p = b->buffer;
+
+ /* First check the 'holding' area for a partial line. */
+ if (hold_count)
+ {
+ memcpy (p, hold_area, hold_count);
+ p += hold_count;
+ b->bytes_used += hold_count;
+ bytes_avail -= hold_count;
+ hold_count = 0;
+ }
+
+ b->bytes_used += read_input (p, bytes_avail - 1);
+
+ lines_found = record_line_starts (b);
+
+ if (lines_found || have_read_eof)
+ break;
+
+ if (INT_MULTIPLY_WRAPV (b->bytes_alloc, 2, &bytes_wanted))
+ xalloc_die ();
+ free_buffer (b);
+ }
+
+ if (lines_found)
+ save_buffer (b);
+ else
+ free_buffer (b);
+
+ return lines_found != 0;
+}
+
+/* Return the line number of the first line that has not yet been retrieved. */
+
+static intmax_t
+get_first_line_in_buffer (void)
+{
+ if (head == NULL && !load_buffer ())
+ die (EXIT_FAILURE, errno, _("input disappeared"));
+
+ return head->first_available;
+}
+
+/* Return a pointer to the logical first line in the buffer and make the
+ next line the logical first line.
+ Return NULL if there is no more input. */
+
+static struct cstring *
+remove_line (void)
+{
+ /* If non-NULL, this is the buffer for which the previous call
+ returned the final line. So now, presuming that line has been
+ processed, we can free the buffer and reset this pointer. */
+ static struct buffer_record *prev_buf = NULL;
+
+ struct cstring *line; /* Return value. */
+ struct line *l; /* For convenience. */
+
+ if (prev_buf)
+ {
+ free_buffer (prev_buf);
+ prev_buf = NULL;
+ }
+
+ if (head == NULL && !load_buffer ())
+ return NULL;
+
+ if (current_line < head->first_available)
+ current_line = head->first_available;
+
+ ++(head->first_available);
+
+ l = head->curr_line;
+
+ line = &l->starts[l->retrieve_index];
+
+ /* Advance index to next line. */
+ if (++l->retrieve_index == l->used)
+ {
+ /* Go on to the next line record. */
+ head->curr_line = l->next;
+ if (head->curr_line == NULL || head->curr_line->used == 0)
+ {
+ /* Go on to the next data block.
+ but first record the current one so we can free it
+ once the line we're returning has been processed. */
+ prev_buf = head;
+ head = head->next;
+ }
+ }
+
+ return line;
+}
+
+/* Search the buffers for line LINENUM, reading more input if necessary.
+ Return a pointer to the line, or NULL if it is not found in the file. */
+
+static struct cstring *
+find_line (intmax_t linenum)
+{
+ struct buffer_record *b;
+
+ if (head == NULL && !load_buffer ())
+ return NULL;
+
+ if (linenum < head->start_line)
+ return NULL;
+
+ for (b = head;;)
+ {
+ assert (b);
+ if (linenum < b->start_line + b->num_lines)
+ {
+ /* The line is in this buffer. */
+ struct line *l;
+ idx_t offset; /* How far into the buffer the line is. */
+
+ l = b->line_start;
+ offset = linenum - b->start_line;
+ /* Find the control record. */
+ while (offset >= CTRL_SIZE)
+ {
+ l = l->next;
+ offset -= CTRL_SIZE;
+ }
+ return &l->starts[offset];
+ }
+ if (b->next == NULL && !load_buffer ())
+ return NULL;
+ b = b->next; /* Try the next data block. */
+ }
+}
+
+/* Return true if at least one more line is available for input. */
+
+static bool
+no_more_lines (void)
+{
+ return find_line (current_line + 1) == NULL;
+}
+
+/* Open NAME as standard input. */
+
+static void
+set_input_file (char const *name)
+{
+ if (! STREQ (name, "-") && fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
+ die (EXIT_FAILURE, errno, _("cannot open %s for reading"),
+ quoteaf (name));
+}
+
+/* Write all lines from the beginning of the buffer up to, but
+ not including, line LAST_LINE, to the current output file.
+ If IGNORE is true, do not output lines selected here.
+ ARGNUM is the index in ARGV of the current pattern. */
+
+static void
+write_to_file (intmax_t last_line, bool ignore, int argnum)
+{
+ struct cstring *line;
+ intmax_t first_line; /* First available input line. */
+ intmax_t lines; /* Number of lines to output. */
+ intmax_t i;
+
+ first_line = get_first_line_in_buffer ();
+
+ if (first_line > last_line)
+ {
+ error (0, 0, _("%s: line number out of range"),
+ quote (global_argv[argnum]));
+ cleanup_fatal ();
+ }
+
+ lines = last_line - first_line;
+
+ for (i = 0; i < lines; i++)
+ {
+ line = remove_line ();
+ if (line == NULL)
+ {
+ error (0, 0, _("%s: line number out of range"),
+ quote (global_argv[argnum]));
+ cleanup_fatal ();
+ }
+ if (!ignore)
+ save_line_to_file (line);
+ }
+}
+
+/* Output any lines left after all regexps have been processed. */
+
+static void
+dump_rest_of_file (void)
+{
+ struct cstring *line;
+
+ while ((line = remove_line ()) != NULL)
+ save_line_to_file (line);
+}
+
+/* Handle an attempt to read beyond EOF under the control of record P,
+ on iteration REPETITION if nonzero. */
+
+static void
+handle_line_error (const struct control *p, intmax_t repetition)
+{
+ char buf[INT_BUFSIZE_BOUND (intmax_t)];
+
+ fprintf (stderr, _("%s: %s: line number out of range"),
+ program_name, quote (imaxtostr (p->lines_required, buf)));
+ if (repetition)
+ fprintf (stderr, _(" on repetition %s\n"), imaxtostr (repetition, buf));
+ else
+ fprintf (stderr, "\n");
+
+ cleanup_fatal ();
+}
+
+/* Determine the line number that marks the end of this file,
+ then get those lines and save them to the output file.
+ P is the control record.
+ REPETITION is the repetition number. */
+
+static void
+process_line_count (const struct control *p, intmax_t repetition)
+{
+ intmax_t linenum;
+ intmax_t last_line_to_save = p->lines_required * (repetition + 1);
+
+ create_output_file ();
+
+ /* Ensure that the line number specified is not 1 greater than
+ the number of lines in the file.
+ When suppressing matched lines, check before the loop. */
+ if (no_more_lines () && suppress_matched)
+ handle_line_error (p, repetition);
+
+ linenum = get_first_line_in_buffer ();
+ while (linenum++ < last_line_to_save)
+ {
+ struct cstring *line = remove_line ();
+ if (line == NULL)
+ handle_line_error (p, repetition);
+ save_line_to_file (line);
+ }
+
+ close_output_file ();
+
+ if (suppress_matched)
+ remove_line ();
+
+ /* Ensure that the line number specified is not 1 greater than
+ the number of lines in the file. */
+ if (no_more_lines () && !suppress_matched)
+ handle_line_error (p, repetition);
+}
+
+static void
+regexp_error (struct control *p, intmax_t repetition, bool ignore)
+{
+ fprintf (stderr, _("%s: %s: match not found"),
+ program_name, quote (global_argv[p->argnum]));
+
+ if (repetition)
+ {
+ char buf[INT_BUFSIZE_BOUND (intmax_t)];
+ fprintf (stderr, _(" on repetition %s\n"), imaxtostr (repetition, buf));
+ }
+ else
+ fprintf (stderr, "\n");
+
+ if (!ignore)
+ {
+ dump_rest_of_file ();
+ close_output_file ();
+ }
+ cleanup_fatal ();
+}
+
+/* Read the input until a line matches the regexp in P, outputting
+ it unless P->IGNORE is true.
+ REPETITION is this repeat-count; 0 means the first time. */
+
+static void
+process_regexp (struct control *p, intmax_t repetition)
+{
+ struct cstring *line; /* From input file. */
+ idx_t line_len; /* To make "$" in regexps work. */
+ intmax_t break_line; /* First line number of next file. */
+ bool ignore = p->ignore; /* If true, skip this section. */
+ regoff_t ret;
+
+ if (!ignore)
+ create_output_file ();
+
+ /* If there is no offset for the regular expression, or
+ it is positive, then it is not necessary to buffer the lines. */
+
+ if (p->offset >= 0)
+ {
+ while (true)
+ {
+ line = find_line (++current_line);
+ if (line == NULL)
+ {
+ if (p->repeat_forever)
+ {
+ if (!ignore)
+ {
+ dump_rest_of_file ();
+ close_output_file ();
+ }
+ exit (EXIT_SUCCESS);
+ }
+ else
+ regexp_error (p, repetition, ignore);
+ }
+ line_len = line->len;
+ if (line->str[line_len - 1] == '\n')
+ line_len--;
+ ret = re_search (&p->re_compiled, line->str, line_len,
+ 0, line_len, NULL);
+ if (ret == -2)
+ {
+ error (0, 0, _("error in regular expression search"));
+ cleanup_fatal ();
+ }
+ if (ret == -1)
+ {
+ line = remove_line ();
+ if (!ignore)
+ save_line_to_file (line);
+ }
+ else
+ break;
+ }
+ }
+ else
+ {
+ /* Buffer the lines. */
+ while (true)
+ {
+ line = find_line (++current_line);
+ if (line == NULL)
+ {
+ if (p->repeat_forever)
+ {
+ if (!ignore)
+ {
+ dump_rest_of_file ();
+ close_output_file ();
+ }
+ exit (EXIT_SUCCESS);
+ }
+ else
+ regexp_error (p, repetition, ignore);
+ }
+ line_len = line->len;
+ if (line->str[line_len - 1] == '\n')
+ line_len--;
+ ret = re_search (&p->re_compiled, line->str, line_len,
+ 0, line_len, NULL);
+ if (ret == -2)
+ {
+ error (0, 0, _("error in regular expression search"));
+ cleanup_fatal ();
+ }
+ if (ret != -1)
+ break;
+ }
+ }
+
+ /* Account for any offset from this regexp. */
+ break_line = current_line + p->offset;
+
+ write_to_file (break_line, ignore, p->argnum);
+
+ if (!ignore)
+ close_output_file ();
+
+ if (p->offset > 0)
+ current_line = break_line;
+
+ if (suppress_matched)
+ remove_line ();
+}
+
+/* Split the input file according to the control records we have built. */
+
+static void
+split_file (void)
+{
+ for (idx_t i = 0; i < control_used; i++)
+ {
+ intmax_t j;
+ if (controls[i].regexpr)
+ {
+ for (j = 0; (controls[i].repeat_forever
+ || j <= controls[i].repeat); j++)
+ process_regexp (&controls[i], j);
+ }
+ else
+ {
+ for (j = 0; (controls[i].repeat_forever
+ || j <= controls[i].repeat); j++)
+ process_line_count (&controls[i], j);
+ }
+ }
+
+ create_output_file ();
+ dump_rest_of_file ();
+ close_output_file ();
+}
+
+/* Return the name of output file number NUM.
+
+ This function is called from a signal handler, so it should invoke
+ only reentrant functions that are async-signal-safe. POSIX does
+ not guarantee this for the functions called below, but we don't
+ know of any hosts where this implementation isn't safe. */
+
+static char *
+make_filename (int num)
+{
+ strcpy (filename_space, prefix);
+ if (suffix)
+ sprintf (filename_space + strlen (prefix), suffix, num);
+ else
+ sprintf (filename_space + strlen (prefix), "%0*d", digits, num);
+ return filename_space;
+}
+
+/* Create the next output file. */
+
+static void
+create_output_file (void)
+{
+ int nfiles = files_created;
+ bool fopen_ok;
+ int fopen_errno;
+
+ output_filename = make_filename (nfiles);
+
+ if (nfiles == INT_MAX)
+ {
+ fopen_ok = false;
+ fopen_errno = EOVERFLOW;
+ }
+ else
+ {
+ /* Create the output file in a critical section, to avoid races. */
+ sigset_t oldset;
+ sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
+ output_stream = fopen (output_filename, "w");
+ fopen_ok = (output_stream != NULL);
+ fopen_errno = errno;
+ files_created = nfiles + fopen_ok;
+ sigprocmask (SIG_SETMASK, &oldset, NULL);
+ }
+
+ if (! fopen_ok)
+ {
+ error (0, fopen_errno, "%s", quotef (output_filename));
+ cleanup_fatal ();
+ }
+ bytes_written = 0;
+}
+
+/* If requested, delete all the files we have created. This function
+ must be called only from critical sections. */
+
+static void
+delete_all_files (bool in_signal_handler)
+{
+ if (! remove_files)
+ return;
+
+ for (int i = files_created; 0 <= --i; )
+ {
+ char const *name = make_filename (i);
+ if (unlink (name) != 0 && errno != ENOENT && !in_signal_handler)
+ error (0, errno, "%s", quotef (name));
+ }
+
+ files_created = 0;
+}
+
+/* Close the current output file and print the count
+ of characters in this file. */
+
+static void
+close_output_file (void)
+{
+ if (output_stream)
+ {
+ if (ferror (output_stream))
+ {
+ error (0, 0, _("write error for %s"), quoteaf (output_filename));
+ output_stream = NULL;
+ cleanup_fatal ();
+ }
+ if (fclose (output_stream) != 0)
+ {
+ error (0, errno, "%s", quotef (output_filename));
+ output_stream = NULL;
+ cleanup_fatal ();
+ }
+ if (bytes_written == 0 && elide_empty_files)
+ {
+ sigset_t oldset;
+ bool unlink_ok;
+ int unlink_errno;
+
+ /* Remove the output file in a critical section, to avoid races. */
+ sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
+ unlink_ok = (unlink (output_filename) == 0);
+ unlink_errno = errno;
+ files_created--;
+ sigprocmask (SIG_SETMASK, &oldset, NULL);
+
+ if (! unlink_ok && unlink_errno != ENOENT)
+ error (0, unlink_errno, "%s", quotef (output_filename));
+ }
+ else
+ {
+ if (!suppress_count)
+ {
+ char buf[INT_BUFSIZE_BOUND (intmax_t)];
+ fprintf (stdout, "%s\n", imaxtostr (bytes_written, buf));
+ }
+ }
+ output_stream = NULL;
+ }
+}
+
+/* Save line LINE to the output file and
+ increment the character count for the current file. */
+
+static void
+save_line_to_file (const struct cstring *line)
+{
+ idx_t l = fwrite (line->str, sizeof (char), line->len, output_stream);
+ if (l != line->len)
+ {
+ error (0, errno, _("write error for %s"), quoteaf (output_filename));
+ output_stream = NULL;
+ cleanup_fatal ();
+ }
+ bytes_written += line->len;
+}
+
+/* Return a new, initialized control record. */
+
+static struct control *
+new_control_record (void)
+{
+ static idx_t control_allocated = 0; /* Total space allocated. */
+ struct control *p;
+
+ if (control_used == control_allocated)
+ controls = xpalloc (controls, &control_allocated, 1, -1, sizeof *controls);
+ p = &controls[control_used++];
+ p->regexpr = false;
+ p->repeat = 0;
+ p->repeat_forever = false;
+ p->lines_required = 0;
+ p->offset = 0;
+ return p;
+}
+
+/* Check if there is a numeric offset after a regular expression.
+ STR is the entire command line argument.
+ P is the control record for this regular expression.
+ NUM is the numeric part of STR. */
+
+static void
+check_for_offset (struct control *p, char const *str, char const *num)
+{
+ if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK)
+ die (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"),
+ quote (str));
+}
+
+/* Given that the first character of command line arg STR is '{',
+ make sure that the rest of the string is a valid repeat count
+ and store its value in P.
+ ARGNUM is the ARGV index of STR. */
+
+static void
+parse_repeat_count (int argnum, struct control *p, char *str)
+{
+ char *end;
+
+ end = str + strlen (str) - 1;
+ if (*end != '}')
+ die (EXIT_FAILURE, 0, _("%s: '}' is required in repeat count"),
+ quote (str));
+ *end = '\0';
+
+ if (str + 1 == end - 1 && *(str + 1) == '*')
+ p->repeat_forever = true;
+ else
+ {
+ uintmax_t val;
+ if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK
+ || INTMAX_MAX < val)
+ {
+ die (EXIT_FAILURE, 0,
+ _("%s}: integer required between '{' and '}'"),
+ quote (global_argv[argnum]));
+ }
+ p->repeat = val;
+ }
+
+ *end = '}';
+}
+
+/* Extract the regular expression from STR and check for a numeric offset.
+ STR should start with the regexp delimiter character.
+ Return a new control record for the regular expression.
+ ARGNUM is the ARGV index of STR.
+ Unless IGNORE is true, mark these lines for output. */
+
+static struct control *
+extract_regexp (int argnum, bool ignore, char const *str)
+{
+ idx_t len; /* Number of bytes in this regexp. */
+ char delim = *str;
+ char const *closing_delim;
+ struct control *p;
+ char const *err;
+
+ closing_delim = strrchr (str + 1, delim);
+ if (closing_delim == NULL)
+ die (EXIT_FAILURE, 0,
+ _("%s: closing delimiter '%c' missing"), str, delim);
+
+ len = closing_delim - str - 1;
+ p = new_control_record ();
+ p->argnum = argnum;
+ p->ignore = ignore;
+
+ p->regexpr = true;
+ p->re_compiled.buffer = NULL;
+ p->re_compiled.allocated = 0;
+ p->re_compiled.fastmap = xmalloc (UCHAR_MAX + 1);
+ p->re_compiled.translate = NULL;
+ re_syntax_options =
+ RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
+ err = re_compile_pattern (str + 1, len, &p->re_compiled);
+ if (err)
+ {
+ error (0, 0, _("%s: invalid regular expression: %s"), quote (str), err);
+ cleanup_fatal ();
+ }
+
+ if (closing_delim[1])
+ check_for_offset (p, str, closing_delim + 1);
+
+ return p;
+}
+
+/* Extract the break patterns from args START through ARGC - 1 of ARGV.
+ After each pattern, check if the next argument is a repeat count. */
+
+static void
+parse_patterns (int argc, int start, char **argv)
+{
+ struct control *p; /* New control record created. */
+ static intmax_t last_val = 0;
+
+ for (int i = start; i < argc; i++)
+ {
+ if (*argv[i] == '/' || *argv[i] == '%')
+ {
+ p = extract_regexp (i, *argv[i] == '%', argv[i]);
+ }
+ else
+ {
+ p = new_control_record ();
+ p->argnum = i;
+
+ uintmax_t val;
+ if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK
+ || INTMAX_MAX < val)
+ die (EXIT_FAILURE, 0, _("%s: invalid pattern"), quote (argv[i]));
+ if (val == 0)
+ die (EXIT_FAILURE, 0,
+ _("%s: line number must be greater than zero"), argv[i]);
+ if (val < last_val)
+ {
+ char buf[INT_BUFSIZE_BOUND (intmax_t)];
+ die (EXIT_FAILURE, 0,
+ _("line number %s is smaller than preceding line number, %s"),
+ quote (argv[i]), imaxtostr (last_val, buf));
+ }
+
+ if (val == last_val)
+ error (0, 0,
+ _("warning: line number %s is the same as preceding line number"),
+ quote (argv[i]));
+
+ last_val = val;
+
+ p->lines_required = val;
+ }
+
+ if (i + 1 < argc && *argv[i + 1] == '{')
+ {
+ /* We have a repeat count. */
+ i++;
+ parse_repeat_count (i, p, argv[i]);
+ }
+ }
+}
+
+
+
+/* Names for the printf format flags ' and #. These can be ORed together. */
+enum { FLAG_THOUSANDS = 1, FLAG_ALTERNATIVE = 2 };
+
+/* Scan the printf format flags in FORMAT, storing info about the
+ flags into *FLAGS_PTR. Return the number of flags found. */
+static idx_t
+get_format_flags (char const *format, int *flags_ptr)
+{
+ int flags = 0;
+
+ for (idx_t count = 0; ; count++)
+ {
+ switch (format[count])
+ {
+ case '-':
+ case '0':
+ break;
+
+ case '\'':
+ flags |= FLAG_THOUSANDS;
+ break;
+
+ case '#':
+ flags |= FLAG_ALTERNATIVE;
+ break;
+
+ default:
+ *flags_ptr = flags;
+ return count;
+ }
+ }
+}
+
+/* Check that the printf format conversion specifier *FORMAT is valid
+ and compatible with FLAGS. Change it to 'd' if it is 'u',
+ since the format will be used with a signed value. */
+static void
+check_format_conv_type (char *format, int flags)
+{
+ unsigned char ch = *format;
+ int compatible_flags = FLAG_THOUSANDS;
+
+ switch (ch)
+ {
+ case 'd':
+ case 'i':
+ break;
+
+ case 'u':
+ *format = 'd';
+ break;
+
+ case 'o':
+ case 'x':
+ case 'X':
+ compatible_flags = FLAG_ALTERNATIVE;
+ break;
+
+ case 0:
+ die (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
+
+ default:
+ if (isprint (ch))
+ die (EXIT_FAILURE, 0,
+ _("invalid conversion specifier in suffix: %c"), ch);
+ else
+ die (EXIT_FAILURE, 0,
+ _("invalid conversion specifier in suffix: \\%.3o"), ch);
+ }
+
+ if (flags & ~ compatible_flags)
+ die (EXIT_FAILURE, 0,
+ _("invalid flags in conversion specification: %%%c%c"),
+ (flags & ~ compatible_flags & FLAG_ALTERNATIVE ? '#' : '\''), ch);
+}
+
+/* Return the maximum number of bytes that can be generated by
+ applying FORMAT to an int value. If the format is
+ invalid, diagnose the problem and exit. */
+static idx_t
+max_out (char *format)
+{
+ bool percent = false;
+
+ for (char *f = format; *f; f++)
+ if (*f == '%' && *++f != '%')
+ {
+ if (percent)
+ die (EXIT_FAILURE, 0,
+ _("too many %% conversion specifications in suffix"));
+ percent = true;
+ int flags;
+ f += get_format_flags (f, &flags);
+ while (ISDIGIT (*f))
+ f++;
+ if (*f == '.')
+ while (ISDIGIT (*++f))
+ continue;
+ check_format_conv_type (f, flags);
+ }
+
+ if (! percent)
+ die (EXIT_FAILURE, 0,
+ _("missing %% conversion specification in suffix"));
+
+ int maxlen = snprintf (NULL, 0, format, INT_MAX);
+ if (maxlen < 0)
+ xalloc_die ();
+ return maxlen;
+}
+
+int
+main (int argc, char **argv)
+{
+ int optc;
+
+ initialize_main (&argc, &argv);
+ set_program_name (argv[0]);
+ setlocale (LC_ALL, "");
+ bindtextdomain (PACKAGE, LOCALEDIR);
+ textdomain (PACKAGE);
+
+ atexit (close_stdout);
+
+ global_argv = argv;
+ controls = NULL;
+ control_used = 0;
+ suppress_count = false;
+ remove_files = true;
+ suppress_matched = false;
+ prefix = DEFAULT_PREFIX;
+
+ while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
+ switch (optc)
+ {
+ case 'f':
+ prefix = optarg;
+ break;
+
+ case 'b':
+ suffix = optarg;
+ break;
+
+ case 'k':
+ remove_files = false;
+ break;
+
+ case 'n':
+ digits = xdectoimax (optarg, 0, MIN (INT_MAX, IDX_MAX), "",
+ _("invalid number"), 0);
+ break;
+
+ case 's':
+ case 'q':
+ suppress_count = true;
+ break;
+
+ case 'z':
+ elide_empty_files = true;
+ break;
+
+ case SUPPRESS_MATCHED_OPTION:
+ suppress_matched = true;
+ break;
+
+ case_GETOPT_HELP_CHAR;
+
+ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
+
+ default:
+ usage (EXIT_FAILURE);
+ }
+
+ if (argc - optind < 2)
+ {
+ if (argc <= optind)
+ error (0, 0, _("missing operand"));
+ else
+ error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
+ usage (EXIT_FAILURE);
+ }
+
+ idx_t prefix_len = strlen (prefix);
+ idx_t max_digit_string_len
+ = (suffix
+ ? max_out (suffix)
+ : MAX (INT_STRLEN_BOUND (int), digits));
+ idx_t filename_size;
+ if (INT_ADD_WRAPV (prefix_len, max_digit_string_len + 1, &filename_size))
+ xalloc_die ();
+ filename_space = ximalloc (filename_size);
+
+ set_input_file (argv[optind++]);
+
+ parse_patterns (argc, optind, argv);
+
+ {
+ int i;
+ static int const sig[] =
+ {
+ /* The usual suspects. */
+ SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
+#ifdef SIGPOLL
+ SIGPOLL,
+#endif
+#ifdef SIGPROF
+ SIGPROF,
+#endif
+#ifdef SIGVTALRM
+ SIGVTALRM,
+#endif
+#ifdef SIGXCPU
+ SIGXCPU,
+#endif
+#ifdef SIGXFSZ
+ SIGXFSZ,
+#endif
+ };
+ enum { nsigs = ARRAY_CARDINALITY (sig) };
+
+ struct sigaction act;
+
+ sigemptyset (&caught_signals);
+ for (i = 0; i < nsigs; i++)
+ {
+ sigaction (sig[i], NULL, &act);
+ if (act.sa_handler != SIG_IGN)
+ sigaddset (&caught_signals, sig[i]);
+ }
+
+ act.sa_handler = interrupt_handler;
+ act.sa_mask = caught_signals;
+ act.sa_flags = 0;
+
+ for (i = 0; i < nsigs; i++)
+ if (sigismember (&caught_signals, sig[i]))
+ sigaction (sig[i], &act, NULL);
+ }
+
+ split_file ();
+
+ if (close (STDIN_FILENO) != 0)
+ {
+ error (0, errno, _("read error"));
+ cleanup_fatal ();
+ }
+
+ return EXIT_SUCCESS;
+}
+
+void
+usage (int status)
+{
+ if (status != EXIT_SUCCESS)
+ emit_try_help ();
+ else
+ {
+ printf (_("\
+Usage: %s [OPTION]... FILE PATTERN...\n\
+"),
+ program_name);
+ fputs (_("\
+Output pieces of FILE separated by PATTERN(s) to files 'xx00', 'xx01', ...,\n\
+and output byte counts of each piece to standard output.\n\
+"), stdout);
+ fputs (_("\
+\n\
+Read standard input if FILE is -\n\
+"), stdout);
+
+ emit_mandatory_arg_note ();
+
+ fputs (_("\
+ -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
+ -f, --prefix=PREFIX use PREFIX instead of 'xx'\n\
+ -k, --keep-files do not remove output files on errors\n\
+"), stdout);
+ fputs (_("\
+ --suppress-matched suppress the lines matching PATTERN\n\
+"), stdout);
+ fputs (_("\
+ -n, --digits=DIGITS use specified number of digits instead of 2\n\
+ -s, --quiet, --silent do not print counts of output file sizes\n\
+ -z, --elide-empty-files remove empty output files\n\
+"), stdout);
+ fputs (HELP_OPTION_DESCRIPTION, stdout);
+ fputs (VERSION_OPTION_DESCRIPTION, stdout);
+ fputs (_("\
+\n\
+Each PATTERN may be:\n\
+ INTEGER copy up to but not including specified line number\n\
+ /REGEXP/[OFFSET] copy up to but not including a matching line\n\
+ %REGEXP%[OFFSET] skip to, but not including a matching line\n\
+ {INTEGER} repeat the previous pattern specified number of times\n\
+ {*} repeat the previous pattern as many times as possible\n\
+\n\
+A line OFFSET is an integer optionally preceded by '+' or '-'\n\
+"), stdout);
+ emit_ancillary_info (PROGRAM_NAME);
+ }
+ exit (status);
+}