diff options
Diffstat (limited to '')
-rw-r--r-- | text-utils/col.c | 723 |
1 files changed, 723 insertions, 0 deletions
diff --git a/text-utils/col.c b/text-utils/col.c new file mode 100644 index 0000000..afea8aa --- /dev/null +++ b/text-utils/col.c @@ -0,0 +1,723 @@ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Michael Rendell of the Memorial University of Newfoundland. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Wed Jun 22 22:15:41 1994, faith@cs.unc.edu: Added internationalization + * patches from Andries.Brouwer@cwi.nl + * Wed Sep 14 22:31:17 1994: patches from Carl Christofferson + * (cchris@connected.com) + * 1999-02-22 Arkadiusz MiĆkiewicz <misiek@pld.ORG.PL> + * added Native Language Support + * 1999-09-19 Bruno Haible <haible@clisp.cons.org> + * modified to work correctly in multi-byte locales + * + */ + +/* + * This command is deprecated. The utility is in maintenance mode, + * meaning we keep them in source tree for backward compatibility + * only. Do not waste time making this command better, unless the + * fix is about security or other very critical issue. + * + * See Documentation/deprecated.txt for more information. + */ + +#include <ctype.h> +#include <errno.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "c.h" +#include "closestream.h" +#include "nls.h" +#include "optutils.h" +#include "strutils.h" +#include "widechar.h" +#include "xalloc.h" + +#define SPACE ' ' /* space */ +#define BS '\b' /* backspace */ +#define NL '\n' /* newline */ +#define CR '\r' /* carriage return */ +#define TAB '\t' /* tab */ +#define VT '\v' /* vertical tab (aka reverse line feed) */ + +#define ESC '\033' /* escape */ +#define RLF '\a' /* ESC-007 reverse line feed */ +#define RHLF BS /* ESC-010 reverse half-line feed */ +#define FHLF TAB /* ESC-011 forward half-line feed */ + +#define SO '\016' /* activate the G1 character set */ +#define SI '\017' /* activate the G0 character set */ + +/* build up at least this many lines before flushing them out */ +#define BUFFER_MARGIN 32 + +/* number of lines to allocate */ +#define NALLOC 64 + +#if HAS_FEATURE_ADDRESS_SANITIZER || defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) +# define COL_DEALLOCATE_ON_EXIT +#endif + +/* SI & SO charset mode */ +enum { + CS_NORMAL, + CS_ALTERNATE +}; + +struct col_char { + size_t c_column; /* column character is in */ + wchar_t c_char; /* character in question */ + int c_width; /* character width */ + + uint8_t c_set:1; /* character set (currently only 2) */ +}; + +struct col_line { + struct col_char *l_line; /* characters on the line */ + struct col_line *l_prev; /* previous line */ + struct col_line *l_next; /* next line */ + size_t l_lsize; /* allocated sizeof l_line */ + size_t l_line_len; /* strlen(l_line) */ + size_t l_max_col; /* max column in the line */ + + uint8_t l_needs_sort:1; /* set if chars went in out of order */ +}; + +#ifdef COL_DEALLOCATE_ON_EXIT +/* + * Free memory before exit when compiling LeakSanitizer. + */ +struct col_alloc { + struct col_line *l; + struct col_alloc *next; +}; +#endif + +struct col_ctl { + struct col_line *lines; + struct col_line *l; /* current line */ + size_t max_bufd_lines; /* max # lines to keep in memory */ + struct col_line *line_freelist; + size_t nblank_lines; /* # blanks after last flushed line */ +#ifdef COL_DEALLOCATE_ON_EXIT + struct col_alloc *alloc_root; /* first of line allocations */ + struct col_alloc *alloc_head; /* latest line allocation */ +#endif + unsigned int + last_set:1, /* char_set of last char printed */ + compress_spaces:1, /* if doing space -> tab conversion */ + fine:1, /* if `fine' resolution (half lines) */ + no_backspaces:1, /* if not to output any backspaces */ + pass_unknown_seqs:1; /* whether to pass unknown control sequences */ +}; + +struct col_lines { + struct col_char *c; + wint_t ch; + size_t adjust; + size_t cur_col; + ssize_t cur_line; + size_t extra_lines; + size_t max_line; + size_t nflushd_lines; + size_t this_line; + + unsigned int + cur_set:1, + warned:1; +}; + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fprintf(out, _( + "\nUsage:\n" + " %s [options]\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Filter out reverse line feeds from standard input.\n"), out); + + fprintf(out, _( + "\nOptions:\n" + " -b, --no-backspaces do not output backspaces\n" + " -f, --fine permit forward half line feeds\n" + " -p, --pass pass unknown control sequences\n" + " -h, --tabs convert spaces to tabs\n" + " -x, --spaces convert tabs to spaces\n" + " -l, --lines NUM buffer at least NUM lines\n" + )); + printf( " -H, --help %s\n", USAGE_OPTSTR_HELP); + printf( " -v, --version %s\n", USAGE_OPTSTR_VERSION); + + printf(USAGE_MAN_TAIL("col(1)")); + exit(EXIT_SUCCESS); +} + +static inline void col_putchar(wchar_t ch) +{ + if (putwchar(ch) == WEOF) + err(EXIT_FAILURE, _("write failed")); +} + +/* + * Print a number of newline/half newlines. If fine flag is set, nblank_lines + * is the number of half line feeds, otherwise it is the number of whole line + * feeds. + */ +static void flush_blanks(struct col_ctl *ctl) +{ + int half = 0; + ssize_t i, nb = ctl->nblank_lines; + + if (nb & 1) { + if (ctl->fine) + half = 1; + else + nb++; + } + nb /= 2; + for (i = nb; --i >= 0;) + col_putchar(NL); + + if (half) { + col_putchar(ESC); + col_putchar('9'); + if (!nb) + col_putchar(CR); + } + ctl->nblank_lines = 0; +} + +/* + * Write a line to stdout taking care of space to tab conversion (-h flag) + * and character set shifts. + */ +static void flush_line(struct col_ctl *ctl, struct col_line *l) +{ + struct col_char *c, *endc; + size_t nchars = l->l_line_len, last_col = 0, this_col; + + if (l->l_needs_sort) { + static struct col_char *sorted = NULL; + static size_t count_size = 0, *count = NULL, sorted_size = 0; + size_t i, tot; + + /* + * Do an O(n) sort on l->l_line by column being careful to + * preserve the order of characters in the same column. + */ + if (sorted_size < l->l_lsize) { + sorted_size = l->l_lsize; + sorted = xrealloc(sorted, sizeof(struct col_char) * sorted_size); + } + if (count_size <= l->l_max_col) { + count_size = l->l_max_col + 1; + count = xrealloc(count, sizeof(size_t) * count_size); + } + memset(count, 0, sizeof(size_t) * l->l_max_col + 1); + for (i = nchars, c = l->l_line; c && 0 < i; i--, c++) + count[c->c_column]++; + + /* + * calculate running total (shifted down by 1) to use as + * indices into new line. + */ + for (tot = 0, i = 0; i <= l->l_max_col; i++) { + size_t save = count[i]; + count[i] = tot; + tot += save; + } + + for (i = nchars, c = l->l_line; 0 < i; i--, c++) + sorted[count[c->c_column]++] = *c; + c = sorted; + } else + c = l->l_line; + + while (0 < nchars) { + this_col = c->c_column; + endc = c; + + /* find last character */ + do { + ++endc; + } while (0 < --nchars && this_col == endc->c_column); + + if (ctl->no_backspaces) { + /* print only the last character */ + c = endc - 1; + if (0 < nchars && endc->c_column < this_col + c->c_width) + continue; + } + + if (last_col < this_col) { + /* tabs and spaces handling */ + ssize_t nspace = this_col - last_col; + + if (ctl->compress_spaces && 1 < nspace) { + ssize_t ntabs; + + ntabs = this_col / 8 - last_col / 8; + if (0 < ntabs) { + nspace = this_col & 7; + while (0 <= --ntabs) + col_putchar(TAB); + } + } + while (0 <= --nspace) + col_putchar(SPACE); + last_col = this_col; + } + + for (;;) { + /* SO / SI character set changing */ + if (c->c_set != ctl->last_set) { + switch (c->c_set) { + case CS_NORMAL: + col_putchar(SI); + break; + case CS_ALTERNATE: + col_putchar(SO); + break; + default: + abort(); + } + ctl->last_set = c->c_set; + } + + /* output a character */ + col_putchar(c->c_char); + + /* rubout control chars from output */ + if (c + 1 < endc) { + int i; + + for (i = 0; i < c->c_width; i++) + col_putchar(BS); + } + + if (endc <= ++c) + break; + } + last_col += (c - 1)->c_width; + } +} + +static struct col_line *alloc_line(struct col_ctl *ctl) +{ + struct col_line *l; + size_t i; + + if (!ctl->line_freelist) { + l = xmalloc(sizeof(struct col_line) * NALLOC); +#ifdef COL_DEALLOCATE_ON_EXIT + if (ctl->alloc_root == NULL) { + ctl->alloc_root = xcalloc(1, sizeof(struct col_alloc)); + ctl->alloc_root->l = l; + ctl->alloc_head = ctl->alloc_root; + } else { + ctl->alloc_head->next = xcalloc(1, sizeof(struct col_alloc)); + ctl->alloc_head = ctl->alloc_head->next; + ctl->alloc_head->l = l; + } +#endif + ctl->line_freelist = l; + for (i = 1; i < NALLOC; i++, l++) + l->l_next = l + 1; + l->l_next = NULL; + } + l = ctl->line_freelist; + ctl->line_freelist = l->l_next; + + memset(l, 0, sizeof(struct col_line)); + return l; +} + +static void free_line(struct col_ctl *ctl, struct col_line *l) +{ + l->l_next = ctl->line_freelist; + ctl->line_freelist = l; +} + +static void flush_lines(struct col_ctl *ctl, ssize_t nflush) +{ + struct col_line *l; + + while (0 <= --nflush) { + l = ctl->lines; + ctl->lines = l->l_next; + if (l->l_line) { + flush_blanks(ctl); + flush_line(ctl, l); + } + ctl->nblank_lines++; + free(l->l_line); + free_line(ctl, l); + } + if (ctl->lines) + ctl->lines->l_prev = NULL; +} + +static int handle_not_graphic(struct col_ctl *ctl, struct col_lines *lns) +{ + switch (lns->ch) { + case BS: + if (lns->cur_col == 0) + return 1; /* can't go back further */ + if (lns->c) + lns->cur_col -= lns->c->c_width; + else + lns->cur_col -= 1; + return 1; + case CR: + lns->cur_col = 0; + return 1; + case ESC: + switch (getwchar()) { /* just ignore EOF */ + case RLF: + lns->cur_line -= 2; + break; + case RHLF: + lns->cur_line -= 1; + break; + case FHLF: + lns->cur_line += 1; + if (0 < lns->cur_line && lns->max_line < (size_t)lns->cur_line) + lns->max_line = lns->cur_line; + break; + default: + break; + } + return 1; + case NL: + lns->cur_line += 2; + if (0 < lns->cur_line && lns->max_line < (size_t)lns->cur_line) + lns->max_line = lns->cur_line; + lns->cur_col = 0; + return 1; + case SPACE: + lns->cur_col += 1; + return 1; + case SI: + lns->cur_set = CS_NORMAL; + return 1; + case SO: + lns->cur_set = CS_ALTERNATE; + return 1; + case TAB: /* adjust column */ + lns->cur_col |= 7; + lns->cur_col += 1; + return 1; + case VT: + lns->cur_line -= 2; + return 1; + default: + break; + } + if (iswspace(lns->ch)) { + if (0 < wcwidth(lns->ch)) + lns->cur_col += wcwidth(lns->ch); + return 1; + } + + if (!ctl->pass_unknown_seqs) + return 1; + return 0; +} + +static void update_cur_line(struct col_ctl *ctl, struct col_lines *lns) +{ + ssize_t nmove; + + lns->adjust = 0; + nmove = lns->cur_line - lns->this_line; + if (!ctl->fine) { + /* round up to next line */ + if (lns->cur_line & 1) { + lns->adjust = 1; + nmove++; + } + } + if (nmove < 0) { + for (; nmove < 0 && ctl->l->l_prev; nmove++) + ctl->l = ctl->l->l_prev; + + if (nmove) { + if (lns->nflushd_lines == 0) { + /* + * Allow backup past first line if nothing + * has been flushed yet. + */ + for (; nmove < 0; nmove++) { + struct col_line *lnew = alloc_line(ctl); + ctl->l->l_prev = lnew; + lnew->l_next = ctl->l; + ctl->l = ctl->lines = lnew; + lns->extra_lines += 1; + } + } else { + if (!lns->warned) { + warnx(_("warning: can't back up %s."), + lns->cur_line < 0 ? + _("past first line") : + _("-- line already flushed")); + lns->warned = 1; + } + lns->cur_line -= nmove; + } + } + } else { + /* may need to allocate here */ + for (; 0 < nmove && ctl->l->l_next; nmove--) + ctl->l = ctl->l->l_next; + + for (; 0 < nmove; nmove--) { + struct col_line *lnew = alloc_line(ctl); + lnew->l_prev = ctl->l; + ctl->l->l_next = lnew; + ctl->l = lnew; + } + } + + lns->this_line = lns->cur_line + lns->adjust; + nmove = lns->this_line - lns->nflushd_lines; + + if (0 < nmove && ctl->max_bufd_lines + BUFFER_MARGIN <= (size_t)nmove) { + lns->nflushd_lines += nmove - ctl->max_bufd_lines; + flush_lines(ctl, nmove - ctl->max_bufd_lines); + } +} + +static void parse_options(struct col_ctl *ctl, int argc, char **argv) +{ + static const struct option longopts[] = { + { "no-backspaces", no_argument, NULL, 'b' }, + { "fine", no_argument, NULL, 'f' }, + { "pass", no_argument, NULL, 'p' }, + { "tabs", no_argument, NULL, 'h' }, + { "spaces", no_argument, NULL, 'x' }, + { "lines", required_argument, NULL, 'l' }, + { "version", no_argument, NULL, 'V' }, + { "help", no_argument, NULL, 'H' }, + { NULL, 0, NULL, 0 } + }; + static const ul_excl_t excl[] = { + { 'h', 'x' }, + { 0 } + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + int opt; + + while ((opt = getopt_long(argc, argv, "bfhl:pxVH", longopts, NULL)) != -1) { + err_exclusive_options(opt, longopts, excl, excl_st); + + switch (opt) { + case 'b': /* do not output backspaces */ + ctl->no_backspaces = 1; + break; + case 'f': /* allow half forward line feeds */ + ctl->fine = 1; + break; + case 'h': /* compress spaces into tabs */ + ctl->compress_spaces = 1; + break; + case 'l': + /* + * Buffered line count, which is a value in half + * lines e.g. twice the amount specified. + */ + ctl->max_bufd_lines = strtou32_or_err(optarg, _("bad -l argument")) * 2; + break; + case 'p': + ctl->pass_unknown_seqs = 1; + break; + case 'x': /* do not compress spaces into tabs */ + ctl->compress_spaces = 0; + break; + + case 'V': + print_version(EXIT_SUCCESS); + case 'H': + usage(); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (optind != argc) { + warnx(_("bad usage")); + errtryhelp(EXIT_FAILURE); + } +} + +#ifdef COL_DEALLOCATE_ON_EXIT +static void free_line_allocations(struct col_alloc *root) +{ + struct col_alloc *next; + + while (root) { + next = root->next; + free(root->l); + free(root); + root = next; + } +} +#endif + +static void process_char(struct col_ctl *ctl, struct col_lines *lns) +{ + /* Deal printable characters */ + if (!iswgraph(lns->ch) && handle_not_graphic(ctl, lns)) + return; + + /* Must stuff ch in a line - are we at the right one? */ + if ((size_t)lns->cur_line != lns->this_line - lns->adjust) + update_cur_line(ctl, lns); + + /* Does line buffer need to grow? */ + if (ctl->l->l_lsize <= ctl->l->l_line_len + 1) { + size_t need; + + need = ctl->l->l_lsize ? ctl->l->l_lsize * 2 : NALLOC; + ctl->l->l_line = xrealloc(ctl->l->l_line, need * sizeof(struct col_char)); + ctl->l->l_lsize = need; + } + + /* Store character */ + lns->c = &ctl->l->l_line[ctl->l->l_line_len++]; + lns->c->c_char = lns->ch; + lns->c->c_set = lns->cur_set; + + if (0 < lns->cur_col) + lns->c->c_column = lns->cur_col; + else + lns->c->c_column = 0; + lns->c->c_width = wcwidth(lns->ch); + + /* + * If things are put in out of order, they will need sorting + * when it is flushed. + */ + if (lns->cur_col < ctl->l->l_max_col) + ctl->l->l_needs_sort = 1; + else + ctl->l->l_max_col = lns->cur_col; + if (0 < lns->c->c_width) + lns->cur_col += lns->c->c_width; + +} + +int main(int argc, char **argv) +{ + struct col_ctl ctl = { + .compress_spaces = 1, + .last_set = CS_NORMAL, + .max_bufd_lines = BUFFER_MARGIN * 2, + }; + struct col_lines lns = { + .cur_set = CS_NORMAL, + }; + int ret = EXIT_SUCCESS; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + ctl.lines = ctl.l = alloc_line(&ctl); + + parse_options(&ctl, argc, argv); + + while (feof(stdin) == 0) { + errno = 0; + /* Get character */ + lns.ch = getwchar(); + + if (lns.ch == WEOF) { + if (errno == EILSEQ) { + /* Illegal multibyte sequence */ + int c; + char buf[5]; + size_t len, i; + + c = getchar(); + if (c == EOF) + break; + sprintf(buf, "\\x%02x", (unsigned char) c); + len = strlen(buf); + for (i = 0; i < len; i++) { + lns.ch = buf[i]; + process_char(&ctl, &lns); + } + } else + /* end of file */ + break; + } else + /* the common case */ + process_char(&ctl, &lns); + } + + /* goto the last line that had a character on it */ + for (; ctl.l->l_next; ctl.l = ctl.l->l_next) + lns.this_line++; + if (lns.max_line == 0 && lns.cur_col == 0) { +#ifdef COL_DEALLOCATE_ON_EXIT + free_line_allocations(ctl.alloc_root); +#endif + return EXIT_SUCCESS; /* no lines, so just exit */ + } + flush_lines(&ctl, lns.this_line - lns.nflushd_lines + lns.extra_lines + 1); + + /* make sure we leave things in a sane state */ + if (ctl.last_set != CS_NORMAL) + col_putchar(SI); + + /* flush out the last few blank lines */ + ctl.nblank_lines = lns.max_line - lns.this_line; + if (lns.max_line & 1) + ctl.nblank_lines++; + else if (!ctl.nblank_lines) + /* missing a \n on the last line? */ + ctl.nblank_lines = 2; + flush_blanks(&ctl); +#ifdef COL_DEALLOCATE_ON_EXIT + free_line_allocations(ctl.alloc_root); +#endif + return ret; +} |