/*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * Michael Rendell of the Memorial University of Newfoundland. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Wed Jun 22 22:15:41 1994, faith@cs.unc.edu: Added internationalization * patches from Andries.Brouwer@cwi.nl * Wed Sep 14 22:31:17 1994: patches from Carl Christofferson * (cchris@connected.com) * 1999-02-22 Arkadiusz Miƛkiewicz * added Native Language Support * 1999-09-19 Bruno Haible * modified to work correctly in multi-byte locales * */ /* * This command is deprecated. The utility is in maintenance mode, * meaning we keep them in source tree for backward compatibility * only. Do not waste time making this command better, unless the * fix is about security or other very critical issue. * * See Documentation/deprecated.txt for more information. */ #include #include #include #include #include #include #include #include "c.h" #include "closestream.h" #include "nls.h" #include "optutils.h" #include "strutils.h" #include "widechar.h" #include "xalloc.h" #define SPACE ' ' /* space */ #define BS '\b' /* backspace */ #define NL '\n' /* newline */ #define CR '\r' /* carriage return */ #define TAB '\t' /* tab */ #define VT '\v' /* vertical tab (aka reverse line feed) */ #define ESC '\033' /* escape */ #define RLF '\a' /* ESC-007 reverse line feed */ #define RHLF BS /* ESC-010 reverse half-line feed */ #define FHLF TAB /* ESC-011 forward half-line feed */ #define SO '\016' /* activate the G1 character set */ #define SI '\017' /* activate the G0 character set */ /* build up at least this many lines before flushing them out */ #define BUFFER_MARGIN 32 /* number of lines to allocate */ #define NALLOC 64 #if HAS_FEATURE_ADDRESS_SANITIZER || defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) # define COL_DEALLOCATE_ON_EXIT #endif /* SI & SO charset mode */ enum { CS_NORMAL, CS_ALTERNATE }; struct col_char { size_t c_column; /* column character is in */ wchar_t c_char; /* character in question */ int c_width; /* character width */ uint8_t c_set:1; /* character set (currently only 2) */ }; struct col_line { struct col_char *l_line; /* characters on the line */ struct col_line *l_prev; /* previous line */ struct col_line *l_next; /* next line */ size_t l_lsize; /* allocated sizeof l_line */ size_t l_line_len; /* strlen(l_line) */ size_t l_max_col; /* max column in the line */ uint8_t l_needs_sort:1; /* set if chars went in out of order */ }; #ifdef COL_DEALLOCATE_ON_EXIT /* * Free memory before exit when compiling LeakSanitizer. */ struct col_alloc { struct col_line *l; struct col_alloc *next; }; #endif struct col_ctl { struct col_line *lines; struct col_line *l; /* current line */ size_t max_bufd_lines; /* max # lines to keep in memory */ struct col_line *line_freelist; size_t nblank_lines; /* # blanks after last flushed line */ #ifdef COL_DEALLOCATE_ON_EXIT struct col_alloc *alloc_root; /* first of line allocations */ struct col_alloc *alloc_head; /* latest line allocation */ #endif unsigned int last_set:1, /* char_set of last char printed */ compress_spaces:1, /* if doing space -> tab conversion */ fine:1, /* if `fine' resolution (half lines) */ no_backspaces:1, /* if not to output any backspaces */ pass_unknown_seqs:1; /* whether to pass unknown control sequences */ }; struct col_lines { struct col_char *c; wint_t ch; size_t adjust; size_t cur_col; ssize_t cur_line; size_t extra_lines; size_t max_line; size_t nflushd_lines; size_t this_line; unsigned int cur_set:1, warned:1; }; static void __attribute__((__noreturn__)) usage(void) { FILE *out = stdout; fprintf(out, _( "\nUsage:\n" " %s [options]\n"), program_invocation_short_name); fputs(USAGE_SEPARATOR, out); fputs(_("Filter out reverse line feeds from standard input.\n"), out); fprintf(out, _( "\nOptions:\n" " -b, --no-backspaces do not output backspaces\n" " -f, --fine permit forward half line feeds\n" " -p, --pass pass unknown control sequences\n" " -h, --tabs convert spaces to tabs\n" " -x, --spaces convert tabs to spaces\n" " -l, --lines NUM buffer at least NUM lines\n" )); printf( " -H, --help %s\n", USAGE_OPTSTR_HELP); printf( " -v, --version %s\n", USAGE_OPTSTR_VERSION); printf(USAGE_MAN_TAIL("col(1)")); exit(EXIT_SUCCESS); } static inline void col_putchar(wchar_t ch) { if (putwchar(ch) == WEOF) err(EXIT_FAILURE, _("write failed")); } /* * Print a number of newline/half newlines. If fine flag is set, nblank_lines * is the number of half line feeds, otherwise it is the number of whole line * feeds. */ static void flush_blanks(struct col_ctl *ctl) { int half = 0; ssize_t i, nb = ctl->nblank_lines; if (nb & 1) { if (ctl->fine) half = 1; else nb++; } nb /= 2; for (i = nb; --i >= 0;) col_putchar(NL); if (half) { col_putchar(ESC); col_putchar('9'); if (!nb) col_putchar(CR); } ctl->nblank_lines = 0; } /* * Write a line to stdout taking care of space to tab conversion (-h flag) * and character set shifts. */ static void flush_line(struct col_ctl *ctl, struct col_line *l) { struct col_char *c, *endc; size_t nchars = l->l_line_len, last_col = 0, this_col; if (l->l_needs_sort) { static struct col_char *sorted = NULL; static size_t count_size = 0, *count = NULL, sorted_size = 0; size_t i, tot; /* * Do an O(n) sort on l->l_line by column being careful to * preserve the order of characters in the same column. */ if (sorted_size < l->l_lsize) { sorted_size = l->l_lsize; sorted = xrealloc(sorted, sizeof(struct col_char) * sorted_size); } if (count_size <= l->l_max_col) { count_size = l->l_max_col + 1; count = xrealloc(count, sizeof(size_t) * count_size); } memset(count, 0, sizeof(size_t) * l->l_max_col + 1); for (i = nchars, c = l->l_line; c && 0 < i; i--, c++) count[c->c_column]++; /* * calculate running total (shifted down by 1) to use as * indices into new line. */ for (tot = 0, i = 0; i <= l->l_max_col; i++) { size_t save = count[i]; count[i] = tot; tot += save; } for (i = nchars, c = l->l_line; 0 < i; i--, c++) sorted[count[c->c_column]++] = *c; c = sorted; } else c = l->l_line; while (0 < nchars) { this_col = c->c_column; endc = c; /* find last character */ do { ++endc; } while (0 < --nchars && this_col == endc->c_column); if (ctl->no_backspaces) { /* print only the last character */ c = endc - 1; if (0 < nchars && endc->c_column < this_col + c->c_width) continue; } if (last_col < this_col) { /* tabs and spaces handling */ ssize_t nspace = this_col - last_col; if (ctl->compress_spaces && 1 < nspace) { ssize_t ntabs; ntabs = this_col / 8 - last_col / 8; if (0 < ntabs) { nspace = this_col & 7; while (0 <= --ntabs) col_putchar(TAB); } } while (0 <= --nspace) col_putchar(SPACE); last_col = this_col; } for (;;) { /* SO / SI character set changing */ if (c->c_set != ctl->last_set) { switch (c->c_set) { case CS_NORMAL: col_putchar(SI); break; case CS_ALTERNATE: col_putchar(SO); break; default: abort(); } ctl->last_set = c->c_set; } /* output a character */ col_putchar(c->c_char); /* rubout control chars from output */ if (c + 1 < endc) { int i; for (i = 0; i < c->c_width; i++) col_putchar(BS); } if (endc <= ++c) break; } last_col += (c - 1)->c_width; } } static struct col_line *alloc_line(struct col_ctl *ctl) { struct col_line *l; size_t i; if (!ctl->line_freelist) { l = xmalloc(sizeof(struct col_line) * NALLOC); #ifdef COL_DEALLOCATE_ON_EXIT if (ctl->alloc_root == NULL) { ctl->alloc_root = xcalloc(1, sizeof(struct col_alloc)); ctl->alloc_root->l = l; ctl->alloc_head = ctl->alloc_root; } else { ctl->alloc_head->next = xcalloc(1, sizeof(struct col_alloc)); ctl->alloc_head = ctl->alloc_head->next; ctl->alloc_head->l = l; } #endif ctl->line_freelist = l; for (i = 1; i < NALLOC; i++, l++) l->l_next = l + 1; l->l_next = NULL; } l = ctl->line_freelist; ctl->line_freelist = l->l_next; memset(l, 0, sizeof(struct col_line)); return l; } static void free_line(struct col_ctl *ctl, struct col_line *l) { l->l_next = ctl->line_freelist; ctl->line_freelist = l; } static void flush_lines(struct col_ctl *ctl, ssize_t nflush) { struct col_line *l; while (0 <= --nflush) { l = ctl->lines; ctl->lines = l->l_next; if (l->l_line) { flush_blanks(ctl); flush_line(ctl, l); } ctl->nblank_lines++; free(l->l_line); free_line(ctl, l); } if (ctl->lines) ctl->lines->l_prev = NULL; } static int handle_not_graphic(struct col_ctl *ctl, struct col_lines *lns) { switch (lns->ch) { case BS: if (lns->cur_col == 0) return 1; /* can't go back further */ if (lns->c) lns->cur_col -= lns->c->c_width; else lns->cur_col -= 1; return 1; case CR: lns->cur_col = 0; return 1; case ESC: switch (getwchar()) { /* just ignore EOF */ case RLF: lns->cur_line -= 2; break; case RHLF: lns->cur_line -= 1; break; case FHLF: lns->cur_line += 1; if (0 < lns->cur_line && lns->max_line < (size_t)lns->cur_line) lns->max_line = lns->cur_line; break; default: break; } return 1; case NL: lns->cur_line += 2; if (0 < lns->cur_line && lns->max_line < (size_t)lns->cur_line) lns->max_line = lns->cur_line; lns->cur_col = 0; return 1; case SPACE: lns->cur_col += 1; return 1; case SI: lns->cur_set = CS_NORMAL; return 1; case SO: lns->cur_set = CS_ALTERNATE; return 1; case TAB: /* adjust column */ lns->cur_col |= 7; lns->cur_col += 1; return 1; case VT: lns->cur_line -= 2; return 1; default: break; } if (iswspace(lns->ch)) { if (0 < wcwidth(lns->ch)) lns->cur_col += wcwidth(lns->ch); return 1; } if (!ctl->pass_unknown_seqs) return 1; return 0; } static void update_cur_line(struct col_ctl *ctl, struct col_lines *lns) { ssize_t nmove; lns->adjust = 0; nmove = lns->cur_line - lns->this_line; if (!ctl->fine) { /* round up to next line */ if (lns->cur_line & 1) { lns->adjust = 1; nmove++; } } if (nmove < 0) { for (; nmove < 0 && ctl->l->l_prev; nmove++) ctl->l = ctl->l->l_prev; if (nmove) { if (lns->nflushd_lines == 0) { /* * Allow backup past first line if nothing * has been flushed yet. */ for (; nmove < 0; nmove++) { struct col_line *lnew = alloc_line(ctl); ctl->l->l_prev = lnew; lnew->l_next = ctl->l; ctl->l = ctl->lines = lnew; lns->extra_lines += 1; } } else { if (!lns->warned) { warnx(_("warning: can't back up %s."), lns->cur_line < 0 ? _("past first line") : _("-- line already flushed")); lns->warned = 1; } lns->cur_line -= nmove; } } } else { /* may need to allocate here */ for (; 0 < nmove && ctl->l->l_next; nmove--) ctl->l = ctl->l->l_next; for (; 0 < nmove; nmove--) { struct col_line *lnew = alloc_line(ctl); lnew->l_prev = ctl->l; ctl->l->l_next = lnew; ctl->l = lnew; } } lns->this_line = lns->cur_line + lns->adjust; nmove = lns->this_line - lns->nflushd_lines; if (0 < nmove && ctl->max_bufd_lines + BUFFER_MARGIN <= (size_t)nmove) { lns->nflushd_lines += nmove - ctl->max_bufd_lines; flush_lines(ctl, nmove - ctl->max_bufd_lines); } } static void parse_options(struct col_ctl *ctl, int argc, char **argv) { static const struct option longopts[] = { { "no-backspaces", no_argument, NULL, 'b' }, { "fine", no_argument, NULL, 'f' }, { "pass", no_argument, NULL, 'p' }, { "tabs", no_argument, NULL, 'h' }, { "spaces", no_argument, NULL, 'x' }, { "lines", required_argument, NULL, 'l' }, { "version", no_argument, NULL, 'V' }, { "help", no_argument, NULL, 'H' }, { NULL, 0, NULL, 0 } }; static const ul_excl_t excl[] = { { 'h', 'x' }, { 0 } }; int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; int opt; while ((opt = getopt_long(argc, argv, "bfhl:pxVH", longopts, NULL)) != -1) { err_exclusive_options(opt, longopts, excl, excl_st); switch (opt) { case 'b': /* do not output backspaces */ ctl->no_backspaces = 1; break; case 'f': /* allow half forward line feeds */ ctl->fine = 1; break; case 'h': /* compress spaces into tabs */ ctl->compress_spaces = 1; break; case 'l': /* * Buffered line count, which is a value in half * lines e.g. twice the amount specified. */ ctl->max_bufd_lines = strtou32_or_err(optarg, _("bad -l argument")) * 2; break; case 'p': ctl->pass_unknown_seqs = 1; break; case 'x': /* do not compress spaces into tabs */ ctl->compress_spaces = 0; break; case 'V': print_version(EXIT_SUCCESS); case 'H': usage(); default: errtryhelp(EXIT_FAILURE); } } if (optind != argc) { warnx(_("bad usage")); errtryhelp(EXIT_FAILURE); } } #ifdef COL_DEALLOCATE_ON_EXIT static void free_line_allocations(struct col_alloc *root) { struct col_alloc *next; while (root) { next = root->next; free(root->l); free(root); root = next; } } #endif static void process_char(struct col_ctl *ctl, struct col_lines *lns) { /* Deal printable characters */ if (!iswgraph(lns->ch) && handle_not_graphic(ctl, lns)) return; /* Must stuff ch in a line - are we at the right one? */ if ((size_t)lns->cur_line != lns->this_line - lns->adjust) update_cur_line(ctl, lns); /* Does line buffer need to grow? */ if (ctl->l->l_lsize <= ctl->l->l_line_len + 1) { size_t need; need = ctl->l->l_lsize ? ctl->l->l_lsize * 2 : NALLOC; ctl->l->l_line = xrealloc(ctl->l->l_line, need * sizeof(struct col_char)); ctl->l->l_lsize = need; } /* Store character */ lns->c = &ctl->l->l_line[ctl->l->l_line_len++]; lns->c->c_char = lns->ch; lns->c->c_set = lns->cur_set; if (0 < lns->cur_col) lns->c->c_column = lns->cur_col; else lns->c->c_column = 0; lns->c->c_width = wcwidth(lns->ch); /* * If things are put in out of order, they will need sorting * when it is flushed. */ if (lns->cur_col < ctl->l->l_max_col) ctl->l->l_needs_sort = 1; else ctl->l->l_max_col = lns->cur_col; if (0 < lns->c->c_width) lns->cur_col += lns->c->c_width; } int main(int argc, char **argv) { struct col_ctl ctl = { .compress_spaces = 1, .last_set = CS_NORMAL, .max_bufd_lines = BUFFER_MARGIN * 2, }; struct col_lines lns = { .cur_set = CS_NORMAL, }; int ret = EXIT_SUCCESS; setlocale(LC_ALL, ""); bindtextdomain(PACKAGE, LOCALEDIR); textdomain(PACKAGE); close_stdout_atexit(); ctl.lines = ctl.l = alloc_line(&ctl); parse_options(&ctl, argc, argv); while (feof(stdin) == 0) { errno = 0; /* Get character */ lns.ch = getwchar(); if (lns.ch == WEOF) { if (errno == EILSEQ) { /* Illegal multibyte sequence */ int c; char buf[5]; size_t len, i; c = getchar(); if (c == EOF) break; sprintf(buf, "\\x%02x", (unsigned char) c); len = strlen(buf); for (i = 0; i < len; i++) { lns.ch = buf[i]; process_char(&ctl, &lns); } } else /* end of file */ break; } else /* the common case */ process_char(&ctl, &lns); } /* goto the last line that had a character on it */ for (; ctl.l->l_next; ctl.l = ctl.l->l_next) lns.this_line++; if (lns.max_line == 0 && lns.cur_col == 0) { #ifdef COL_DEALLOCATE_ON_EXIT free_line_allocations(ctl.alloc_root); #endif return EXIT_SUCCESS; /* no lines, so just exit */ } flush_lines(&ctl, lns.this_line - lns.nflushd_lines + lns.extra_lines + 1); /* make sure we leave things in a sane state */ if (ctl.last_set != CS_NORMAL) col_putchar(SI); /* flush out the last few blank lines */ ctl.nblank_lines = lns.max_line - lns.this_line; if (lns.max_line & 1) ctl.nblank_lines++; else if (!ctl.nblank_lines) /* missing a \n on the last line? */ ctl.nblank_lines = 2; flush_blanks(&ctl); #ifdef COL_DEALLOCATE_ON_EXIT free_line_allocations(ctl.alloc_root); #endif return ret; }