summary | refs | log | tree | commit | diff | stats
path: root/src/unexpand.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/unexpand.c')
-rw-r--r-- src/unexpand.c 323
1 files changed, 323 insertions, 0 deletions
diff --git a/src/unexpand.c b/src/unexpand.c
new file mode 100644
index 0000000..5a2283f
--- /dev/null
+++ b/src/unexpand.c
@@ -0,0 +1,323 @@
+/* unexpand - convert blanks to tabs
+ Copyright (C) 1989-2023 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* By default, convert only maximal strings of initial blanks and tabs
+ into tabs.
+ Preserves backspace characters in the output; they decrement the
+ column count for tab calculations.
+ The default action is equivalent to -8.
+
+ Options:
+ --tabs=tab1[,tab2[,...]]
+ -t tab1[,tab2[,...]]
+ -tab1[,tab2[,...]]   If only one tab stop is given, set the tabs tab1
+                      columns apart instead of the default 8. Otherwise,
+                      set the tabs at columns tab1, tab2, etc. (numbered from
+                      0); preserve any blanks beyond the tab stops given.
+ --all
+ -a                   Use tabs wherever they would replace 2 or more blanks,
+                      not just at the beginnings of lines.
+
+ David MacKenzie <djm@gnu.ai.mit.edu> */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+#include "expand-common.h"
+
+/* The official name of this program (e.g., no 'g' prefix).
+ Passed to emit_ancillary_info by usage and to
+ case_GETOPT_VERSION_CHAR by main. */
+#define PROGRAM_NAME "unexpand"
+
+#define AUTHORS proper_name ("David MacKenzie") /* Author list handed to case_GETOPT_VERSION_CHAR in main. */
+
+
+
+/* For long options that have no equivalent short option, use a
+ non-character as a pseudo short option, starting with CHAR_MAX + 1.
+ These values are what getopt_long returns for such options, so they
+ can be used as case labels in main's option switch. */
+enum
+{
+ CONVERT_FIRST_ONLY_OPTION = CHAR_MAX + 1 /* --first-only (see longopts). */
+};
+
+static struct option const longopts[] = /* Long-option table passed to getopt_long in main. */
+{
+ {"tabs", required_argument, nullptr, 't'}, /* Handled by the 't' case, same as -t. */
+ {"all", no_argument, nullptr, 'a'}, /* Handled by the 'a' case, same as -a. */
+ {"first-only", no_argument, nullptr, CONVERT_FIRST_ONLY_OPTION}, /* Long form only. */
+ {GETOPT_HELP_OPTION_DECL},
+ {GETOPT_VERSION_OPTION_DECL},
+ {nullptr, 0, nullptr, 0} /* All-zero terminator entry. */
+};
+
+/* Report how to invoke the program, then terminate with STATUS.
+   For any STATUS other than EXIT_SUCCESS only emit_try_help is invoked;
+   for EXIT_SUCCESS the full option summary is written to stdout.
+   Either way the function ends in exit (STATUS) and never returns.  */
+void
+usage (int status)
+{
+  /* Failure path: hint only, then leave.  */
+  if (status != EXIT_SUCCESS)
+    {
+      emit_try_help ();
+      exit (status);
+    }
+
+  /* Success path: synopsis and one-line description.  */
+  printf (_("\
+Usage: %s [OPTION]... [FILE]...\n\
+"),
+          program_name);
+  fputs (_("\
+Convert blanks in each FILE to tabs, writing to standard output.\n\
+"), stdout);
+
+  emit_stdin_note ();
+  emit_mandatory_arg_note ();
+
+  /* Options specific to this program.  */
+  fputs (_("\
+ -a, --all convert all blanks, instead of just initial blanks\n\
+ --first-only convert only leading sequences of blanks (overrides -a)\n\
+ -t, --tabs=N have tabs N characters apart instead of 8 (enables -a)\n\
+"), stdout);
+  emit_tab_list_info ();
+
+  /* Standard --help / --version entries and the trailing boilerplate.  */
+  fputs (HELP_OPTION_DESCRIPTION, stdout);
+  fputs (VERSION_OPTION_DESCRIPTION, stdout);
+  emit_ancillary_info (PROGRAM_NAME);
+
+  exit (status);
+}
+
+/* Change blanks to tabs, writing to stdout.
+ Read each file in 'file_list', in order.
+
+ Input streams come from next_file; if the first call yields none,
+ return immediately. Input is handled one line at a time, with the
+ conversion state reset at the start of every line. Output errors
+ are passed to write_error. The function returns once getc reports
+ end of input and next_file supplies no further stream. */
+
+static void
+unexpand (void)
+{
+ /* Input stream. */
+ FILE *fp = next_file (nullptr);
+
+ /* The array of pending blanks. In non-POSIX locales, blanks can
+ include characters other than spaces, so the blanks must be
+ stored, not merely counted. */
+ char *pending_blank;
+
+ if (!fp)
+ return; /* No input stream; the buffer has not been allocated yet. */
+
+ /* The worst case is a non-blank character, then one blank, then a
+ tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so
+ allocate MAX_COLUMN_WIDTH bytes to store the blanks. */
+ pending_blank = xmalloc (max_column_width);
+
+ while (true) /* One iteration per input line; the state below is re-initialized each time. */
+ {
+ /* Input character, or EOF. */
+ int c;
+
+ /* If true, perform translations. Once cleared it stays false for
+ the rest of the line, and the remaining characters are copied
+ through unchanged. */
+ bool convert = true;
+
+
+ /* The following variables have valid values only when CONVERT
+ is true: */
+
+ /* Column of next input character. */
+ uintmax_t column = 0;
+
+ /* Column the next input tab stop is on. */
+ uintmax_t next_tab_column = 0;
+
+ /* Index in TAB_LIST of next tab stop to examine. */
+ size_t tab_index = 0;
+
+ /* If true, the first pending blank came just before a tab stop. */
+ bool one_blank_before_tab_stop = false;
+
+ /* If true, the previous input character was a blank. This is
+ initially true, since initial strings of blanks are treated
+ as if the line was preceded by a blank. */
+ bool prev_blank = true;
+
+ /* Number of pending columns of blanks. */
+ size_t pending = 0;
+
+
+ /* Convert a line of text. */
+
+ do
+ {
+ while ((c = getc (fp)) < 0 && (fp = next_file (fp))) /* On a negative getc result, move on to the next stream; C stays negative only if next_file returns null. */
+ continue;
+
+ if (convert)
+ {
+ bool blank = !! isblank (c); /* Collapse isblank's int result to exactly true or false. */
+
+ if (blank)
+ {
+ bool last_tab;
+
+ next_tab_column = get_next_tab_column (column, &tab_index,
+ &last_tab);
+
+ if (last_tab) /* Flag set by get_next_tab_column; presumably no tab stop lies beyond COLUMN -- confirm in expand-common. */
+ convert = false;
+
+ if (convert)
+ {
+ if (next_tab_column < column)
+ error (EXIT_FAILURE, 0, _("input line is too long"));
+
+ if (c == '\t')
+ {
+ column = next_tab_column; /* An input tab moves to the next tab stop. */
+
+ if (pending)
+ pending_blank[0] = '\t';
+ }
+ else
+ {
+ column++;
+
+ if (! (prev_blank && column == next_tab_column))
+ {
+ /* It is not yet known whether the pending blanks
+ will be replaced by tabs. */
+ if (column == next_tab_column)
+ one_blank_before_tab_stop = true;
+ pending_blank[pending++] = c;
+ prev_blank = true;
+ continue; /* Hold this blank back: skip the flush and the putchar below. */
+ }
+
+ /* Replace the pending blanks by a tab or two. */
+ pending_blank[0] = c = '\t';
+ }
+
+ /* Discard pending blanks, unless it was a single
+ blank just before the previous tab stop. */
+ pending = one_blank_before_tab_stop; /* The bool converts to 0 or 1. */
+ }
+ }
+ else if (c == '\b')
+ {
+ /* Go back one column, and force recalculation of the
+ next tab stop. */
+ column -= !!column; /* Decrement, but never below zero. */
+ next_tab_column = column;
+ tab_index -= !!tab_index; /* Likewise step the index back, stopping at zero. */
+ }
+ else
+ {
+ column++;
+ if (!column) /* The unsigned column counter wrapped around to zero. */
+ error (EXIT_FAILURE, 0, _("input line is too long"));
+ }
+
+ if (pending) /* Write out whatever blanks are still being held back. */
+ {
+ if (pending > 1 && one_blank_before_tab_stop)
+ pending_blank[0] = '\t';
+ if (fwrite (pending_blank, 1, pending, stdout) != pending)
+ write_error ();
+ pending = 0;
+ one_blank_before_tab_stop = false;
+ }
+
+ prev_blank = blank;
+ convert &= convert_entire_line || blank; /* Unless whole lines are converted, the first non-blank ends conversion for this line. */
+ }
+
+ if (c < 0) /* No more input: release the buffer and finish. */
+ {
+ free (pending_blank);
+ return;
+ }
+
+ if (putchar (c) < 0)
+ write_error ();
+ }
+ while (c != '\n'); /* A newline ends the current line. */
+ }
+}
+
+/* Program entry point.  Set up the program name, locale and message
+   domain, register close_stdout to run at exit, parse the command
+   line, record any tab stops, and run unexpand.  The remaining
+   arguments (or nullptr when there are none) are handed to
+   set_file_list.  Returns 'exit_status'.  */
+int
+main (int argc, char **argv)
+{
+  /* Tab stop being accumulated from digit options (-0 ... -9); its
+     value is meaningful only while DIGITS_SEEN is true.  */
+  uintmax_t stop IF_LINT ( = 0);
+  bool digits_seen = false;
+
+  /* When set, cancel the effect of any -a (explicit or implicit in -t),
+     so that only leading blanks will be considered.  */
+  bool first_only = false;
+
+  initialize_main (&argc, &argv);
+  set_program_name (argv[0]);
+  setlocale (LC_ALL, "");
+  bindtextdomain (PACKAGE, LOCALEDIR);
+  textdomain (PACKAGE);
+
+  atexit (close_stdout);
+
+  for (;;)
+    {
+      int opt = getopt_long (argc, argv, ",0123456789at:", longopts, nullptr);
+
+      /* getopt_long returns -1 once every option has been consumed.  */
+      if (opt == -1)
+        break;
+
+      switch (opt)
+        {
+        case '?':
+          usage (EXIT_FAILURE);
+        case 'a':
+          convert_entire_line = true;
+          break;
+        case 't':
+          convert_entire_line = true;
+          parse_tab_stops (optarg);
+          break;
+        case CONVERT_FIRST_ONLY_OPTION:
+          first_only = true;
+          break;
+        case ',':
+          /* A comma closes the tab stop accumulated so far, if any.  */
+          if (digits_seen)
+            {
+              add_tab_stop (stop);
+              digits_seen = false;
+            }
+          break;
+        case_GETOPT_HELP_CHAR;
+        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
+        default:
+          /* A digit option: start a new tab stop value if none is in
+             progress, then append the digit.  */
+          if (!digits_seen)
+            {
+              stop = 0;
+              digits_seen = true;
+            }
+          if (!DECIMAL_DIGIT_ACCUMULATE (stop, opt - '0', uintmax_t))
+            error (EXIT_FAILURE, 0, _("tab stop value is too large"));
+          break;
+        }
+    }
+
+  if (first_only)
+    convert_entire_line = false;
+
+  /* A tab stop still being accumulated when the options ran out.  */
+  if (digits_seen)
+    add_tab_stop (stop);
+
+  finalize_tab_stops ();
+
+  char **files = nullptr;
+  if (optind < argc)
+    files = argv + optind;
+  set_file_list (files);
+
+  unexpand ();
+
+  cleanup_file_list_stdin ();
+
+  return exit_status;
+}