summaryrefslogtreecommitdiffstats
path: root/src/sed/sed/fmt.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/sed/sed/fmt.c')
-rw-r--r--src/sed/sed/fmt.c587
1 files changed, 587 insertions, 0 deletions
diff --git a/src/sed/sed/fmt.c b/src/sed/sed/fmt.c
new file mode 100644
index 0000000..64600a0
--- /dev/null
+++ b/src/sed/sed/fmt.c
@@ -0,0 +1,587 @@
+/* `L' command implementation for GNU sed, based on GNU fmt 1.22.
+ Copyright (C) 1994, 1995, 1996, 2002, 2003 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* GNU fmt was written by Ross Paterson <rap@doc.ic.ac.uk>. */
+
+#include "sed.h"
+
+#include <stdio.h>
+#include <ctype.h>
+#include <sys/types.h>
+
+#if HAVE_LIMITS_H
+# include <limits.h>
+#endif
+
+#ifndef UINT_MAX
+# define UINT_MAX ((unsigned int) ~(unsigned int) 0)
+#endif
+
+#ifndef INT_MAX
+# define INT_MAX ((int) (UINT_MAX >> 1))
+#endif
+
+/* The following parameters represent the program's idea of what is
+ "best". Adjust to taste, subject to the caveats given. */
+
+/* Prefer lines to be LEEWAY % shorter than the maximum width, giving
+ room for optimization. */
+#define LEEWAY 7
+
+/* Costs and bonuses are expressed as the equivalent departure from the
+ optimal line length, multiplied by 10. e.g. assigning something a
+ cost of 50 means that it is as bad as a line 5 characters too short
+ or too long. The definition of SHORT_COST(n) should not be changed.
+ However, EQUIV(n) may need tuning. */
+
+typedef long COST;
+
+#define MAXCOST (~(((unsigned long) 1) << (8 * sizeof (COST) -1)))
+
+#define SQR(n) ((n) * (n))
+#define EQUIV(n) SQR ((COST) (n))
+
+/* Cost of a filled line n chars longer or shorter than best_width. */
+#define SHORT_COST(n) EQUIV ((n) * 10)
+
+/* Cost of the difference between adjacent filled lines. */
+#define RAGGED_COST(n) (SHORT_COST (n) / 2)
+
+/* Basic cost per line. */
+#define LINE_COST EQUIV (70)
+
+/* Cost of breaking a line after the first word of a sentence, where
+ the length of the word is N. */
+#define WIDOW_COST(n) (EQUIV (200) / ((n) + 2))
+
+/* Cost of breaking a line before the last word of a sentence, where
+ the length of the word is N. */
+#define ORPHAN_COST(n) (EQUIV (150) / ((n) + 2))
+
+/* Bonus for breaking a line at the end of a sentence. */
+#define SENTENCE_BONUS EQUIV (50)
+
+/* Cost of breaking a line after a period not marking end of a sentence.
+ With the definition of sentence we are using (borrowed from emacs, see
+ get_line()) such a break would then look like a sentence break. Hence
+ we assign a very high cost -- it should be avoided unless things are
+ really bad. */
+#define NOBREAK_COST EQUIV (600)
+
+/* Bonus for breaking a line before open parenthesis. */
+#define PAREN_BONUS EQUIV (40)
+
+/* Bonus for breaking a line after other punctuation. */
+#define PUNCT_BONUS EQUIV(40)
+
+/* Credit for breaking a long paragraph one line later. */
+#define LINE_CREDIT EQUIV(3)
+
+/* Size of paragraph buffer in words. Longer paragraphs are handled
+ neatly (cf. flush_paragraph()), so there's little to gain by making
+ these larger. */
+#define MAXWORDS 1000
+
+#define GETC() (parabuf == end_of_parabuf ? EOF : *parabuf++)
+
+/* Extra ctype(3)-style macros. */
+
+#define isopen(c) (strchr ("([`'\"", (c)) != NULL)
+#define isclose(c) (strchr (")]'\"", (c)) != NULL)
+#define isperiod(c) (strchr (".?!", (c)) != NULL)
+
+/* Size of a tab stop, for expansion on input and re-introduction on
+ output. */
+#define TABWIDTH 8
+
+/* Word descriptor structure. */
+
+typedef struct Word WORD;
+
+struct Word
+ {
+
+ /* Static attributes determined during input. */
+
+ const char *text; /* the text of the word */
+ short length; /* length of this word */
+ short space; /* the size of the following space */
+ unsigned paren:1; /* starts with open paren */
+ unsigned period:1; /* ends in [.?!])* */
+ unsigned punct:1; /* ends in punctuation */
+ unsigned final:1; /* end of sentence */
+
+ /* The remaining fields are computed during the optimization. */
+
+ short line_length; /* length of the best line starting here */
+ COST best_cost; /* cost of best paragraph starting here */
+ WORD *next_break; /* break which achieves best_cost */
+ };
+
+/* Forward declarations. */
+
+static bool get_paragraph P_ ((void));
+static int get_line P_ ((int c));
+static int get_space P_ ((int c));
+static int copy_rest P_ ((int c));
+static bool same_para P_ ((int c));
+static void flush_paragraph P_ ((void));
+static void fmt_paragraph P_ ((void));
+static void check_punctuation P_ ((WORD *w));
+static COST base_cost P_ ((WORD *this));
+static COST line_cost P_ ((WORD *next, int len));
+static void put_paragraph P_ ((WORD *finish));
+static void put_line P_ ((WORD *w, int indent));
+static void put_word P_ ((WORD *w));
+static void put_space P_ ((int space));
+
+/* Option values. */
+
+/* User-supplied maximum line width (default WIDTH). The only output
+ lines
+ longer than this will each comprise a single word. */
+static int max_width;
+
+/* Space for the paragraph text. */
+static const char *parabuf;
+
+/* End of space for the paragraph text. */
+static const char *end_of_parabuf;
+
+/* The file on which we output */
+static FILE *outfile;
+
+/* Values derived from the option values. */
+
+/* The preferred width of text lines, set to LEEWAY % less than max_width. */
+static int best_width;
+
+/* Dynamic variables. */
+
+/* Start column of the character most recently read from the input file. */
+static int in_column;
+
+/* Start column of the next character to be written to stdout. */
+static int out_column;
+
+/* The words of a paragraph -- longer paragraphs are handled neatly
+ (cf. flush_paragraph()). */
+static WORD words[MAXWORDS];
+
+/* A pointer into the above word array, indicating the first position
+ after the last complete word. Sometimes it will point at an incomplete
+ word. */
+static WORD *word_limit;
+
+/* Indentation of the first line of the current paragraph. */
+static int first_indent;
+
+/* Indentation of other lines of the current paragraph */
+static int other_indent;
+
+/* The last character read from the input file. */
+static int next_char;
+
+/* If nonzero, the length of the last line output in the current
+ paragraph, used to charge for raggedness at the split point for long
+ paragraphs chosen by fmt_paragraph(). */
+static int last_line_length;
+
+/* read file F and send formatted output to stdout. */
+
+void
+fmt (const char *line, const char *line_end, int max_length, FILE *output_file)
+{
+ parabuf = line;
+ end_of_parabuf = line_end;
+ outfile = output_file;
+
+ max_width = max_length;
+ best_width = max_width * (201 - 2 * LEEWAY) / 200;
+
+ in_column = 0;
+ other_indent = 0;
+ next_char = GETC();
+ while (get_paragraph ())
+ {
+ fmt_paragraph ();
+ put_paragraph (word_limit);
+ }
+}
+
+/* Read a paragraph from input file F. A paragraph consists of a
+ maximal number of non-blank (excluding any prefix) lines
+ with the same indent.
+
+ Return false if end-of-file was encountered before the start of a
+ paragraph, else true. */
+
+static bool
+get_paragraph ()
+{
+ register int c;
+
+ last_line_length = 0;
+ c = next_char;
+
+ /* Scan (and copy) blank lines, and lines not introduced by the prefix. */
+
+ while (c == '\n' || c == EOF)
+ {
+ c = copy_rest (c);
+ if (c == EOF)
+ {
+ next_char = EOF;
+ return false;
+ }
+ putc ('\n', outfile);
+ c = GETC();
+ }
+
+ /* Got a suitable first line for a paragraph. */
+
+ first_indent = in_column;
+ word_limit = words;
+ c = get_line (c);
+
+ /* Read rest of paragraph. */
+
+ other_indent = in_column;
+ while (same_para (c) && in_column == other_indent)
+ c = get_line (c);
+
+ (word_limit - 1)->period = (word_limit - 1)->final = true;
+ next_char = c;
+ return true;
+}
+
+/* Copy to the output a blank line. In the latter, C is \n or EOF.
+ Return the character (\n or EOF) ending the line. */
+
+static int
+copy_rest (register int c)
+{
+ out_column = 0;
+ while (c != '\n' && c != EOF)
+ {
+ putc (c, outfile);
+ c = GETC();
+ }
+ return c;
+}
+
+/* Return true if a line whose first non-blank character after the
+ prefix (if any) is C could belong to the current paragraph,
+ otherwise false. */
+
+static bool
+same_para (register int c)
+{
+ return (c != '\n' && c != EOF);
+}
+
+/* Read a line from the input data given first non-blank character C
+ after the prefix, and the following indent, and break it into words.
+ A word is a maximal non-empty string of non-white characters. A word
+ ending in [.?!]["')\]]* and followed by end-of-line or at least two
+ spaces ends a sentence, as in emacs.
+
+ Return the first non-blank character of the next line. */
+
+static int
+get_line (register int c)
+{
+ int start;
+ register WORD *end_of_word;
+
+ end_of_word = &words[MAXWORDS - 2];
+
+ do
+ { /* for each word in a line */
+
+ /* Scan word. */
+
+ word_limit->text = parabuf - 1;
+ do
+ c = GETC();
+ while (c != EOF && !ISSPACE (c));
+ word_limit->length = parabuf - word_limit->text - (c != EOF);
+ in_column += word_limit->length;
+
+ check_punctuation (word_limit);
+
+ /* Scan inter-word space. */
+
+ start = in_column;
+ c = get_space (c);
+ word_limit->space = in_column - start;
+ word_limit->final = (c == EOF
+ || (word_limit->period
+ && (c == '\n' || word_limit->space > 1)));
+ if (c == '\n' || c == EOF)
+ word_limit->space = word_limit->final ? 2 : 1;
+ if (word_limit == end_of_word)
+ flush_paragraph ();
+ word_limit++;
+ if (c == EOF)
+ {
+ in_column = first_indent;
+ return EOF;
+ }
+ }
+ while (c != '\n');
+
+ in_column = 0;
+ c = GETC();
+ return get_space (c);
+}
+
+/* Read blank characters from the input data, starting with C, and keeping
+ in_column up-to-date. Return first non-blank character. */
+
+static int
+get_space (register int c)
+{
+ for (;;)
+ {
+ if (c == ' ')
+ in_column++;
+ else if (c == '\t')
+ in_column = (in_column / TABWIDTH + 1) * TABWIDTH;
+ else
+ return c;
+ c = GETC();
+ }
+}
+
+/* Set extra fields in word W describing any attached punctuation. */
+
+static void
+check_punctuation (register WORD *w)
+{
+ register const char *start, *finish;
+
+ start = w->text;
+ finish = start + (w->length - 1);
+ w->paren = isopen (*start);
+ w->punct = ISPUNCT (*finish);
+ while (isclose (*finish) && finish > start)
+ finish--;
+ w->period = isperiod (*finish);
+}
+
+/* Flush part of the paragraph to make room. This function is called on
+ hitting the limit on the number of words or characters. */
+
+static void
+flush_paragraph (void)
+{
+ WORD *split_point;
+ register WORD *w;
+ COST best_break;
+
+ /* - format what you have so far as a paragraph,
+ - find a low-cost line break near the end,
+ - output to there,
+ - make that the start of the paragraph. */
+
+ fmt_paragraph ();
+
+ /* Choose a good split point. */
+
+ split_point = word_limit;
+ best_break = MAXCOST;
+ for (w = words->next_break; w != word_limit; w = w->next_break)
+ {
+ if (w->best_cost - w->next_break->best_cost < best_break)
+ {
+ split_point = w;
+ best_break = w->best_cost - w->next_break->best_cost;
+ }
+ if (best_break <= MAXCOST - LINE_CREDIT)
+ best_break += LINE_CREDIT;
+ }
+ put_paragraph (split_point);
+
+ /* Copy words from split_point down to word -- we use memmove because
+ the source and target may overlap. */
+
+ memmove ((char *) words, (char *) split_point,
+ (word_limit - split_point + 1) * sizeof (WORD));
+ word_limit -= split_point - words;
+}
+
+/* Compute the optimal formatting for the whole paragraph by computing
+ and remembering the optimal formatting for each suffix from the empty
+ one to the whole paragraph. */
+
+static void
+fmt_paragraph (void)
+{
+ register WORD *start, *w;
+ register int len;
+ register COST wcost, best;
+ int saved_length;
+
+ word_limit->best_cost = 0;
+ saved_length = word_limit->length;
+ word_limit->length = max_width; /* sentinel */
+
+ for (start = word_limit - 1; start >= words; start--)
+ {
+ best = MAXCOST;
+ len = start == words ? first_indent : other_indent;
+
+ /* At least one word, however long, in the line. */
+
+ w = start;
+ len += w->length;
+ do
+ {
+ w++;
+
+ /* Consider breaking before w. */
+
+ wcost = line_cost (w, len) + w->best_cost;
+ if (start == words && last_line_length > 0)
+ wcost += RAGGED_COST (len - last_line_length);
+ if (wcost < best)
+ {
+ best = wcost;
+ start->next_break = w;
+ start->line_length = len;
+ }
+ len += (w - 1)->space + w->length; /* w > start >= words */
+ }
+ while (len < max_width);
+ start->best_cost = best + base_cost (start);
+ }
+
+ word_limit->length = saved_length;
+}
+
+/* Return the constant component of the cost of breaking before the
+ word THIS. */
+
+static COST
+base_cost (register WORD *this)
+{
+ register COST cost;
+
+ cost = LINE_COST;
+
+ if (this > words)
+ {
+ if ((this - 1)->period)
+ {
+ if ((this - 1)->final)
+ cost -= SENTENCE_BONUS;
+ else
+ cost += NOBREAK_COST;
+ }
+ else if ((this - 1)->punct)
+ cost -= PUNCT_BONUS;
+ else if (this > words + 1 && (this - 2)->final)
+ cost += WIDOW_COST ((this - 1)->length);
+ }
+
+ if (this->paren)
+ cost -= PAREN_BONUS;
+ else if (this->final)
+ cost += ORPHAN_COST (this->length);
+
+ return cost;
+}
+
+/* Return the component of the cost of breaking before word NEXT that
+ depends on LEN, the length of the line beginning there. */
+
+static COST
+line_cost (register WORD *next, register int len)
+{
+ register int n;
+ register COST cost;
+
+ if (next == word_limit)
+ return 0;
+ n = best_width - len;
+ cost = SHORT_COST (n);
+ if (next->next_break != word_limit)
+ {
+ n = len - next->line_length;
+ cost += RAGGED_COST (n);
+ }
+ return cost;
+}
+
+/* Output to stdout a paragraph from word up to (but not including)
+ FINISH, which must be in the next_break chain from word. */
+
+static void
+put_paragraph (register WORD *finish)
+{
+ register WORD *w;
+
+ put_line (words, first_indent);
+ for (w = words->next_break; w != finish; w = w->next_break)
+ put_line (w, other_indent);
+}
+
+/* Output to stdout the line beginning with word W, beginning in column
+ INDENT, including the prefix (if any). */
+
+static void
+put_line (register WORD *w, int indent)
+{
+ register WORD *endline;
+ out_column = 0;
+ put_space (indent);
+
+ endline = w->next_break - 1;
+ for (; w != endline; w++)
+ {
+ put_word (w);
+ put_space (w->space);
+ }
+ put_word (w);
+ last_line_length = out_column;
+ putc ('\n', outfile);
+}
+
+/* Output to stdout the word W. */
+
+static void
+put_word (register WORD *w)
+{
+ register const char *s;
+ register int n;
+
+ s = w->text;
+ for (n = w->length; n != 0; n--)
+ putc (*s++, outfile);
+ out_column += w->length;
+}
+
+/* Output to stdout SPACE spaces, or equivalent tabs. */
+
+static void
+put_space (int space)
+{
+ out_column += space;
+ while (space--)
+ putc (' ', outfile);
+}