Adding upstream version 9.1.upstream/9.1 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 16:11:47 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 16:11:47 +0000
commit: 758f820bcc0f68aeebac1717e537ca13a320b909 (patch)
tree: 48111ece75cf4f98316848b37a7e26356e00669e /src/ptx.c
parent: Initial commit. (diff)
download: coreutils-758f820bcc0f68aeebac1717e537ca13a320b909.tar.xz
coreutils-758f820bcc0f68aeebac1717e537ca13a320b909.zip
1 files changed, 2049 insertions, 0 deletions
diff --git a/src/ptx.c b/src/ptx.c
new file mode 100644
index 0000000..09b5444
--- /dev/null
+++ b/src/ptx.c
@@ -0,0 +1,2049 @@
+/* Permuted index for GNU, with keywords in their context.
+   Copyright (C) 1990-2022 Free Software Foundation, Inc.
+   François Pinard <pinard@iro.umontreal.ca>, 1988.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+   François Pinard <pinard@iro.umontreal.ca> */
+
+#include <config.h>
+
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+#include "die.h"
+#include <regex.h>
+#include "argmatch.h"
+#include "error.h"
+#include "fadvise.h"
+#include "quote.h"
+#include "read-file.h"
+#include "stdio--.h"
+#include "xstrtol.h"
+
+/* The official name of this program (e.g., no 'g' prefix).  */
+#define PROGRAM_NAME "ptx"
+
+/* TRANSLATORS: Please translate "F. Pinard" to "François Pinard"
+   if "ç" (c-with-cedilla) is available in the translation's character
+   set and encoding.  */
+#define AUTHORS proper_name_utf8 ("F. Pinard", "Fran\xc3\xa7ois Pinard")
+
+/* Number of possible characters in a byte.  */
+#define CHAR_SET_SIZE 256
+/* Escape digit helpers.  ISODIGIT and HEXTOBIN evaluate C more than once.  */
+#define ISODIGIT(C) ((C) >= '0' && (C) <= '7')
+#define HEXTOBIN(C) ((C) >= 'a' && (C) <= 'f' ? (C)-'a'+10 \
+                     : (C) >= 'A' && (C) <= 'F' ? (C)-'A'+10 : (C)-'0')
+#define OCTTOBIN(C) ((C) - '0')
+
+/* Debugging the memory allocator.  */
+
+#if WITH_DMALLOC
+# define MALLOC_FUNC_CHECK 1
+# include <dmalloc.h>
+#endif
+
+/* Global definitions.  */
+
+/* FIXME: There are many unchecked integer overflows in this file,
+   and in theory they could cause this command to have undefined
+   behavior given large inputs or options.  This command should
+   diagnose any such overflow and exit.  */
+
+/* Program options.  */
+
+enum Format
+{
+  UNKNOWN_FORMAT,		/* not set yet; initial output_format */
+  DUMB_FORMAT,			/* output for a dumb terminal */
+  ROFF_FORMAT,			/* output for 'troff' or 'nroff' */
+  TEX_FORMAT			/* output for 'TeX' or 'LaTeX' */
+};
+
+static bool gnu_extensions = true;	/* trigger all GNU extensions */
+static bool auto_reference = false;	/* refs are 'file_name:line_number:' */
+static bool input_reference = false;	/* refs at beginning of input lines */
+static bool right_reference = false;	/* output refs after right context  */
+static ptrdiff_t line_width = 72;	/* output line width in characters */
+static ptrdiff_t gap_size = 3;	/* number of spaces between output fields */
+static char const *truncation_string = "/";
+                                /* string used to mark line truncations */
+static char const *macro_name = "xx";	/* macro name for roff or TeX output */
+static enum Format output_format = UNKNOWN_FORMAT;
+                                /* output format, initially unknown */
+
+static bool ignore_case = false;	/* fold lower to upper: sort, regexps */
+static char const *break_file = NULL;	/* name of the 'Break chars' file */
+static char const *only_file = NULL;	/* name of the 'Only words' file */
+static char const *ignore_file = NULL;	/* name of the 'Ignore words' file */
+
+/* An option given as a regular expression, kept with its compiled form.  */
+struct regex_data
+{
+  /* The regular expression source text, or NULL if there is none.  */
+  char const *string;
+
+  /* The compiled regular expression, and its fastmap.  */
+  struct re_pattern_buffer pattern;
+  char fastmap[UCHAR_MAX + 1];
+};
+
+static struct regex_data context_regex;	/* end of context */
+static struct regex_data word_regex;	/* keyword */
+
+/* A BLOCK delimits a region in memory of arbitrary size, like the copy of
+   a whole file.  A WORD is similar, except it is intended for smaller
+   regions.  A WORD_TABLE may contain several WORDs.  */
+
+typedef struct
+  {
+    char *start;		/* pointer to beginning of region */
+    char *end;			/* pointer to end + 1 of region */
+  }
+BLOCK;
+
+typedef struct
+  {
+    char *start;		/* pointer to beginning of region */
+    ptrdiff_t size;		/* length of the region, in bytes */
+  }
+WORD;
+
+typedef struct
+  {
+    WORD *start;		/* array of WORDs */
+    size_t alloc;		/* number of entries allocated */
+    ptrdiff_t length;		/* number of used entries */
+  }
+WORD_TABLE;
+
+/* Pattern description tables.  */
+
+/* Folded (toupper) equivalent of each byte; filled only if ignore_case.  */
+static unsigned char folded_chars[CHAR_SET_SIZE];
+
+/* End of context pattern register indices.  */
+static struct re_registers context_regs;
+
+/* Keyword pattern register indices.  */
+static struct re_registers word_regs;
+
+/* A word characters fastmap is used only when no word regexp has been
+   provided.  A word is then made up of a sequence of one or more characters
+   allowed by the fastmap.  Contains !0 if character allowed in word.  Not
+   only is this faster in most cases, but it also simplifies the
+   implementation of the Break files.  */
+static char word_fastmap[CHAR_SET_SIZE];
+
+/* Maximum length of any word read.  */
+static ptrdiff_t maximum_word_length;
+
+/* Maximum width of any reference used.  */
+static ptrdiff_t reference_max_width;
+
+/* Ignore and Only word tables.  */
+
+static WORD_TABLE ignore_table;	/* table of words to ignore */
+static WORD_TABLE only_table;		/* table of words to select */
+
+/* Source text table, and scanning macros.  */
+
+static int number_input_files;	/* number of text input files */
+static intmax_t total_line_count;	/* total number of lines seen so far */
+static char const **input_file_name;	/* array of text input file names */
+static intmax_t *file_line_count;	/* array of line count values at end */
+
+static BLOCK *text_buffers;	/* files to study */
+
+/* Scanning macros: each moves the lvalue CURSOR and is a bare statement.  */
+/* SKIP_NON_WHITE used only for getting or skipping the reference.  */
+#define SKIP_NON_WHITE(cursor, limit) \
+  while (cursor < limit && ! isspace (to_uchar (*cursor)))		\
+    cursor++
+/* Advance CURSOR over white space, without going beyond LIMIT.  */
+#define SKIP_WHITE(cursor, limit) \
+  while (cursor < limit && isspace (to_uchar (*cursor)))		\
+    cursor++
+/* Move CURSOR back over white space, without going before START.  */
+#define SKIP_WHITE_BACKWARDS(cursor, start) \
+  while (cursor > start && isspace (to_uchar (cursor[-1])))		\
+    cursor--
+/* Skip one word, else one byte.  *CURSOR is read without testing LIMIT.  */
+#define SKIP_SOMETHING(cursor, limit) \
+  if (word_regex.string)						\
+    {									\
+      regoff_t count;							\
+      count = re_match (&word_regex.pattern, cursor, limit - cursor, 0, NULL); \
+      if (count == -2)							\
+        matcher_error ();						\
+      cursor += count == -1 ? 1 : count;				\
+    }									\
+  else if (word_fastmap[to_uchar (*cursor)])				\
+    while (cursor < limit && word_fastmap[to_uchar (*cursor)])		\
+      cursor++;								\
+  else									\
+    cursor++
+
+/* Occurrences table.
+
+   The 'key' member describes the central word, which is surrounded by
+   a left context and a right context.  Its 'start' and 'size' fields
+   allow full 8-bit characters keys, even including NULs.  At other
+   places in this program, the name 'keyafter' refers to the keyword
+   followed by its right context.
+
+   The left context does not extend, towards the beginning of the file,
+   further than a distance given by the 'left' value.  This value is
+   relative to the keyword beginning, it is usually negative.  This
+   ensures that, except for white space, we will never have to backward
+   scan the source text, when it is time to generate the final output
+   lines.
+
+   The right context, indirectly attainable through the keyword end, does
+   not extend, towards the end of the file, further than a distance given
+   by the 'right' value.  This value is relative to the keyword
+   beginning, it is usually positive.
+
+   When automatic references are used, the 'reference' value is the
+   overall line number in all input files read so far, in this case, it
+   is of type intmax_t.  When input references are used, the 'reference'
+   value indicates the distance between the keyword beginning and the
+   start of the reference field, and it fits in ptrdiff_t and is usually
+   negative.  */
+
+typedef struct
+  {
+    WORD key;			/* description of the keyword */
+    ptrdiff_t left;		/* distance to left context start */
+    ptrdiff_t right;		/* distance to right context end */
+    intmax_t reference;		/* reference descriptor */
+    int file_index;		/* corresponding file  */
+  }
+OCCURS;
+
+/* The various OCCURS tables are indexed by the language.  But for the
+   time being, there is no such multiple language support.  */
+
+static OCCURS *occurs_table[1];	/* all words retained from the read text */
+static size_t occurs_alloc[1];	/* allocated size of occurs_table */
+static ptrdiff_t number_of_occurs[1]; /* number of used slots in occurs_table */
+
+
+/* Communication among output routines.  */
+
+/* Indicate if special output processing is requested for each character.  */
+static char edited_flag[CHAR_SET_SIZE];
+
+/* Half of line width, reference excluded.  */
+static ptrdiff_t half_line_width;
+
+/* Maximum width of before field.  */
+static ptrdiff_t before_max_width;
+
+/* Maximum width of keyword-and-after field.  */
+static ptrdiff_t keyafter_max_width;
+
+/* Length of string that flags truncation.  */
+static ptrdiff_t truncation_string_length;
+
+/* When context is limited by lines, wraparound may happen on final output:
+   the 'head' block gives access to some supplementary left context which
+   will be seen at the end of the output line, the 'tail' block gives
+   access to some supplementary right context which will be seen at the
+   beginning of the output line.  */
+
+static BLOCK tail;		/* tail field */
+static bool tail_truncation;	/* flag truncation after the tail field */
+
+static BLOCK before;		/* before field */
+static bool before_truncation;	/* flag truncation before the before field */
+
+static BLOCK keyafter;		/* keyword-and-after field */
+static bool keyafter_truncation; /* flag truncation after the keyafter field */
+
+static BLOCK head;		/* head field */
+static bool head_truncation;	/* flag truncation before the head field */
+
+static BLOCK reference;		/* reference field for input reference mode */
+
+/* Miscellaneous routines.  */
+
+/* Report a failure of the regular expression matcher, then exit.  */
+static void
+matcher_error (void)
+{
+  int const matcher_errno = errno;
+  die (EXIT_FAILURE, matcher_errno, _("error in regular expression matcher"));
+}
+
+/* Decode the backslash escapes of STRING, storing the result over
+   STRING itself.  No escape expands to more bytes than it occupies, so
+   the write position never overtakes the read position.
+
+   Sequences handled:
+     \xHHH   the byte whose value is given by one to three hexadecimal
+             digits; "\x" followed by no such digit is kept as is
+     \0OOO   the byte whose value is given by up to three octal digits
+             following the 0; a bare "\0" therefore yields a NUL byte
+     \a \b \f \n \r \t \v
+             the corresponding C control characters
+     \c      discard everything that follows in STRING
+   A backslash ending STRING is dropped, and a backslash before any
+   other character is kept along with that character.  The value of a
+   numeric escape is assigned to a char without a range check.  */
+
+static void
+unescape_string (char *string)
+{
+  char *out = string;		/* where the next decoded byte goes */
+  char *in = string;		/* next byte still to be decoded */
+
+  while (*in != '\0')
+    {
+      int code = 0;		/* value of a \x or \0 escape */
+      int digits = 0;		/* digits consumed by that escape */
+
+      /* Anything but a backslash stands for itself.  */
+      if (*in != '\\')
+        {
+          *out++ = *in++;
+          continue;
+        }
+
+      /* Step over the backslash and look at the escape letter.  */
+      in++;
+      switch (*in)
+        {
+        case 'x':		/* hexadecimal, 3 digits at most */
+          in++;
+          while (digits < 3 && isxdigit (to_uchar (*in)))
+            {
+              code = 16 * code + HEXTOBIN (*in);
+              digits++;
+              in++;
+            }
+          if (digits != 0)
+            *out++ = code;
+          else
+            {
+              *out++ = '\\';
+              *out++ = 'x';
+            }
+          break;
+
+        case '0':		/* octal, 3 digits at most after the 0 */
+          in++;
+          while (digits < 3 && ISODIGIT (*in))
+            {
+              code = 8 * code + OCTTOBIN (*in);
+              digits++;
+              in++;
+            }
+          *out++ = code;
+          break;
+
+        case 'c':		/* drop the rest of STRING */
+          in += strlen (in);
+          break;
+
+        case '\0':		/* lone backslash at end: ignore it */
+          break;
+
+        case 'a':		/* alert */
+#if __STDC__
+          *out++ = '\a';
+#else
+          *out++ = 7;
+#endif
+          in++;
+          break;
+
+        case 'v':		/* vertical tab */
+#if __STDC__
+          *out++ = '\v';
+#else
+          *out++ = 11;
+#endif
+          in++;
+          break;
+
+        case 'b': *out++ = '\b'; in++; break;	/* backspace */
+        case 'f': *out++ = '\f'; in++; break;	/* form feed */
+        case 'n': *out++ = '\n'; in++; break;	/* new line */
+        case 'r': *out++ = '\r'; in++; break;	/* carriage return */
+        case 't': *out++ = '\t'; in++; break;	/* horizontal tab */
+
+        default:		/* unknown escape: keep both characters */
+          *out++ = '\\';
+          *out++ = *in++;
+          break;
+        }
+    }
+
+  *out = '\0';
+}
+
+/* Compile REGEX->string into REGEX->pattern.  The pattern is given
+   REGEX->fastmap as its fastmap and, when ignore_case is set,
+   folded_chars as its translation table.  Exit with a diagnostic
+   quoting the expression if it cannot be compiled.  */
+
+static void
+compile_regex (struct regex_data *regex)
+{
+  char const *source = regex->string;
+  struct re_pattern_buffer *compiled = &regex->pattern;
+  char const *failure;
+
+  /* An empty buffer lets re_compile_pattern do the allocation.  */
+  compiled->allocated = 0;
+  compiled->buffer = NULL;
+  compiled->translate = ignore_case ? folded_chars : NULL;
+  compiled->fastmap = regex->fastmap;
+
+  failure = re_compile_pattern (source, strlen (source), compiled);
+  if (failure != NULL)
+    die (EXIT_FAILURE, 0, _("%s (for regexp %s)"), failure, quote (source));
+
+  /* Not strictly needed, as re_search would build the fastmap on its
+     first call, but do it before any re_match can run.  */
+  re_compile_fastmap (compiled);
+}
+
+/* Set up what the scanning code needs before any text is read: the
+   case folding table, the compiled end-of-context regexp, and either
+   the compiled word regexp or the word fastmap.  The option variables
+   ignore_case, gnu_extensions, input_reference and break_file, and
+   the two regexp strings, are consulted here.  */
+
+static void
+initialize_regex (void)
+{
+  int byte;			/* byte value being classified */
+
+  /* The folding table is only needed, hence only filled, when case is
+     to be ignored.  */
+
+  if (ignore_case)
+    {
+      byte = 0;
+      while (byte < CHAR_SET_SIZE)
+        {
+          folded_chars[byte] = toupper (byte);
+          byte++;
+        }
+    }
+
+  /* End of context.  A regexp given by the user is kept, except that
+     an empty one disables the feature and is replaced by NULL to speed
+     up later tests.  Without a user regexp, the default is the end of
+     a sentence, as in GNU emacs, when GNU extensions are enabled and
+     input references are not in use; otherwise it is the end of a
+     line.  */
+
+  if (!context_regex.string)
+    context_regex.string = (gnu_extensions && !input_reference
+                            ? "[.?!][]\"')}]*\\($\\|\t\\|  \\)[ \t\n]*"
+                            : "\n");
+  else if (*context_regex.string == '\0')
+    context_regex.string = NULL;
+
+  if (context_regex.string != NULL)
+    compile_regex (&context_regex);
+
+  /* Words.  A regexp given by the user takes precedence and is
+     compiled.  Failing that, a Break file, if any, is what describes
+     word_fastmap, so there is nothing to build here.  */
+
+  if (word_regex.string)
+    {
+      compile_regex (&word_regex);
+      return;
+    }
+
+  if (break_file)
+    return;
+
+  /* Otherwise build the default fastmap.  With GNU extensions a word
+     is a run of letters of the underlying character set, simulating
+     \w+.  Without them nearly everything counts, punctuation
+     included, and only white space ends a word, simulating
+     [^ \t\n]+.  */
+
+  for (byte = 0; byte < CHAR_SET_SIZE; byte++)
+    {
+      if (gnu_extensions)
+        word_fastmap[byte] = !! isalpha (byte);
+      else
+        word_fastmap[byte] = !(byte == ' ' || byte == '\t' || byte == '\n');
+    }
+}
+
+/* Read the whole of file FILE_NAME into memory and record the region
+   it occupies in BLOCK: BLOCK->start is the first byte and BLOCK->end
+   is one past the last.  Standard input is read instead whenever
+   FILE_NAME is NULL, empty or "-".  Exit with a diagnostic naming the
+   file if reading fails.
+
+   White space is stored untouched.  Compressing it while reading was
+   once attempted in some cases, but that defeated regexps such as the
+   default end of sentence, which checks for two consecutive spaces.
+   Should compression come back, it belongs in the output routines.  */
+
+static void
+swallow_file_in_memory (char const *file_name, BLOCK *block)
+{
+  size_t length;		/* number of bytes read */
+  bool from_stdin = !(file_name && *file_name && !STREQ (file_name, "-"));
+
+  /* Standard input is already open; other files are read by name.  */
+
+  block->start = (from_stdin
+                  ? fread_file (stdin, 0, &length)
+                  : read_file (file_name, 0, &length));
+
+  if (block->start == NULL)
+    die (EXIT_FAILURE, errno, "%s", quotef (from_stdin ? "-" : file_name));
+
+  /* Reset the end-of-file and error indicators of standard input.  */
+
+  if (from_stdin)
+    clearerr (stdin);
+
+  block->end = block->start + length;
+}
+
+/* Sort and search routines.  */
+
+/* Comparison function, usable with qsort, for two WORDs FIRST and
+   SECOND.  Return 0 if they are identical, less than 0 if FIRST goes
+   before SECOND, and greater than 0 if FIRST goes after SECOND.
+
+   Bytes are compared as unsigned values, each one mapped through
+   folded_chars first when ignore_case is set.  If one word is a
+   prefix of the other, the shorter one goes first and the result is
+   then -1 or 1.  */
+
+static int
+compare_words (const void *void_first, const void *void_second)
+{
+  const WORD *lhs = void_first;
+  const WORD *rhs = void_second;
+  unsigned char const *fold = ignore_case ? folded_chars : NULL;
+  ptrdiff_t common;		/* length shared by both words */
+  ptrdiff_t i;			/* index into both words */
+
+  common = lhs->size;
+  if (rhs->size < common)
+    common = rhs->size;
+
+  /* The first differing byte, if any, decides.  */
+
+  for (i = 0; i < common; i++)
+    {
+      int a = to_uchar (lhs->start[i]);
+      int b = to_uchar (rhs->start[i]);
+
+      if (fold)
+        {
+          a = fold[a];
+          b = fold[b];
+        }
+      if (a != b)
+        return a - b;
+    }
+
+  /* One word is a prefix of the other, or they are equal.  */
+
+  if (lhs->size < rhs->size)
+    return -1;
+  return lhs->size > rhs->size;
+}
+
+/* Comparison function, usable with qsort, for two OCCURS FIRST and
+   SECOND.  Keywords are ordered with compare_words.  Occurrences of
+   equal keywords are ordered by the address of the keyword in the
+   text, which preserves their original order.  */
+
+static int
+compare_occurs (const void *void_first, const void *void_second)
+{
+  const OCCURS *lhs = void_first;
+  const OCCURS *rhs = void_second;
+  int by_key = compare_words (&lhs->key, &rhs->key);
+
+  if (by_key != 0)
+    return by_key;
+
+  /* Tie: fall back on where each keyword starts in memory.  */
+  if (lhs->key.start < rhs->key.start)
+    return -1;
+  return lhs->key.start > rhs->key.start;
+}
+
+/* Return true if WORD compares equal, according to compare_words, to
+   an entry of TABLE.  This is a binary search, so TABLE has to be
+   sorted with compare_words, as digest_word_file leaves it.  */
+
+ATTRIBUTE_PURE
+static bool
+search_table (WORD *word, WORD_TABLE *table)
+{
+  ptrdiff_t lo = 0;			/* lowest index still possible */
+  ptrdiff_t hi = table->length - 1;	/* highest index still possible */
+
+  while (lo <= hi)
+    {
+      ptrdiff_t probe = lo + (hi - lo) / 2;
+      int order = compare_words (word, &table->start[probe]);
+
+      if (order == 0)
+        return true;
+      if (order < 0)
+        hi = probe - 1;
+      else
+        lo = probe + 1;
+    }
+
+  return false;
+}
+
+/* Sort the occurs table in memory with compare_occurs.  qsort need
+   not be stable, so ties between equal keywords are settled inside
+   compare_occurs, by the position of each keyword in the text.  */
+
+static void
+sort_found_occurs (void)
+{
+  /* Only one language for the time being, hence index 0.  */
+  OCCURS *entries = occurs_table[0];
+  ptrdiff_t count = number_of_occurs[0];
+
+  if (count == 0)
+    return;
+  qsort (entries, count, sizeof *entries, compare_occurs);
+}
+
+/* Parameter files reading routines.  */
+
+/* Read the file named FILE_NAME, which lists break characters, and
+   rebuild word_fastmap from it: every byte is allowed in a word
+   except those that appear in the file.  A byte may appear any number
+   of times.  */
+
+static void
+digest_break_file (char const *file_name)
+{
+  static char const forced_breaks[] = { ' ', '\t', '\n' };
+  BLOCK contents;		/* in-memory copy of the file */
+  char const *p;		/* byte of the copy being looked at */
+  size_t i;			/* index into forced_breaks */
+
+  swallow_file_in_memory (file_name, &contents);
+
+  /* Allow every byte, then disallow each one met in the file.  */
+
+  memset (word_fastmap, 1, CHAR_SET_SIZE);
+  p = contents.start;
+  while (p < contents.end)
+    {
+      word_fastmap[to_uchar (*p)] = 0;
+      p++;
+    }
+
+  /* Without GNU extensions, spaces, tabs and newlines always break
+     words, whether or not the file lists them.  With GNU extensions
+     nothing is forced: newline is a break character only if the file
+     contains one, so to avoid that the file must hold no newline at
+     all, not even a final one.  */
+
+  if (!gnu_extensions)
+    for (i = 0; i < sizeof forced_breaks; i++)
+      word_fastmap[to_uchar (forced_breaks[i])] = 0;
+
+  /* The copy of the file is not needed any more.  */
+  free (contents.start);
+}
+
+/* Read the file named FILE_NAME, holding one word per line, and build
+   in TABLE the WORD descriptors for these words, sorted with
+   compare_words as search_table requires.  Empty lines are ignored.
+   The whole file is swallowed in memory and the WORDs point into that
+   copy, so it is not freed; space is spent on the useless newlines,
+   but the reading is fast.  */
+
+static void
+digest_word_file (char const *file_name, WORD_TABLE *table)
+{
+  BLOCK file_contents;		/* to receive a copy of the file */
+  char *cursor;			/* cursor in file copy */
+  char *word_start;		/* start of the current word */
+
+  swallow_file_in_memory (file_name, &file_contents);
+
+  table->start = NULL;
+  table->alloc = 0;
+  table->length = 0;
+
+  /* Read the whole file.  */
+  cursor = file_contents.start;
+  while (cursor < file_contents.end)
+    {
+      /* Read one line, and save the word it contains.  */
+      word_start = cursor;
+      while (cursor < file_contents.end && *cursor != '\n')
+        cursor++;
+
+      /* Record the word in table if it is not empty.  */
+
+      if (cursor > word_start)
+        {
+          if (table->length == table->alloc)
+            table->start = x2nrealloc (table->start, &table->alloc,
+                                       sizeof *table->start);
+          table->start[table->length].start = word_start;
+          table->start[table->length].size = cursor - word_start;
+          table->length++;
+        }
+
+      /* This test allows for an incomplete line at end of file.  */
+
+      if (cursor < file_contents.end)
+        cursor++;
+    }
+
+  /* Finally, sort all the words read.  When no word was read,
+     table->start is still NULL, and qsort must not be given a null
+     pointer even for zero elements, so skip the call.  */
+
+  if (table->length)
+    qsort (table->start, table->length, sizeof table->start[0], compare_words);
+}
+
+/* Keyword recognition and selection.  */
+
+/*----------------------------------------------------------------------.
+| For each keyword in the source text, constructs an OCCURS structure.  |
+`----------------------------------------------------------------------*/
+
+static void
+find_occurs_in_text (int file_index)
+{
+  char *cursor;			/* for scanning the source text */
+  char *scan;			/* for scanning the source text also */
+  char *line_start;		/* start of the current input line */
+  char *line_scan;		/* newlines scanned until this point */
+  ptrdiff_t reference_length;	/* length of reference in input mode */
+  WORD possible_key;		/* possible key, to ease searches */
+  OCCURS *occurs_cursor;	/* current OCCURS under construction */
+
+  char *context_start;		/* start of left context */
+  char *context_end;		/* end of right context */
+  char *word_start;		/* start of word */
+  char *word_end;		/* end of word */
+  char *next_context_start;	/* next start of left context */
+
+  const BLOCK *text_buffer = &text_buffers[file_index];
+
+  /* reference_length is always used within 'if (input_reference)'.
+     However, GNU C diagnoses that it may be used uninitialized.  The
+     following assignment is merely to shut it up.  */
+
+  reference_length = 0;
+
+  /* Tracking where lines start is helpful for reference processing.  In
+     auto reference mode, this allows counting lines.  In input reference
+     mode, this permits finding the beginning of the references.
+
+     The first line begins with the file, skip immediately this very first
+     reference in input reference mode, to help further rejection any word
+     found inside it.  Also, unconditionally assigning these variable has
+     the happy effect of shutting up lint.  */
+
+  line_start = text_buffer->start;
+  line_scan = line_start;
+  if (input_reference)
+    {
+      SKIP_NON_WHITE (line_scan, text_buffer->end);
+      reference_length = line_scan - line_start;
+      SKIP_WHITE (line_scan, text_buffer->end);
+    }
+
+  /* Process the whole buffer, one line or one sentence at a time.  */
+
+  for (cursor = text_buffer->start;
+       cursor < text_buffer->end;
+       cursor = next_context_start)
+    {
+
+      /* 'context_start' gets initialized before the processing of each
+         line, or once for the whole buffer if no end of line or sentence
+         sequence separator.  */
+
+      context_start = cursor;
+
+      /* If an end of line or end of sentence sequence is defined and
+         non-empty, 'next_context_start' will be recomputed to be the end of
+         each line or sentence, before each one is processed.  If no such
+         sequence, then 'next_context_start' is set at the end of the whole
+         buffer, which is then considered to be a single line or sentence.
+         This test also accounts for the case of an incomplete line or
+         sentence at the end of the buffer.  */
+
+      next_context_start = text_buffer->end;
+      if (context_regex.string)
+        switch (re_search (&context_regex.pattern, cursor,
+                           text_buffer->end - cursor,
+                           0, text_buffer->end - cursor, &context_regs))
+          {
+          case -2:
+            matcher_error ();
+
+          case -1:
+            break;
+
+          case 0:
+            die (EXIT_FAILURE, 0,
+                 _("error: regular expression has a match of length zero: %s"),
+                 quote (context_regex.string));
+
+          default:
+            next_context_start = cursor + context_regs.end[0];
+            break;
+          }
+
+      /* Include the separator into the right context, but not any suffix
+         white space in this separator; this insures it will be seen in
+         output and will not take more space than necessary.  */
+
+      context_end = next_context_start;
+      SKIP_WHITE_BACKWARDS (context_end, context_start);
+
+      /* Read and process a single input line or sentence, one word at a
+         time.  */
+
+      while (true)
+        {
+          if (word_regex.string)
+
+            /* If a word regexp has been compiled, use it to skip at the
+               beginning of the next word.  If there is no such word, exit
+               the loop.  */
+
+            {
+              regoff_t r = re_search (&word_regex.pattern, cursor,
+                                      context_end - cursor,
+                                      0, context_end - cursor, &word_regs);
+              if (r == -2)
+                matcher_error ();
+              if (r == -1)
+                break;
+              word_start = cursor + word_regs.start[0];
+              word_end = cursor + word_regs.end[0];
+            }
+          else
+
+            /* Avoid re_search and use the fastmap to skip to the
+               beginning of the next word.  If there is no more word in
+               the buffer, exit the loop.  */
+
+            {
+              scan = cursor;
+              while (scan < context_end
+                     && !word_fastmap[to_uchar (*scan)])
+                scan++;
+
+              if (scan == context_end)
+                break;
+
+              word_start = scan;
+
+              while (scan < context_end
+                     && word_fastmap[to_uchar (*scan)])
+                scan++;
+
+              word_end = scan;
+            }
+
+          /* Skip right to the beginning of the found word.  */
+
+          cursor = word_start;
+
+          /* Skip any zero length word.  Just advance a single position,
+             then go fetch the next word.  */
+
+          if (word_end == word_start)
+            {
+              cursor++;
+              continue;
+            }
+
+          /* This is a genuine, non empty word, so save it as a possible
+             key.  Then skip over it.  Also, maintain the maximum length of
+             all words read so far.  It is mandatory to take the maximum
+             length of all words in the file, without considering if they
+             are actually kept or rejected, because backward jumps at output
+             generation time may fall in *any* word.  */
+
+          possible_key.start = cursor;
+          possible_key.size = word_end - word_start;
+          cursor += possible_key.size;
+
+          if (possible_key.size > maximum_word_length)
+            maximum_word_length = possible_key.size;
+
+          /* In input reference mode, update 'line_start' from its previous
+             value.  Count the lines just in case auto reference mode is
+             also selected.  If it happens that the word just matched is
+             indeed part of a reference, just ignore it.  */
+
+          if (input_reference)
+            {
+              while (line_scan < possible_key.start)
+                if (*line_scan == '\n')
+                  {
+                    total_line_count++;
+                    line_scan++;
+                    line_start = line_scan;
+                    SKIP_NON_WHITE (line_scan, text_buffer->end);
+                    reference_length = line_scan - line_start;
+                  }
+                else
+                  line_scan++;
+              if (line_scan > possible_key.start)
+                continue;
+            }
+
+          /* Ignore the word if an 'Ignore words' table exists and if it is
+             part of it.  Also ignore the word if an 'Only words' table and
+             if it is *not* part of it.
+
+             It is allowed that both tables be used at once, even if this
+             may look strange for now.  Just ignore a word that would appear
+             in both.  If regexps are eventually implemented for these
+             tables, the Ignore table could then reject words that would
+             have been previously accepted by the Only table.  */
+
+          if (ignore_file && search_table (&possible_key, &ignore_table))
+            continue;
+          if (only_file && !search_table (&possible_key, &only_table))
+            continue;
+
+          /* A non-empty word has been found.  First of all, ensure
+             proper allocation of the next OCCURS, and make a pointer to
+             where it will be constructed.  */
+
+          if (number_of_occurs[0] == occurs_alloc[0])
+            occurs_table[0] = x2nrealloc (occurs_table[0],
+                                          &occurs_alloc[0],
+                                          sizeof *occurs_table[0]);
+          occurs_cursor = occurs_table[0] + number_of_occurs[0];
+
+          /* Define the reference field, if any.  */
+
+          if (auto_reference)
+            {
+
+              /* While auto referencing, update 'line_start' from its
+                 previous value, counting lines as we go.  If input
+                 referencing at the same time, 'line_start' has been
+                 advanced earlier, and the following loop is never really
+                 executed.  */
+
+              while (line_scan < possible_key.start)
+                if (*line_scan == '\n')
+                  {
+                    total_line_count++;
+                    line_scan++;
+                    line_start = line_scan;
+                    SKIP_NON_WHITE (line_scan, text_buffer->end);
+                  }
+                else
+                  line_scan++;
+
+              occurs_cursor->reference = total_line_count;
+            }
+          else if (input_reference)
+            {
+
+              /* If only input referencing, 'line_start' has been computed
+                 earlier to detect the case the word matched would be part
+                 of the reference.  The reference position is simply the
+                 value of 'line_start'.  */
+
+              occurs_cursor->reference = line_start - possible_key.start;
+              if (reference_length > reference_max_width)
+                reference_max_width = reference_length;
+            }
+
+          /* Exclude the reference from the context in simple cases.  */
+
+          if (input_reference && line_start == context_start)
+            {
+              SKIP_NON_WHITE (context_start, context_end);
+              SKIP_WHITE (context_start, context_end);
+            }
+
+          /* Completes the OCCURS structure.  */
+
+          occurs_cursor->key = possible_key;
+          occurs_cursor->left = context_start - possible_key.start;
+          occurs_cursor->right = context_end - possible_key.start;
+          occurs_cursor->file_index = file_index;
+
+          number_of_occurs[0]++;
+        }
+    }
+}
+
+/* Formatting and actual output - service routines.  */
+
+/*-------------------------------------------------------.
+| Emit NUMBER blanks on stdout; a NUMBER of zero or less |
+| emits nothing.                                         |
+`-------------------------------------------------------*/
+
+static void
+print_spaces (ptrdiff_t number)
+{
+  ptrdiff_t remaining = number;
+
+  while (remaining > 0)
+    {
+      putchar (' ');
+      remaining--;
+    }
+}
+
+/*----------------------------------------------------------------.
+| Write FIELD to stdout.  Characters flagged in 'edited_flag' are |
+| rewritten on the way out; all the others are copied verbatim.   |
+`----------------------------------------------------------------*/
+
+static void
+print_field (BLOCK field)
+{
+  /* Whitespace is not really compressed.  Instead, each flagged white
+     space character is replaced by exactly one space.  */
+
+  for (char *scan = field.start; scan < field.end; scan++)
+    {
+      unsigned char byte = *scan;
+
+      /* Characters which are not flagged go out untouched.  */
+
+      if (!edited_flag[byte])
+        {
+          putchar (*scan);
+          continue;
+        }
+
+      /* The remaining cases are specific to 'roff' or TeX; anything
+         flagged but not listed here is white space.  */
+
+      if (byte == '"')
+        {
+          /* In roff output format, double any quote.  */
+          putchar ('"');
+          putchar ('"');
+        }
+      else if (byte == '$' || byte == '%' || byte == '&'
+               || byte == '#' || byte == '_')
+        {
+          /* In TeX output format, precede these with a backslash.  */
+          putchar ('\\');
+          putchar (byte);
+        }
+      else if (byte == '{' || byte == '}')
+        {
+          /* In TeX output format, precede these with a backslash and
+             force mathematical mode.  */
+          printf ("$\\%c$", byte);
+        }
+      else if (byte == '\\')
+        {
+          /* In TeX output mode, request production of a backslash.  */
+          fputs ("\\backslash{}", stdout);
+        }
+      else
+        {
+          /* Any other flagged character produces a single space.  */
+          putchar (' ');
+        }
+    }
+}
+
+/* Formatting and actual output - planning routines.  */
+
+/*--------------------------------------------------------------------.
+| Once options and input files have been read, settle the values the |
+| output routines rely on: reference width, field widths, truncation |
+| mark, and the table of characters needing special output.          |
+`--------------------------------------------------------------------*/
+
+static void
+fix_output_parameters (void)
+{
+  /* In auto reference mode, the widest possible reference is computed
+     ahead of time from each file name and its largest line number.  The
+     increment after the loop pays for the colon separating the file name
+     from the line number, and the buffer gets one more byte for the
+     terminating NUL.  */
+
+  if (auto_reference)
+    {
+      reference_max_width = 0;
+      for (size_t file_index = 0;
+           file_index < number_input_files;
+           file_index++)
+        {
+          char ordinal_string[INT_BUFSIZE_BOUND (intmax_t)];
+          char const *name = input_file_name[file_index];
+
+          /* Line counts are presumably cumulative over the input files,
+             hence the subtraction of the previous file's count.  */
+          intmax_t line_ordinal = file_line_count[file_index] + 1;
+          if (file_index > 0)
+            line_ordinal -= file_line_count[file_index - 1];
+
+          ptrdiff_t reference_width
+            = sprintf (ordinal_string, "%"PRIdMAX, line_ordinal);
+          if (name)
+            reference_width += strlen (name);
+
+          if (reference_max_width < reference_width)
+            reference_max_width = reference_width;
+        }
+      reference_max_width++;
+      reference.start = xmalloc (reference_max_width + 1);
+    }
+
+  /* A reference printed at the left of the line eats into the line
+     width, together with one gap.  The width never goes negative.  */
+
+  if (!right_reference && (auto_reference || input_reference))
+    line_width -= reference_max_width + gap_size;
+  if (line_width < 0)
+    line_width = 0;
+
+  /* From left to right, an output line holds at least a left context, a
+     gap, then the keyword immediately followed by the right context.
+     One half of the line goes to the left context and the gap, the other
+     half to the keyword and the right context; both limits are fixed
+     here once and for all.  The tail and head wrap around fields, used
+     when the keyword sits near either end of the line or when a long
+     word does not fit, have their widths recomputed for each line.  Tail
+     and head are never both used at once.  */
+
+  half_line_width = line_width / 2;
+  keyafter_max_width = half_line_width;
+  before_max_width = half_line_width - gap_size;
+
+  /* An empty truncation string is turned into NULL to speed up later
+     tests.  Its length is then left as it was; NOTE(review): the width
+     computations just below still read it, so this presumably relies on
+     the length starting out as zero.  */
+
+  if (!truncation_string || !*truncation_string)
+    truncation_string = NULL;
+  else
+    truncation_string_length = strlen (truncation_string);
+
+  if (!gnu_extensions)
+    {
+
+      /* I never figured out exactly how UNIX' ptx plans the output width
+         of its various fields.  If GNU extensions are disabled, do not
+         try computing the field widths correctly; instead, use the
+         following formula, which does not completely imitate UNIX' ptx,
+         but almost.  */
+
+      keyafter_max_width -= 2 * truncation_string_length + 1;
+    }
+  else
+    {
+
+      /* A truncation mark at the left of the keyword begins the before
+         field, or the head field when there is one.  A truncation mark
+         at the right of the keyword ends the keyafter field, or the tail
+         field when there is one.  Only eight combinations can arise:
+
+         . None.
+         . One beginning the before field.
+         . One beginning the head field.
+         . One ending the keyafter field.
+         . One ending the tail field.
+         . One beginning the before field, another ending the keyafter field.
+         . One ending the tail field, another beginning the before field.
+         . One ending the keyafter field, another beginning the head field.
+
+         So, there are at most two truncation marks, which could appear
+         both on the left side of the center of the output line, both on
+         the right side, or one on either side.  Room for two marks is
+         therefore reserved in each half.  */
+
+      before_max_width -= 2 * truncation_string_length;
+      if (before_max_width < 0)
+        before_max_width = 0;
+      keyafter_max_width -= 2 * truncation_string_length;
+    }
+
+  /* Compute which characters need special output processing.  Start by
+     flagging every white space character.  Some systems do not consider
+     form feed as a space character, but we do.  */
+
+  for (int code = 0; code < CHAR_SET_SIZE; code++)
+    edited_flag[code] = isspace (code) != 0;
+  edited_flag['\f'] = 1;
+
+  /* Complete the flagging according to the selected output format.  The
+     dumb format, and the unknown format which should never happen, need
+     nothing more.  */
+
+  if (output_format == ROFF_FORMAT)
+    {
+      /* 'Quote' characters should be doubled.  */
+
+      edited_flag['"'] = 1;
+    }
+  else if (output_format == TEX_FORMAT)
+    {
+      /* Various characters need special processing.  */
+
+      static char const tex_specials[] = "$%&#_{}\\";
+
+      for (char const *special = tex_specials; *special; special++)
+        edited_flag[to_uchar (*special)] = 1;
+    }
+}
+
+/*------------------------------------------------------------------.
+| Compute the position and length of all the output fields, given a |
+| pointer to some OCCURS.  Results are stored in the file-scope      |
+| blocks 'keyafter', 'before', 'tail', 'head' and 'reference', and   |
+| in the matching '*_truncation' flags.                              |
+`------------------------------------------------------------------*/
+
+static void
+define_all_fields (OCCURS *occurs)
+{
+  ptrdiff_t tail_max_width;	/* allowable width of tail field */
+  ptrdiff_t head_max_width;	/* allowable width of head field */
+  char *cursor;			/* running cursor in source text */
+  char *left_context_start;	/* start of left context */
+  char *right_context_end;	/* end of right context */
+  char *left_field_start;	/* conservative start for 'head'/'before' */
+  char const *file_name;	/* file name for reference */
+  intmax_t line_ordinal;	/* line ordinal for reference */
+  char const *buffer_start;	/* start of buffered file for this occurs */
+  char const *buffer_end;	/* end of buffered file for this occurs */
+
+  /* Define 'keyafter', start of left context and end of right context.
+     'keyafter' starts at the saved position for keyword and extends to the
+     right from the end of the keyword, eating separators or full words, but
+     not beyond maximum allowed width for 'keyafter' field or limit for the
+     right context.  Suffix spaces will be removed afterwards.  The context
+     limits are stored in OCCURS as offsets relative to the keyword start.  */
+
+  keyafter.start = occurs->key.start;
+  keyafter.end = keyafter.start + occurs->key.size;
+  left_context_start = keyafter.start + occurs->left;
+  right_context_end = keyafter.start + occurs->right;
+
+  buffer_start = text_buffers[occurs->file_index].start;
+  buffer_end = text_buffers[occurs->file_index].end;
+
+  /* Advance one separator or word at a time, remembering in 'keyafter.end'
+     the last position which still fits within the allowed width.  */
+
+  cursor = keyafter.end;
+  while (cursor < right_context_end
+         && cursor <= keyafter.start + keyafter_max_width)
+    {
+      keyafter.end = cursor;
+      SKIP_SOMETHING (cursor, right_context_end);
+    }
+  if (cursor <= keyafter.start + keyafter_max_width)
+    keyafter.end = cursor;
+
+  /* The field is truncated when it stops short of the right context end;
+     this is only flagged when a truncation string is in use.  */
+
+  keyafter_truncation = truncation_string && keyafter.end < right_context_end;
+
+  SKIP_WHITE_BACKWARDS (keyafter.end, keyafter.start);
+
+  /* When the left context is wide, it might take some time to catch up from
+     the left context boundary to the beginning of the 'head' or 'before'
+     fields.  So, in this case, to speed the catchup, we jump back from the
+     keyword, using some secure distance, possibly falling in the middle of
+     a word.  A secure backward jump would be at least half the maximum
+     width of a line, plus the size of the longest word met in the whole
+     input.  We conclude this backward jump by a skip forward of at least
+     one word.  In this manner, we should not inadvertently accept only part
+     of a word.  From the reached point, when it will be time to fix the
+     beginning of 'head' or 'before' fields, we will skip forward words or
+     delimiters until we get sufficiently near.  */
+
+  if (-occurs->left > half_line_width + maximum_word_length)
+    {
+      left_field_start
+        = keyafter.start - (half_line_width + maximum_word_length);
+      SKIP_SOMETHING (left_field_start, keyafter.start);
+    }
+  else
+    left_field_start = keyafter.start + occurs->left;
+
+  /* 'before' certainly ends at the keyword, but not including separating
+     spaces.  It starts no earlier than the conservative start computed
+     above, which is advanced until the field falls inside the maximum
+     allowed width for the before field.  There will be no prefix spaces
+     either.  'before' only advances by skipping single separators or
+     whole words.  */
+
+  before.start = left_field_start;
+  before.end = keyafter.start;
+  SKIP_WHITE_BACKWARDS (before.end, before.start);
+
+  while (before.start + before_max_width < before.end)
+    SKIP_SOMETHING (before.start, before.end);
+
+  /* 'before' is truncated when, white space aside, some of the left
+     context remains ahead of its start.  */
+
+  if (truncation_string)
+    {
+      cursor = before.start;
+      SKIP_WHITE_BACKWARDS (cursor, buffer_start);
+      before_truncation = cursor > left_context_start;
+    }
+  else
+    before_truncation = false;
+
+  SKIP_WHITE (before.start, buffer_end);
+
+  /* The tail could not take more columns than what has been left in the
+     left context field, and a gap is mandatory.  It starts after the
+     'keyafter' field, and does not contain prefixed spaces.  It ends at
+     the end of the right context or when the tail field is full,
+     whichever comes first.  It cannot contain only part of a word, and
+     has no suffixed spaces.  */
+
+  tail_max_width
+    = before_max_width - (before.end - before.start) - gap_size;
+
+  if (tail_max_width > 0)
+    {
+      tail.start = keyafter.end;
+      SKIP_WHITE (tail.start, buffer_end);
+
+      tail.end = tail.start;
+      cursor = tail.end;
+      while (cursor < right_context_end
+             && cursor < tail.start + tail_max_width)
+        {
+          tail.end = cursor;
+          SKIP_SOMETHING (cursor, right_context_end);
+        }
+
+      if (cursor < tail.start + tail_max_width)
+        tail.end = cursor;
+
+      /* A non-empty tail takes over the right truncation mark from the
+         'keyafter' field.  */
+
+      if (tail.end > tail.start)
+        {
+          keyafter_truncation = false;
+          tail_truncation = truncation_string && tail.end < right_context_end;
+        }
+      else
+        tail_truncation = false;
+
+      SKIP_WHITE_BACKWARDS (tail.end, tail.start);
+    }
+  else
+    {
+
+      /* No place left for a tail field.  */
+
+      tail.start = NULL;
+      tail.end = NULL;
+      tail_truncation = false;
+    }
+
+  /* 'head' could not take more columns than what has been left in the right
+     context field, and a gap is mandatory.  It ends where the 'before'
+     field starts, and does not contain suffixed spaces.  Its start pointer
+     is advanced until the head field has shrunk to its allowed width.  It
+     cannot contain only part of a word, and has no prefixed spaces.  */
+
+  head_max_width
+    = keyafter_max_width - (keyafter.end - keyafter.start) - gap_size;
+
+  if (head_max_width > 0)
+    {
+      head.end = before.start;
+      SKIP_WHITE_BACKWARDS (head.end, buffer_start);
+
+      head.start = left_field_start;
+      while (head.start + head_max_width < head.end)
+        SKIP_SOMETHING (head.start, head.end);
+
+      /* A non-empty head takes over the left truncation mark from the
+         'before' field.  */
+
+      if (head.end > head.start)
+        {
+          before_truncation = false;
+          head_truncation = (truncation_string
+                             && head.start > left_context_start);
+        }
+      else
+        head_truncation = false;
+
+      SKIP_WHITE (head.start, head.end);
+    }
+  else
+    {
+
+      /* No place left for a head field.  */
+
+      head.start = NULL;
+      head.end = NULL;
+      head_truncation = false;
+    }
+
+  if (auto_reference)
+    {
+
+      /* Construct the reference text in preallocated space from the file
+         name and the line number.  Standard input yields an empty file name.
+         Ensure line numbers are 1 based, even if they are computed 0 based.
+         The line count of the previous file is subtracted so that numbering
+         restarts for each input file.  */
+
+      file_name = input_file_name[occurs->file_index];
+      if (!file_name)
+        file_name = "";
+
+      line_ordinal = occurs->reference + 1;
+      if (occurs->file_index > 0)
+        line_ordinal -= file_line_count[occurs->file_index - 1];
+
+      char *file_end = stpcpy (reference.start, file_name);
+      reference.end = file_end + sprintf (file_end, ":%"PRIdMAX, line_ordinal);
+    }
+  else if (input_reference)
+    {
+
+      /* Reference starts at saved position for reference and extends right
+         until some white space is met.  The saved position is an offset
+         relative to the keyword start.  */
+
+      reference.start = keyafter.start + occurs->reference;
+      reference.end = reference.start;
+      SKIP_NON_WHITE (reference.end, right_context_end);
+    }
+}
+
+/* Formatting and actual output - control routines.  */
+
+/*--------------------------------------------------------------------.
+| Emit the current fields as one 'troff' or 'nroff' macro call, each  |
+| field being a double-quoted argument: tail, before, keyafter, head, |
+| then the reference when references are in use.                      |
+`--------------------------------------------------------------------*/
+
+static void
+output_one_roff_line (void)
+{
+  bool with_reference = auto_reference || input_reference;
+
+  /* Macro name, then the 'tail' argument with its mark at the end.  */
+
+  printf (".%s \"", macro_name);
+  print_field (tail);
+  if (tail_truncation)
+    fputs (truncation_string, stdout);
+  putchar ('"');
+
+  /* The 'before' argument, with its mark at the beginning.  */
+
+  fputs (" \"", stdout);
+  if (before_truncation)
+    fputs (truncation_string, stdout);
+  print_field (before);
+  putchar ('"');
+
+  /* The 'keyafter' argument, with its mark at the end.  */
+
+  fputs (" \"", stdout);
+  print_field (keyafter);
+  if (keyafter_truncation)
+    fputs (truncation_string, stdout);
+  putchar ('"');
+
+  /* The 'head' argument, with its mark at the beginning.  */
+
+  fputs (" \"", stdout);
+  if (head_truncation)
+    fputs (truncation_string, stdout);
+  print_field (head);
+  putchar ('"');
+
+  /* The 'reference' argument is only present in a reference mode.  */
+
+  if (with_reference)
+    {
+      fputs (" \"", stdout);
+      print_field (reference);
+      putchar ('"');
+    }
+
+  putchar ('\n');
+}
+
+/*-------------------------------------------------------------------.
+| Emit the current fields as one 'TeX' macro call, each field being  |
+| a braced argument: tail, before, key, after, head, then the        |
+| reference when references are in use.                              |
+`-------------------------------------------------------------------*/
+
+static void
+output_one_tex_line (void)
+{
+  /* 'keyafter' is split in two for TeX: the key proper is whatever one
+     SKIP_SOMETHING step covers from its start, the rest is 'after'.  */
+
+  char *split = keyafter.start;
+  SKIP_SOMETHING (split, keyafter.end);
+
+  BLOCK key;			/* key field, isolated */
+  key.start = keyafter.start;
+  key.end = split;
+
+  BLOCK after;			/* after field, isolated */
+  after.start = split;
+  after.end = keyafter.end;
+
+  printf ("\\%s ", macro_name);
+  putchar ('{');
+  print_field (tail);
+  fputs ("}{", stdout);
+  print_field (before);
+  fputs ("}{", stdout);
+  print_field (key);
+  fputs ("}{", stdout);
+  print_field (after);
+  fputs ("}{", stdout);
+  print_field (head);
+  putchar ('}');
+
+  if (auto_reference || input_reference)
+    {
+      putchar ('{');
+      print_field (reference);
+      putchar ('}');
+    }
+
+  putchar ('\n');
+}
+
+/*--------------------------------------------------------------------.
+| Emit the current fields as one line for a dumb terminal, padding    |
+| with spaces so that the fields line up from one line to the next.   |
+`--------------------------------------------------------------------*/
+
+static void
+output_one_dumb_line (void)
+{
+  /* Unless references go to the right, the line opens with the
+     'reference' field padded up to its maximum width plus one gap.  */
+
+  if (!right_reference)
+    {
+      ptrdiff_t reference_padding = (reference_max_width
+                                     + gap_size
+                                     - (reference.end - reference.start));
+
+      print_field (reference);
+      if (auto_reference)
+        {
+          /* A colon follows the reference, in such a way that GNU emacs
+             next-error will handle it.  The colon is taken from the gap
+             which follows.  */
+
+          putchar (':');
+          print_spaces (reference_padding - 1);
+        }
+      else
+        print_spaces (reference_padding);
+    }
+
+  /* Left half: the optional 'tail' field, then padding so that the
+     'before' field ends right against the central gap.  */
+
+  ptrdiff_t left_padding
+    = (half_line_width - gap_size
+       - (before.end - before.start)
+       - (before_truncation ? truncation_string_length : 0));
+
+  if (tail.start < tail.end)
+    {
+      print_field (tail);
+      if (tail_truncation)
+        fputs (truncation_string, stdout);
+
+      print_spaces (left_padding
+                    - (tail.end - tail.start)
+                    - (tail_truncation ? truncation_string_length : 0));
+    }
+  else
+    print_spaces (left_padding);
+
+  /* The 'before' field, the gap, then the 'keyafter' field.  */
+
+  if (before_truncation)
+    fputs (truncation_string, stdout);
+  print_field (before);
+
+  print_spaces (gap_size);
+
+  print_field (keyafter);
+  if (keyafter_truncation)
+    fputs (truncation_string, stdout);
+
+  /* Right half: the 'head' field is pushed against the right margin.
+     Without a 'head' field, padding is only needed when a reference is
+     still to come at the right.  */
+
+  bool reference_at_right
+    = (auto_reference || input_reference) && right_reference;
+  ptrdiff_t right_padding
+    = (half_line_width
+       - (keyafter.end - keyafter.start)
+       - (keyafter_truncation ? truncation_string_length : 0));
+
+  if (head.start < head.end)
+    {
+      print_spaces (right_padding
+                    - (head.end - head.start)
+                    - (head_truncation ? truncation_string_length : 0));
+      if (head_truncation)
+        fputs (truncation_string, stdout);
+      print_field (head);
+    }
+  else if (reference_at_right)
+    print_spaces (right_padding);
+
+  if (reference_at_right)
+    {
+      /* The 'reference' field, after one gap.  */
+
+      print_spaces (gap_size);
+      print_field (reference);
+    }
+
+  putchar ('\n');
+}
+
+/*----------------------------------------------------------------------.
+| Walk the occurs table in order and, for each entry, compute its fields |
+| then print one line in the selected output format.                     |
+`----------------------------------------------------------------------*/
+
+static void
+generate_all_output (void)
+{
+  /* Give the wrap around fields default values, for the case where line
+     contexts or references are not used and these variables would
+     otherwise never be computed.  */
+
+  head.start = NULL;
+  head.end = NULL;
+  head_truncation = false;
+
+  tail.start = NULL;
+  tail.end = NULL;
+  tail_truncation = false;
+
+  /* Loop over all keyword occurrences.  */
+
+  for (ptrdiff_t entry_index = 0;
+       entry_index < number_of_occurs[0];
+       entry_index++)
+    {
+      OCCURS *entry = occurs_table[0] + entry_index;
+
+      /* Compute the exact size of every field and whether each
+         truncation flag is set.  */
+
+      define_all_fields (entry);
+
+      /* Produce one output line according to the selected format.  The
+         unknown format should never happen and is handled like the dumb
+         one.  */
+
+      switch (output_format)
+        {
+        case ROFF_FORMAT:
+          output_one_roff_line ();
+          break;
+
+        case TEX_FORMAT:
+          output_one_tex_line ();
+          break;
+
+        case UNKNOWN_FORMAT:
+        case DUMB_FORMAT:
+          output_one_dumb_line ();
+          break;
+        }
+    }
+}
+
+/* Option decoding and main program.  */
+
+/*-----------------------------------------------------------------.
+| With a successful STATUS, print the full help text on stdout;    |
+| otherwise only emit the "try --help" hint.  Either way, exit     |
+| with STATUS.                                                     |
+`-----------------------------------------------------------------*/
+
+void
+usage (int status)
+{
+  if (status == EXIT_SUCCESS)
+    {
+      /* Synopsis and one-line description.  */
+
+      printf (_("\
+Usage: %s [OPTION]... [INPUT]...   (without -G)\n\
+  or:  %s -G [OPTION]... [INPUT [OUTPUT]]\n"),
+              program_name, program_name);
+      fputs (_("\
+Output a permuted index, including context, of the words in the input files.\n\
+"), stdout);
+
+      emit_stdin_note ();
+      emit_mandatory_arg_note ();
+
+      /* Option descriptions, one translatable message per group.  */
+
+      fputs (_("\
+  -A, --auto-reference           output automatically generated references\n\
+  -G, --traditional              behave more like System V 'ptx'\n\
+"), stdout);
+      fputs (_("\
+  -F, --flag-truncation=STRING   use STRING for flagging line truncations.\n\
+                                 The default is '/'\n\
+"), stdout);
+      fputs (_("\
+  -M, --macro-name=STRING        macro name to use instead of 'xx'\n\
+  -O, --format=roff              generate output as roff directives\n\
+  -R, --right-side-refs          put references at right, not counted in -w\n\
+  -S, --sentence-regexp=REGEXP   for end of lines or end of sentences\n\
+  -T, --format=tex               generate output as TeX directives\n\
+"), stdout);
+      fputs (_("\
+  -W, --word-regexp=REGEXP       use REGEXP to match each keyword\n\
+  -b, --break-file=FILE          word break characters in this FILE\n\
+  -f, --ignore-case              fold lower case to upper case for sorting\n\
+  -g, --gap-size=NUMBER          gap size in columns between output fields\n\
+  -i, --ignore-file=FILE         read ignore word list from FILE\n\
+  -o, --only-file=FILE           read only word list from this FILE\n\
+"), stdout);
+      fputs (_("\
+  -r, --references               first field of each line is a reference\n\
+  -t, --typeset-mode               - not implemented -\n\
+  -w, --width=NUMBER             output width in columns, reference excluded\n\
+"), stdout);
+      fputs (HELP_OPTION_DESCRIPTION, stdout);
+      fputs (VERSION_OPTION_DESCRIPTION, stdout);
+      emit_ancillary_info (PROGRAM_NAME);
+    }
+  else
+    emit_try_help ();
+
+  exit (status);
+}
+
+/*----------------------------------------------------------------------.
+| Main program.  Decode ARGC arguments passed through the ARGV array of |
+| strings, then launch execution.				        |
+`----------------------------------------------------------------------*/
+
+/* Long options equivalences.  Each entry maps a long option name to the
+   value getopt_long returns for it, which is the matching short option
+   letter except for "format".  */
+static struct option const long_options[] =
+{
+  {"auto-reference", no_argument, NULL, 'A'},
+  {"break-file", required_argument, NULL, 'b'},
+  {"flag-truncation", required_argument, NULL, 'F'},
+  {"ignore-case", no_argument, NULL, 'f'},
+  {"gap-size", required_argument, NULL, 'g'},
+  {"ignore-file", required_argument, NULL, 'i'},
+  {"macro-name", required_argument, NULL, 'M'},
+  {"only-file", required_argument, NULL, 'o'},
+  {"references", no_argument, NULL, 'r'},
+  {"right-side-refs", no_argument, NULL, 'R'},
+  /* "format" has no short option letter of its own; the value 10 is
+     handled by 'case 10' in main.  */
+  {"format", required_argument, NULL, 10},
+  {"sentence-regexp", required_argument, NULL, 'S'},
+  {"traditional", no_argument, NULL, 'G'},
+  {"typeset-mode", no_argument, NULL, 't'},
+  {"width", required_argument, NULL, 'w'},
+  {"word-regexp", required_argument, NULL, 'W'},
+  {GETOPT_HELP_OPTION_DECL},
+  {GETOPT_VERSION_OPTION_DECL},
+  /* End-of-table marker.  */
+  {NULL, 0, NULL, 0},
+};
+
+/* Names accepted as argument of --format, terminated by NULL.  This table
+   is passed to XARGMATCH in main together with format_vals below.  */
+static char const *const format_args[] =
+{
+  "roff", "tex", NULL
+};
+
+/* Output format selected by each name of format_args, in the same
+   order.  */
+static enum Format const format_vals[] =
+{
+  ROFF_FORMAT, TEX_FORMAT
+};
+
+int
+main (int argc, char **argv)
+{
+  int optchar;			/* option code returned by getopt_long */
+  int file_index;		/* index in text input file arrays */
+  /* Overall flow: decode options, collect input names, load break/word
+     files, study every input, then emit the output.  First: options.  */
+
+  initialize_main (&argc, &argv);
+  set_program_name (argv[0]);
+  setlocale (LC_ALL, "");	/* adopt the locale from the environment */
+  bindtextdomain (PACKAGE, LOCALEDIR);
+  textdomain (PACKAGE);
+
+  atexit (close_stdout);	/* close_stdout runs when the program exits */
+
+#if HAVE_SETCHRCLASS
+  setchrclass (NULL);		/* compiled only when HAVE_SETCHRCLASS is set */
+#endif
+
+  while (optchar = getopt_long (argc, argv, "AF:GM:ORS:TW:b:i:fg:o:trw:",
+                                long_options, NULL),
+         optchar != EOF)	/* NOTE(review): getopt_long documents -1; assumes EOF == -1 */
+    {
+      switch (optchar)
+        {
+        default:		/* any option code not handled below */
+          usage (EXIT_FAILURE);	/* no break: assumes usage never returns -- TODO confirm */
+
+        case 'G':		/* --traditional: turn GNU extensions off */
+          gnu_extensions = false;
+          break;
+
+        case 'b':		/* file name handed to digest_break_file below */
+          break_file = optarg;
+          break;
+
+        case 'f':
+          ignore_case = true;
+          break;
+
+        case 'g':		/* gap width: integer in 1..PTRDIFF_MAX */
+          {
+            intmax_t tmp;	/* parsed value, range-checked before use */
+            if (! (xstrtoimax (optarg, NULL, 0, &tmp, "") == LONGINT_OK
+                   && 0 < tmp && tmp <= PTRDIFF_MAX))
+              die (EXIT_FAILURE, 0, _("invalid gap width: %s"),
+                   quote (optarg));
+            gap_size = tmp;
+            break;
+          }
+
+        case 'i':		/* file name handed to digest_word_file below */
+          ignore_file = optarg;
+          break;
+
+        case 'o':		/* file name handed to digest_word_file below */
+          only_file = optarg;
+          break;
+
+        case 'r':
+          input_reference = true;
+          break;
+
+        case 't':		/* --typeset-mode: accepted but has no effect here */
+          /* Yet to understand...  */
+          break;
+
+        case 'w':		/* --width: integer in 1..PTRDIFF_MAX */
+          {
+            intmax_t tmp;	/* parsed value, range-checked before use */
+            if (! (xstrtoimax (optarg, NULL, 0, &tmp, "") == LONGINT_OK
+                   && 0 < tmp && tmp <= PTRDIFF_MAX))
+              die (EXIT_FAILURE, 0, _("invalid line width: %s"),
+                   quote (optarg));
+            line_width = tmp;
+            break;
+          }
+
+        case 'A':
+          auto_reference = true;
+          break;
+
+        case 'F':
+          truncation_string = optarg;
+          unescape_string (optarg);	/* presumably rewrites optarg in place -- confirm */
+          break;
+
+        case 'M':
+          macro_name = optarg;
+          break;
+
+        case 'O':		/* select roff output */
+          output_format = ROFF_FORMAT;
+          break;
+
+        case 'R':
+          right_reference = true;
+          break;
+
+        case 'S':		/* --sentence-regexp */
+          context_regex.string = optarg;
+          unescape_string (optarg);	/* presumably rewrites optarg in place -- confirm */
+          break;
+
+        case 'T':		/* select TeX output */
+          output_format = TEX_FORMAT;
+          break;
+
+        case 'W':		/* --word-regexp */
+          word_regex.string = optarg;
+          unescape_string (optarg);	/* presumably rewrites optarg in place -- confirm */
+          if (!*word_regex.string)	/* an empty regexp is stored as NULL */
+            word_regex.string = NULL;
+          break;
+
+        case 10:		/* code with no letter in the option string above */
+          output_format = XARGMATCH ("--format", optarg,
+                                     format_args, format_vals);
+          break;
+
+        case_GETOPT_HELP_CHAR;	/* project macro; presumably the --help case */
+
+        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);	/* presumably --version */
+        }
+    }
+
+  /* Process remaining arguments.  If GNU extensions are enabled, process
+     all arguments as input parameters.  If disabled, accept at most two
+     arguments, the second of which is an output parameter.  */
+
+  if (optind == argc)		/* no operands left after the options */
+    {
+
+      /* No more argument simply means: read standard input.  */
+
+      input_file_name = xmalloc (sizeof *input_file_name);
+      file_line_count = xmalloc (sizeof *file_line_count);
+      text_buffers =    xmalloc (sizeof *text_buffers);
+      number_input_files = 1;
+      input_file_name[0] = NULL;	/* NULL name: standard input, per the comment above */
+    }
+  else if (gnu_extensions)	/* every remaining operand is an input file */
+    {
+      number_input_files = argc - optind;
+      input_file_name = xnmalloc (number_input_files, sizeof *input_file_name);
+      file_line_count = xnmalloc (number_input_files, sizeof *file_line_count);
+      text_buffers    = xnmalloc (number_input_files, sizeof *text_buffers);
+
+      for (file_index = 0; file_index < number_input_files; file_index++)
+        {
+          if (!*argv[optind] || STREQ (argv[optind], "-"))	/* empty name or "-" */
+            input_file_name[file_index] = NULL;
+          else
+            input_file_name[file_index] = argv[optind];
+          optind++;
+        }
+    }
+  else
+    {
+
+      /* There is one necessary input file.  */
+
+      number_input_files = 1;
+      input_file_name = xmalloc (sizeof *input_file_name);
+      file_line_count = xmalloc (sizeof *file_line_count);
+      text_buffers    = xmalloc (sizeof *text_buffers);
+      if (!*argv[optind] || STREQ (argv[optind], "-"))	/* empty name or "-" */
+        input_file_name[0] = NULL;
+      else
+        input_file_name[0] = argv[optind];
+      optind++;
+
+      /* Redirect standard output, only if requested.  */
+
+      if (optind < argc)	/* a second operand names the output file */
+        {
+          if (! freopen (argv[optind], "w", stdout))
+            die (EXIT_FAILURE, errno, "%s", quotef (argv[optind]));
+          optind++;
+        }
+
+      /* Diagnose any other argument as an error.  */
+
+      if (optind < argc)
+        {
+          error (0, 0, _("extra operand %s"), quote (argv[optind]));
+          usage (EXIT_FAILURE);
+        }
+    }
+
+  /* If the output format has not been explicitly selected, choose dumb
+     terminal format if GNU extensions are enabled, else 'roff' format.  */
+
+  if (output_format == UNKNOWN_FORMAT)
+    output_format = gnu_extensions ? DUMB_FORMAT : ROFF_FORMAT;
+
+  /* Initialize the main tables.  */
+
+  initialize_regex ();
+
+  /* Read 'Break character' file, if any.  */
+
+  if (break_file)
+    digest_break_file (break_file);
+
+  /* Read 'Ignore words' file and 'Only words' files, if any.  If any of
+     these files is empty, reset the name of the file to NULL, to avoid
+     unnecessary calls to search_table. */
+
+  if (ignore_file)
+    {
+      digest_word_file (ignore_file, &ignore_table);
+      if (ignore_table.length == 0)	/* nothing was loaded into the table */
+        ignore_file = NULL;
+    }
+
+  if (only_file)
+    {
+      digest_word_file (only_file, &only_table);
+      if (only_table.length == 0)	/* nothing was loaded into the table */
+        only_file = NULL;
+    }
+
+  /* Prepare to study all the input files.  */
+
+  number_of_occurs[0] = 0;
+  total_line_count = 0;
+  maximum_word_length = 0;
+  reference_max_width = 0;
+
+  for (file_index = 0; file_index < number_input_files; file_index++)
+    {
+      BLOCK *text_buffer = text_buffers + file_index;	/* this file's slot */
+
+      /* Read the file contents into memory, then study it.  */
+
+      swallow_file_in_memory (input_file_name[file_index], text_buffer);
+      find_occurs_in_text (file_index);
+
+      /* Maintain for each file how many lines has been read so far when its
+         end is reached.  Incrementing the count first is a simple kludge to
+         handle a possible incomplete line at end of file.  */
+
+      total_line_count++;
+      file_line_count[file_index] = total_line_count;	/* running total, not per-file */
+    }
+
+  /* Do the output process phase.  */
+
+  sort_found_occurs ();
+  fix_output_parameters ();
+  generate_all_output ();
+
+  /* All done.  */
+
+  return EXIT_SUCCESS;
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 16:11:47 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 16:11:47 +0000
commit	758f820bcc0f68aeebac1717e537ca13a320b909 (patch)
tree	48111ece75cf4f98316848b37a7e26356e00669e /src/ptx.c
parent	Initial commit. (diff)
download	coreutils-758f820bcc0f68aeebac1717e537ca13a320b909.tar.xz coreutils-758f820bcc0f68aeebac1717e537ca13a320b909.zip