16 files changed, 10194 insertions, 0 deletions
diff --git a/src/preproc/refer/TODO b/src/preproc/refer/TODO
new file mode 100644
index 0000000..36f4508
--- /dev/null
+++ b/src/preproc/refer/TODO
@@ -0,0 +1,124 @@
+inline references
+
+Some sort of macro/subroutine that can cover several references.
+
+move-punctuation should ignore multiple punctuation characters.
+
+Make the index files machine independent.
+
+Allow search keys to be negated (with !) to indicate that the
+reference should not contain the key.  Ignore negated keys during
+indexed searching.
+
+Provide an option with lkbib and lookbib that prints the location
+(filename, position) of each reference.  Need to map filename_id's
+back to filenames.
+
+Rename join-authors to join-fields.  Have a separate label-join-fields
+command used by @ and #.
+
+Have some sort of quantifier: e.g., $.n#A means execute '$.n' for each
+instance of an A field, setting $ to that field, and then join the
+results using the join-authors command.
+
+no-text-in-bracket command which says not to allow post_text and
+pre_text when the [] flags have been given.  Useful for superscripted
+footnotes.
+
+Make it possible to translate - to \(en in page ranges.
+
+Trim eign a bit.
+
+In indexed searching discard all numeric keys except dates.
+
+Allow '\ ' to separate article from first word.
+
+%also
+
+Option automatically to supply [] flags in every reference.
+
+See if we can avoid requiring a comma before jr. and so on
+in find_last_name().
+
+Cache sortified authors in authors string during tentative evaluation of
+label specification.
+
+Possibly don't allow * and % expressions in the first part of ?:, | or
+& expressions.
+
+Handle better the case where <> occurs inside functions and in the
+first operand of ~. Or perhaps implement <> using some magic character
+in the string.
+
+Should special treatment be given to lines beginning with . in
+references?  (Unix refer seems to treat them like '%').
+
+Add global flag to control whether all files should be stat-ed after
+loading, and whether they should be stat-ed before each search.
+Perhaps make this dependent on the number of files there are.
+
+Option to truncate keys to truncate_len in linear searching.
+
+Allow multiple -f options in indxbib.
+
+In indxbib, possibly store common words rather than common words
+filename.  In this case store only words that are actually present in
+the file.
+
+Perhaps we should put out an obnoxious copyright message when lookbib
+starts up.
+
+Provide an option that writes a file containing just the references
+actually used.  Useful if you want to distribute a document.
+
+Have a magic token such that
+%A <sort stuff><magic token><print stuff>
+will print as though it were
+%A <print stuff>
+but sort as though it were
+%A <sort stuff>
+Do we need this if we can specify author alternatives for sorting?
+No, provided we have separate alternatives for @.
+
+In consider_authors when last names are ambiguous we might be able to
+use just the first name and not the Jr. bit.  Or we might be able to
+abbreviate the author.
+
+It ought to be possible to specify an alternative field to sort on
+instead of date. (i.e., if there's a field giving the type of document --
+these references should sort after any years)
+
+Provide a way to execute a command using a command-line option.
+
+Option to set the label-spec as a command-line option (-L).
+
+Command to specify which fields can occur multiple times:
+multiple AE
+
+Command to specify how various fields sort:
+sort-as-name A
+sort-as-date D
+sort-as-title T
+sort-as-other O
+
+Command to specify which fields are author fields:
+# if we don't have A use field Q
+author-fields AQ
+
+Commands to set properties of tokens.
+sortify-token \(ae ae
+uppercase-token \[ae] \[AE]
+
+Command to set the names of months:
+months january february march april may ...
+
+Perhaps provide some sort of macro capability:
+# perhaps a macro capability
+defmacro foo
+annotation-field $1
+endef
+
+Command to control strings used in capitalization
+capitalize-start \s+2
+capitalize-end \s-2
+(perhaps make these arguments to the capitalize command.)
diff --git a/src/preproc/refer/command.cpp b/src/preproc/refer/command.cpp
new file mode 100644
index 0000000..b49e2be
--- /dev/null
+++ b/src/preproc/refer/command.cpp
@@ -0,0 +1,814 @@
+/* Copyright (C) 1989-2020 Free Software Foundation, Inc.
+     Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include "refer.h"
+#include "refid.h"
+#include "search.h"
+#include "command.h"
+
+cset cs_field_name = csalpha;
+
+class input_item {
+  input_item *next;
+  char *filename;
+  int first_lineno;
+  string buffer;
+  const char *ptr;
+  const char *end;
+public:
+  input_item(string &, const char *, int = 1);
+  ~input_item();
+  int get_char();
+  int peek_char();
+  void skip_char();
+  void get_location(const char **, int *);
+
+  friend class input_stack;
+};
+
+input_item::input_item(string &s, const char *fn, int ln)
+: next(0), filename(strsave(fn)), first_lineno(ln) // next: unlinked
+{
+  buffer.move(s); // fill buffer from s via string::move()
+  ptr = buffer.contents(); // ptr..end delimit the unread text
+  end = ptr + buffer.length();
+}
+
+input_item::~input_item()
+{
+  delete[] filename;
+}
+
+inline int input_item::peek_char()
+{
+  // Report EOF once the buffer is used up; otherwise yield the next
+  // byte as a non-negative value without consuming it.
+  const bool exhausted = ptr >= end;
+  return exhausted ? EOF : (unsigned char)*ptr;
+}
+
+inline int input_item::get_char()
+{
+  // Report EOF once the buffer is used up; otherwise yield the next
+  // byte as a non-negative value and step past it.
+  const bool exhausted = ptr >= end;
+  return exhausted ? EOF : (unsigned char)*ptr++;
+}
+
+inline void input_item::skip_char()
+{
+  ptr++;
+}
+
+void input_item::get_location(const char **filenamep, int *linenop)
+{
+  *filenamep = filename; // points at this item's own copy of the name
+  if (ptr == buffer.contents())
+    *linenop = first_lineno; // nothing has been consumed yet
+  else {
+    int ln = first_lineno;
+    const char *e = ptr - 1; // exclude the most recently consumed char
+    for (const char *p = buffer.contents(); p < e; p++)
+      if (*p == '\n')
+	ln++;
+    ln--; // Back up to identify line number _before_ last seen newline.
+    *linenop = ln;
+  }
+  return;
+}
+
+class input_stack {
+  static input_item *top;
+public:
+  static void init();
+  static int get_char();
+  static int peek_char();
+  static void skip_char() { top->skip_char(); }
+  static void push_file(const char *);
+  static void push_string(string &, const char *, int);
+  static void error(const char *format,
+		    const errarg &arg1 = empty_errarg,
+		    const errarg &arg2 = empty_errarg,
+		    const errarg &arg3 = empty_errarg);
+};
+
+input_item *input_stack::top = 0;
+
+void input_stack::init()
+{
+  input_item *cur;
+  while ((cur = top) != 0) { // pop and free items until none remain
+    top = cur->next; // unlink before freeing
+    delete cur;
+  }
+}
+
+int input_stack::get_char()
+{
+  input_item *cur;
+  while ((cur = top) != 0) {
+    const int ch = cur->get_char();
+    if (ch >= 0)
+      return ch;
+    top = cur->next; // exhausted item: pop it and look further down
+    delete cur;
+  }
+  return -1; // nothing left on the stack
+}
+
+int input_stack::peek_char()
+{
+  input_item *cur;
+  while ((cur = top) != 0) {
+    const int ch = cur->peek_char();
+    if (ch >= 0)
+      return ch;
+    top = cur->next; // exhausted item: pop it and look further down
+    delete cur;
+  }
+  return -1; // nothing left on the stack
+}
+
+void input_stack::push_file(const char *fn)
+{
+  FILE *fp;
+  if (strcmp(fn, "-") == 0) { // "-" names standard input
+    fp = stdin;
+    fn = "<standard input>";
+  }
+  else {
+    errno = 0;
+    fp = fopen(fn, "r");
+    if (fp == 0) {
+      error("can't open '%1': %2", fn, strerror(errno));
+      return;
+    }
+  }
+  string buf; // the whole file is read into memory before being pushed
+  bool is_at_beginning_of_line = true;
+  int lineno = 1;
+  for (;;) {
+    int c = getc(fp);
+    if (is_at_beginning_of_line && c == '.') {
+      // replace lines beginning with .R1 or .R2 with a blank line
+      c = getc(fp);
+      if (c == 'R') {
+	c = getc(fp);
+	if (c == '1' || c == '2') {
+	  int cc = c; // remember the digit in case it must be re-emitted
+	  c = getc(fp);
+	  if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
+	    while (c != '\n' && c != EOF) // drop the rest of the line
+	      c = getc(fp);
+	  }
+	  else { // not compatible mode and more text follows: keep it
+	    buf += '.';
+	    buf += 'R';
+	    buf += cc;
+	  }
+	}
+	else {
+	  buf += '.';
+	  buf += 'R';
+	}
+      }
+      else
+	buf += '.';
+    }
+    if (c == EOF)
+      break;
+    if (is_invalid_input_char(c)) // reported and dropped, not stored
+      error_with_file_and_line(fn, lineno,
+			       "invalid input character code %1", c);
+    else {
+      buf += c;
+      if (c == '\n') {
+	is_at_beginning_of_line = true;
+	lineno++;
+      }
+      else
+	is_at_beginning_of_line = false;
+    }
+  }
+  if (fp != stdin)
+    fclose(fp);
+  if (buf.length() > 0 && buf[buf.length() - 1] != '\n')
+    buf += '\n'; // make sure non-empty input ends with a newline
+  input_item *it = new input_item(buf, fn);
+  it->next = top; // the new item becomes the top of the stack
+  top = it;
+}
+
+void input_stack::push_string(string &s, const char *filename, int lineno)
+{
+  input_item *it = new input_item(s, filename, lineno);
+  it->next = top;
+  top = it;
+}
+
+void input_stack::error(const char *format, const errarg &arg1,
+			const errarg &arg2, const errarg &arg3)
+{
+  if (top == 0) { // no active input: report without a location
+    ::error(format, arg1, arg2, arg3);
+    return;
+  }
+  const char *filename;
+  int lineno;
+  top->get_location(&filename, &lineno);
+  error_with_file_and_line(filename, lineno, format, arg1, arg2, arg3);
+}
+
+void command_error(const char *format, const errarg &arg1,
+		   const errarg &arg2, const errarg &arg3)
+{
+  input_stack::error(format, arg1, arg2, arg3);
+}
+
+// # not recognized in ""
+// \<newline> is recognized in ""
+// # does not conceal newline
+// if missing closing quote, word extends to end of line
+// no special treatment of \ other than before newline
+// \<newline> not recognized after #
+// ; allowed as alternative to newline
+// ; not recognized in ""
+// don't clear word_buffer; just append on
+// return -1 for EOF, 0 for newline, 1 for word
+
+int get_word(string &word_buffer)
+{
+  int c = input_stack::get_char();
+  for (;;) { // skip blanks, \<newline> pairs and at most one # comment
+    if (c == '#') {
+      do {
+	c = input_stack::get_char();
+      } while (c != '\n' && c != EOF);
+      break; // c is the newline (or EOF) that ended the comment
+    }
+    if (c == '\\' && input_stack::peek_char() == '\n')
+      input_stack::skip_char();
+    else if (c != ' ' && c != '\t')
+      break;
+    c = input_stack::get_char();
+  }
+  if (c == EOF)
+    return -1;
+  if (c == '\n' || c == ';')
+    return 0;
+  if (c == '"') { // quoted word; the opening quote is not stored
+    for (;;) {
+      c = input_stack::peek_char();
+      if (c == EOF || c == '\n')
+	break; // no closing quote: stop, leaving the newline unread
+      input_stack::skip_char();
+      if (c == '"') {
+	int d = input_stack::peek_char();
+	if (d == '"') // doubled quote: both consumed, nothing stored
+	  input_stack::skip_char();
+	else
+	  break;
+      }
+      else if (c == '\\') {
+	int d = input_stack::peek_char();
+	if (d == '\n')
+	  input_stack::skip_char(); // \<newline> is dropped entirely
+	else
+	  word_buffer += '\\';
+      }
+      else
+	word_buffer += c;
+    }
+    return 1;
+  }
+  word_buffer += c; // unquoted word: c is its first character
+  while ((c = input_stack::peek_char()) != EOF) {
+    // EOF ends the word: skip_char() must not run once input is gone.
+    if (c == ' ' || c == '\t' || c == '\n' || c == '#' || c == ';')
+      break;
+    input_stack::skip_char();
+    if (c == '\\') {
+      int d = input_stack::peek_char();
+      if (d == '\n')
+	input_stack::skip_char();
+      else
+	word_buffer += '\\';
+    }
+    else
+      word_buffer += c;
+  }
+  return 1;
+}
+
+union argument {
+  const char *s;
+  int n;
+};
+
+// This is for debugging.
+
+static void echo_command(int argc, argument *argv)
+{
+  for (int i = 0; i < argc; i++)
+    fprintf(stderr, "%s\n", argv[i].s);
+}
+
+static void include_command(int argc, argument *argv)
+{
+  assert(argc == 1);
+  input_stack::push_file(argv[0].s);
+}
+
+static void capitalize_command(int argc, argument *argv)
+{
+  if (argc > 0)
+    capitalize_fields = argv[0].s;
+  else
+    capitalize_fields.clear();
+}
+
+static void accumulate_command(int, argument *)
+{
+  accumulate = 1;
+}
+
+static void no_accumulate_command(int, argument *)
+{
+  accumulate = 0;
+}
+
+static void move_punctuation_command(int, argument *)
+{
+  move_punctuation = 1;
+}
+
+static void no_move_punctuation_command(int, argument *)
+{
+  move_punctuation = 0;
+}
+
+static void sort_command(int argc, argument *argv)
+{
+  // The field list defaults to "AD" when none is given.  Asking for
+  // sorted output also turns accumulation on.
+  const char *fields = argc == 0 ? "AD" : argv[0].s;
+  sort_fields = fields;
+  accumulate = 1;
+}
+
+static void no_sort_command(int, argument *)
+{
+  sort_fields.clear();
+}
+
+static void articles_command(int argc, argument *argv)
+{
+  // Store the words back to back, each one followed by a NUL ...
+  articles.clear();
+  for (int w = 0; w < argc; w++) {
+    articles += argv[w].s;
+    articles += '\0';
+  }
+  // ... then map every stored character through cmlower() in place.
+  for (int k = articles.length() - 1; k >= 0; k--)
+    articles[k] = cmlower(articles[k]);
+}
+
+static void database_command(int argc, argument *argv)
+{
+  for (int i = 0; i < argc; i++)
+    database_list.add_file(argv[i].s);
+}
+
+static void default_database_command(int, argument *)
+{
+  search_default = 1;
+}
+
+static void no_default_database_command(int, argument *)
+{
+  search_default = 0;
+}
+
+static void bibliography_command(int argc, argument *argv)
+{
+  have_bibliography = 1;
+  const char *saved_filename = current_filename;
+  int saved_lineno = current_lineno;
+  int saved_label_in_text = label_in_text;
+  label_in_text = 0;
+  if (!accumulate)
+    fputs(".]<\n", stdout);
+  for (int i = 0; i < argc; i++)
+    do_bib(argv[i].s);
+  if (accumulate)
+    output_references();
+  else
+    fputs(".]>\n", stdout);
+  current_filename = saved_filename;
+  current_lineno = saved_lineno;
+  label_in_text = saved_label_in_text;
+}
+
+static void annotate_command(int argc, argument *argv)
+{
+  if (argc > 0)
+    annotation_field = argv[0].s[0];
+  else
+    annotation_field = 'X';
+  if (argc == 2)
+    annotation_macro = argv[1].s;
+  else
+    annotation_macro = "AP";
+}
+
+static void no_annotate_command(int, argument *)
+{
+  annotation_macro.clear();
+  annotation_field = -1;
+}
+
+static void reverse_command(int, argument *argv)
+{
+  reverse_fields = argv[0].s;
+}
+
+static void no_reverse_command(int, argument *)
+{
+  reverse_fields.clear();
+}
+
+static void abbreviate_command(int argc, argument *argv)
+{
+  abbreviate_fields = argv[0].s;
+  period_before_initial = argc > 1 ? argv[1].s : ". ";
+  period_before_last_name = argc > 2 ? argv[2].s : ". ";
+  period_before_other = argc > 3 ? argv[3].s : ". ";
+  period_before_hyphen = argc > 4 ? argv[4].s : ".";
+}
+
+static void no_abbreviate_command(int, argument *)
+{
+  abbreviate_fields.clear();
+}
+
+string search_ignore_fields;
+
+static void search_ignore_command(int argc, argument *argv)
+{
+  if (argc > 0)
+    search_ignore_fields = argv[0].s;
+  else
+    search_ignore_fields = "XYZ"; // default when no list is given
+  search_ignore_fields += '\0'; // contents() is used as a C string below
+  linear_ignore_fields = search_ignore_fields.contents();
+}
+
+static void no_search_ignore_command(int, argument *)
+{
+  linear_ignore_fields = "";
+}
+
+static void search_truncate_command(int argc, argument *argv)
+{
+  if (argc > 0)
+    linear_truncate_len = argv[0].n;
+  else
+    linear_truncate_len = 6;
+}
+
+static void no_search_truncate_command(int, argument *)
+{
+  linear_truncate_len = -1;
+}
+
+static void discard_command(int argc, argument *argv)
+{
+  if (argc == 0)
+    discard_fields = "XYZ";
+  else
+    discard_fields = argv[0].s;
+  accumulate = 1;
+}
+
+static void no_discard_command(int, argument *)
+{
+  discard_fields.clear();
+}
+
+static void label_command(int, argument *argv)
+{
+  set_label_spec(argv[0].s);
+}
+
+static void abbreviate_label_ranges_command(int argc, argument *argv)
+{
+  abbreviate_label_ranges = 1;
+  label_range_indicator = argc > 0 ? argv[0].s : "-";
+}
+
+static void no_abbreviate_label_ranges_command(int, argument *)
+{
+  abbreviate_label_ranges = 0;
+}
+
+static void label_in_reference_command(int, argument *)
+{
+  label_in_reference = 1;
+}
+
+static void no_label_in_reference_command(int, argument *)
+{
+  label_in_reference = 0;
+}
+
+static void label_in_text_command(int, argument *)
+{
+  label_in_text = 1;
+}
+
+static void no_label_in_text_command(int, argument *)
+{
+  label_in_text = 0;
+}
+
+static void sort_adjacent_labels_command(int, argument *)
+{
+  sort_adjacent_labels = 1;
+}
+
+static void no_sort_adjacent_labels_command(int, argument *)
+{
+  sort_adjacent_labels = 0;
+}
+
+static void date_as_label_command(int argc, argument *argv)
+{
+  if (set_date_label_spec(argc > 0 ? argv[0].s : "D%a*"))
+    date_as_label = 1;
+}
+
+static void no_date_as_label_command(int, argument *)
+{
+  date_as_label = 0;
+}
+
+static void short_label_command(int, argument *argv)
+{
+  if (set_short_label_spec(argv[0].s))
+    short_label_flag = 1;
+}
+
+static void no_short_label_command(int, argument *)
+{
+  short_label_flag = 0;
+}
+
+static void compatible_command(int, argument *)
+{
+  compatible_flag = 1;
+}
+
+static void no_compatible_command(int, argument *)
+{
+  compatible_flag = 0;
+}
+
+static void join_authors_command(int argc, argument *argv)
+{
+  join_authors_exactly_two = argv[0].s;
+  join_authors_default = argc > 1 ? argv[1].s : argv[0].s;
+  join_authors_last_two = argc == 3 ? argv[2].s : argv[0].s;
+}
+
+static void bracket_label_command(int, argument *argv)
+{
+  pre_label = argv[0].s;
+  post_label = argv[1].s;
+  sep_label = argv[2].s;
+}
+
+static void separate_label_second_parts_command(int, argument *argv)
+{
+  separate_label_second_parts = argv[0].s;
+}
+
+static void et_al_command(int argc, argument *argv)
+{
+  // argv[1] and argv[2] were converted to integers by check_args().
+  const int min_elide = argv[1].n;
+  const int min_total = argc > 2 ? argv[2].n : 0;
+  et_al = argv[0].s;
+  et_al_min_elide = min_elide < 1 ? 1 : min_elide;
+  et_al_min_total = min_total;
+}
+
+static void no_et_al_command(int, argument *)
+{
+  et_al.clear();
+  et_al_min_elide = 0;
+}
+
+typedef void (*command_t)(int, argument *);
+
+/* arg_types is a string describing the numbers and types of arguments.
+s means a string, i means an integer, f is a list of fields, F is
+a single field,
+? means that the previous argument is optional, * means that the
+previous argument can occur any number of times. */
+
+struct S {
+  const char *name;
+  command_t func;
+  const char *arg_types;
+} command_table[] = {
+  { "include", include_command, "s" },
+  { "echo", echo_command, "s*" },
+  { "capitalize", capitalize_command, "f?" },
+  { "accumulate", accumulate_command, "" },
+  { "no-accumulate", no_accumulate_command, "" },
+  { "move-punctuation", move_punctuation_command, "" },
+  { "no-move-punctuation", no_move_punctuation_command, "" },
+  { "sort", sort_command, "s?" },
+  { "no-sort", no_sort_command, "" },
+  { "articles", articles_command, "s*" },
+  { "database", database_command, "ss*" },
+  { "default-database", default_database_command, "" },
+  { "no-default-database", no_default_database_command, "" },
+  { "bibliography", bibliography_command, "ss*" },
+  { "annotate", annotate_command, "F?s?" },
+  { "no-annotate", no_annotate_command, "" },
+  { "reverse", reverse_command, "s" },
+  { "no-reverse", no_reverse_command, "" },
+  { "abbreviate", abbreviate_command, "ss?s?s?s?" },
+  { "no-abbreviate", no_abbreviate_command, "" },
+  { "search-ignore", search_ignore_command, "f?" },
+  { "no-search-ignore", no_search_ignore_command, "" },
+  { "search-truncate", search_truncate_command, "i?" },
+  { "no-search-truncate", no_search_truncate_command, "" },
+  { "discard", discard_command, "f?" },
+  { "no-discard", no_discard_command, "" },
+  { "label", label_command, "s" },
+  { "abbreviate-label-ranges", abbreviate_label_ranges_command, "s?" },
+  { "no-abbreviate-label-ranges", no_abbreviate_label_ranges_command, "" },
+  { "label-in-reference", label_in_reference_command, "" },
+  { "no-label-in-reference", no_label_in_reference_command, "" },
+  { "label-in-text", label_in_text_command, "" },
+  { "no-label-in-text", no_label_in_text_command, "" },
+  { "sort-adjacent-labels", sort_adjacent_labels_command, "" },
+  { "no-sort-adjacent-labels", no_sort_adjacent_labels_command, "" },
+  { "date-as-label", date_as_label_command, "s?" },
+  { "no-date-as-label", no_date_as_label_command, "" },
+  { "short-label", short_label_command, "s" },
+  { "no-short-label", no_short_label_command, "" },
+  { "compatible", compatible_command, "" },
+  { "no-compatible", no_compatible_command, "" },
+  { "join-authors", join_authors_command, "sss?" },
+  { "bracket-label", bracket_label_command, "sss" },
+  { "separate-label-second-parts", separate_label_second_parts_command, "s" },
+  { "et-al", et_al_command, "sii?" },
+  { "no-et-al", no_et_al_command, "" },
+};
+
+static int check_args(const char *types, const char *name,
+		      int argc, argument *argv)
+{
+  int argno = 0;
+  while (*types) {
+    if (argc == 0) { // out of arguments: fine only if the rest is optional
+      if (types[1] == '?')
+	break;
+      else if (types[1] == '*') {
+	assert(types[2] == '\0');
+	break;
+      }
+      else {
+	input_stack::error("missing argument for command '%1'", name);
+	return 0;
+      }
+    }
+    switch (*types) {
+    case 's':
+      break;
+    case 'i': { // decimal integer that must fit in an int
+	char *ptr;
+	errno = 0; // so ERANGE below can only come from this strtol
+	long n = strtol(argv->s, &ptr, 10);
+	if ((n == 0 && ptr == argv->s) || *ptr != '\0'
+	    || errno == ERANGE || n != (long)(int)n) {
+	  input_stack::error("argument %1 for command '%2' must be an integer",
+			     argno + 1, name);
+	  return 0;
+	}
+	argv->n = (int)n; // the union member now holds the number
+	break;
+      }
+    case 'f':
+      {
+	for (const char *ptr = argv->s; *ptr != '\0'; ptr++)
+	  if (!cs_field_name(*ptr)) {
+	    input_stack::error("argument %1 for command '%2' must be a list of fields",
+			     argno + 1, name);
+	    return 0;
+	  }
+	break;
+      }
+    case 'F':
+      if (argv->s[0] == '\0' || argv->s[1] != '\0'
+	  || !cs_field_name(argv->s[0])) {
+	input_stack::error("argument %1 for command '%2' must be a field name",
+			   argno + 1, name);
+	return 0;
+      }
+      break;
+    default:
+      assert(0);
+    }
+    if (types[1] == '?')
+      types += 2;
+    else if (types[1] != '*') // a '*' entry keeps matching further args
+      types += 1;
+    --argc;
+    ++argv;
+    ++argno;
+  }
+  if (argc > 0) {
+    input_stack::error("too many arguments for command '%1'", name);
+    return 0;
+  }
+  return 1;
+}
+
+static void execute_command(const char *name, int argc, argument *argv)
+{
+  const unsigned int n = sizeof(command_table)/sizeof(command_table[0]);
+  for (const S *cmd = command_table; cmd != command_table + n; cmd++)
+    if (strcmp(name, cmd->name) == 0) {
+      if (check_args(cmd->arg_types, name, argc, argv))
+	(*cmd->func)(argc, argv);
+      return; // check_args() has already reported any problem
+    }
+  input_stack::error("unknown command '%1'", name);
+}
+
+static void command_loop()
+{
+  string command; // command name and arguments, each NUL-terminated
+  for (;;) {
+    command.clear();
+    int res = get_word(command);
+    if (res != 1) {
+      if (res == 0) // empty command (bare newline or ';')
+	continue;
+      break; // end of input
+    }
+    int argc = 0;
+    command += '\0';
+    while ((res = get_word(command)) == 1) {
+      argc++;
+      command += '\0';
+    }
+    argument *argv = new argument[argc];
+    const char *ptr = command.contents();
+    for (int i = 0; i < argc; i++)
+      argv[i].s = ptr = strchr(ptr, '\0') + 1; // start of the next word
+    execute_command(command.contents(), argc, argv);
+    delete[] argv;
+    if (res == -1) // input ended right after this command
+      break;
+  }
+}
+
+void process_commands(string &s, const char *file, int lineno)
+{
+  const char *saved_filename = current_filename;
+  int saved_lineno = current_lineno;
+  input_stack::init(); // free any items still on the stack
+  current_filename = file;
+  // Report diagnostics with respect to line _before_ last newline seen.
+  current_lineno = lineno - 1;
+  input_stack::push_string(s, file, lineno);
+  command_loop();
+  current_filename = saved_filename; // restore the caller's location
+  current_lineno = saved_lineno;
+}
+
+// Local Variables:
+// fill-column: 72
+// mode: C++
+// End:
+// vim: set cindent noexpandtab shiftwidth=2 textwidth=72:
diff --git a/src/preproc/refer/command.h b/src/preproc/refer/command.h
new file mode 100644
index 0000000..db850f4
--- /dev/null
+++ b/src/preproc/refer/command.h
@@ -0,0 +1,35 @@
+// -*- C++ -*-
+/* Copyright (C) 1989-2020 Free Software Foundation, Inc.
+     Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+void process_commands(string &s, const char *file, int lineno);
+
+extern int have_bibliography;
+extern int accumulate;
+extern int move_punctuation;
+extern int search_default;
+extern search_list database_list;
+extern int label_in_text;
+extern int label_in_reference;
+extern int sort_adjacent_labels;
+extern string pre_label;
+extern string post_label;
+extern string sep_label;
+
+extern void do_bib(const char *);
+extern void output_references();
diff --git a/src/preproc/refer/label.cpp b/src/preproc/refer/label.cpp
new file mode 100644
index 0000000..f8c1645
--- /dev/null
+++ b/src/preproc/refer/label.cpp
@@ -0,0 +1,2607 @@
+/* A Bison parser, made by GNU Bison 3.8.2.  */
+
+/* Bison implementation for Yacc-like parsers in C
+
+   Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2021 Free Software Foundation,
+   Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* As a special exception, you may create a larger work that contains
+   part or all of the Bison parser skeleton and distribute that work
+   under terms of your choice, so long as that work isn't itself a
+   parser generator using the skeleton or a modified version thereof
+   as a parser skeleton.  Alternatively, if you modify or redistribute
+   the parser skeleton itself, you may (at your option) remove this
+   special exception, which will cause the skeleton and the resulting
+   Bison output files to be licensed under the GNU General Public
+   License without this special exception.
+
+   This special exception was added by the Free Software Foundation in
+   version 2.2 of Bison.  */
+
+/* C LALR(1) parser skeleton written by Richard Stallman, by
+   simplifying the original so-called "semantic" parser.  */
+
+/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual,
+   especially those whose names start with YY_ or yy_.  They are
+   private implementation details that can be changed or removed.  */
+
+/* All symbols defined below should begin with yy or YY, to avoid
+   infringing on user name space.  This should be done even for local
+   variables, as they might otherwise be expanded by user macros.
+   There are some unavoidable exceptions within include files to
+   define necessary library symbols; they are noted "INFRINGES ON
+   USER NAME SPACE" below.  */
+
+/* Identify Bison output, and Bison version.  */
+#define YYBISON 30802
+
+/* Bison version string.  */
+#define YYBISON_VERSION "3.8.2"
+
+/* Skeleton name.  */
+#define YYSKELETON_NAME "yacc.c"
+
+/* Pure parsers.  */
+#define YYPURE 0
+
+/* Push parsers.  */
+#define YYPUSH 0
+
+/* Pull parsers.  */
+#define YYPULL 1
+
+
+
+
+/* First part of user prologue.  */
+#line 19 "../src/preproc/refer/label.ypp"
+
+
+#include "refer.h"
+#include "refid.h"
+#include "ref.h"
+#include "token.h"
+
+int yylex();
+void yyerror(const char *);
+
+static const char *format_serial(char c, int n);
+
+struct label_info {  // record type handed out by lookup_label() (declared below)
+  int start;
+  int length;
+  int count;
+  int total;
+  label_info(const string &);  // declared only; the body is not in this struct
+};
+
+label_info *lookup_label(const string &label);
+
+struct expression {  // abstract base of every label-expression node
+  enum {  // bit flags (octal literals) OR-ed together in analyze() results
+    // Does the tentative label depend on the reference?
+    CONTAINS_VARIABLE = 01, 
+    CONTAINS_STAR = 02,
+    CONTAINS_FORMAT = 04,
+    CONTAINS_AT = 010
+  };
+  virtual ~expression() { }  // virtual: nodes are deleted through expression pointers
+  virtual void evaluate(int, const reference &, string &,
+			substring_position &) = 0;
+  virtual unsigned analyze() { return 0; }  // default: no flags set
+};
+
+class at_expr : public expression {  // leaf node with no operands and no state
+public:
+  at_expr() { }
+  void evaluate(int, const reference &, string &, substring_position &);
+  unsigned analyze() { return CONTAINS_VARIABLE|CONTAINS_AT; }  // reference-dependent, plus the AT flag
+};
+
+class format_expr : public expression {  // leaf node: a format character plus two numbers
+  char type;
+  int width;
+  int first_number;
+public:
+  format_expr(char c, int w = 0, int f = 1)  // width defaults to 0, first_number to 1
+    : type(c), width(w), first_number(f) { }
+  void evaluate(int, const reference &, string &, substring_position &);
+  unsigned analyze() { return CONTAINS_FORMAT; }  // always reports only the FORMAT flag
+};
+
+class field_expr : public expression {  // leaf node: a field letter plus a number
+  int number;
+  char name;
+public:
+  field_expr(char nm, int num) : number(num), name(nm) { }  // args are (name, number); inits follow member order
+  void evaluate(int, const reference &, string &, substring_position &);
+  unsigned analyze() { return CONTAINS_VARIABLE; }  // result depends on the reference
+};
+
+class literal_expr : public expression {  // leaf node holding fixed text
+  string s;
+public:
+  literal_expr(const char *ptr, int len) : s(ptr, len) { }  // s is constructed from ptr and len
+  void evaluate(int, const reference &, string &, substring_position &);
+};  // analyze() is not overridden, so the base result 0 applies
+
+class unary_expr : public expression {  // abstract node with a single operand
+protected:
+  expression *expr;  // operand; deleted by the destructor, may be null (analyze() checks)
+public:
+  unary_expr(expression *e) : expr(e) { }
+  ~unary_expr() { delete expr; }  // NOTE(review): copying is not disabled; a copy would delete expr twice
+  void evaluate(int, const reference &, string &, substring_position &) = 0;
+  unsigned analyze() { return expr ? expr->analyze() : 0; }  // a null operand contributes no flags
+};
+
+// This caches the analysis of an expression.
+
+class analyzed_expr : public unary_expr {  // unary node that stores analysis flags
+  unsigned flags;  // handed back by analyze(); presumably filled in by the constructor -- confirm
+public:
+  explicit analyzed_expr(expression *);  // explicit: no silent conversion from a raw node pointer
+  void evaluate(int, const reference &, string &, substring_position &);
+  unsigned analyze() { return flags; }  // returns the stored value, does not consult the operand
+};
+
+class star_expr : public unary_expr {  // unary node that reports CONTAINS_STAR
+public:
+  explicit star_expr(expression *e) : unary_expr(e) { }  // explicit: e becomes owned (base dtor deletes it)
+  void evaluate(int, const reference &, string &, substring_position &);
+  unsigned analyze() {  // operand flags with CONTAINS_VARIABLE cleared, CONTAINS_STAR added
+    return ((expr ? (expr->analyze() & ~CONTAINS_VARIABLE) : 0)
+	    | CONTAINS_STAR);
+  }
+};
+
+typedef void map_func(const char *, const char *, string &);
+
+class map_expr : public unary_expr {  // unary node paired with a map_func callback
+  map_func *func;  // function pointer supplied at construction; not deleted here
+public:
+  map_expr(expression *e, map_func *f) : unary_expr(e), func(f) { }
+  void evaluate(int, const reference &, string &, substring_position &);
+};  // analyze() comes from unary_expr: the operand's flags
+  
+typedef const char *extractor_func(const char *, const char *, const char **);
+
+class extractor_expr : public unary_expr {  // unary node paired with an extractor_func callback
+  int part;
+  extractor_func *func;
+public:
+  enum { BEFORE = +1, MATCH = 0, AFTER = -1 };  // presumably the values accepted for part -- confirm in evaluate()
+  extractor_expr(expression *e, extractor_func *f, int pt)  // args are (e, f, pt); inits follow member order
+    : unary_expr(e), part(pt), func(f) { }
+  void evaluate(int, const reference &, string &, substring_position &);
+};
+
+class truncate_expr : public unary_expr {  // unary node carrying an integer n
+  int n;
+public:
+  truncate_expr(expression *e, int i) : unary_expr(e), n(i) { } // stores i as n
+  void evaluate(int, const reference &, string &, substring_position &);
+};  // analyze() comes from unary_expr: the operand's flags
+
+class separator_expr : public unary_expr {  // unary node; only evaluate() is specialised
+public:
+  explicit separator_expr(expression *e) : unary_expr(e) { }  // explicit: e becomes owned (base dtor deletes it)
+  void evaluate(int, const reference &, string &, substring_position &);
+};  // analyze() comes from unary_expr: the operand's flags
+
+class binary_expr : public expression {  // abstract node with two operands
+protected:
+  expression *expr1;  // first operand; deleted by the destructor, may be null
+  expression *expr2;  // second operand; deleted by the destructor, may be null
+public:
+  binary_expr(expression *e1, expression *e2) : expr1(e1), expr2(e2) { }
+  ~binary_expr() { delete expr1; delete expr2; }  // NOTE(review): copying is not disabled; a copy would delete twice
+  void evaluate(int, const reference &, string &, substring_position &) = 0;
+  unsigned analyze() {  // union of both operands' flags; a null operand contributes none
+    return (expr1 ? expr1->analyze() : 0) | (expr2 ? expr2->analyze() : 0);
+  }
+};
+
+class alternative_expr : public binary_expr {  // concrete two-operand node; only evaluate() is specialised
+public:
+  alternative_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
+  void evaluate(int, const reference &, string &, substring_position &);
+};  // analyze() comes from binary_expr
+
+class list_expr : public binary_expr {  // concrete two-operand node; only evaluate() is specialised
+public:
+  list_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
+  void evaluate(int, const reference &, string &, substring_position &);
+};  // analyze() comes from binary_expr
+
+class substitute_expr : public binary_expr {  // concrete two-operand node; only evaluate() is specialised
+public:
+  substitute_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
+  void evaluate(int, const reference &, string &, substring_position &);
+};  // analyze() comes from binary_expr
+
+class ternary_expr : public expression {  // abstract node with three operands
+protected:
+  expression *expr1;  // each operand is deleted by the destructor and may be null
+  expression *expr2;
+  expression *expr3;
+public:
+  ternary_expr(expression *e1, expression *e2, expression *e3)
+    : expr1(e1), expr2(e2), expr3(e3) { }
+  ~ternary_expr() { delete expr1; delete expr2; delete expr3; }  // NOTE(review): copying is not disabled
+  void evaluate(int, const reference &, string &, substring_position &) = 0;
+  unsigned analyze() {  // union of all three operands' flags; null operands contribute none
+    return ((expr1 ? expr1->analyze() : 0)
+	    | (expr2 ? expr2->analyze() : 0)
+	    | (expr3 ? expr3->analyze() : 0));
+  }
+};
+
+class conditional_expr : public ternary_expr {  // concrete three-operand node; only evaluate() is specialised
+public:
+  conditional_expr(expression *e1, expression *e2, expression *e3)
+    : ternary_expr(e1, e2, e3) { }
+  void evaluate(int, const reference &, string &, substring_position &);
+};  // analyze() comes from ternary_expr
+
+static expression *parsed_label = 0;
+static expression *parsed_date_label = 0;
+static expression *parsed_short_label = 0;
+
+static expression *parse_result;
+
+string literals;
+
+
+#line 270 "src/preproc/refer/label.cpp"
+
+# ifndef YY_CAST
+#  ifdef __cplusplus
+#   define YY_CAST(Type, Val) static_cast<Type> (Val)
+#   define YY_REINTERPRET_CAST(Type, Val) reinterpret_cast<Type> (Val)
+#  else
+#   define YY_CAST(Type, Val) ((Type) (Val))
+#   define YY_REINTERPRET_CAST(Type, Val) ((Type) (Val))
+#  endif
+# endif
+# ifndef YY_NULLPTR
+#  if defined __cplusplus
+#   if 201103L <= __cplusplus
+#    define YY_NULLPTR nullptr
+#   else
+#    define YY_NULLPTR 0
+#   endif
+#  else
+#   define YY_NULLPTR ((void*)0)
+#  endif
+# endif
+
+/* Use api.header.include to #include this header
+   instead of duplicating it here.  */
+#ifndef YY_YY_SRC_PREPROC_REFER_LABEL_HPP_INCLUDED
+# define YY_YY_SRC_PREPROC_REFER_LABEL_HPP_INCLUDED
+/* Debug traces.  */
+#ifndef YYDEBUG
+# define YYDEBUG 0
+#endif
+#if YYDEBUG
+extern int yydebug;
+#endif
+
+/* Token kinds.  */
+#ifndef YYTOKENTYPE
+# define YYTOKENTYPE
+  enum yytokentype
+  {
+    YYEMPTY = -2,
+    YYEOF = 0,                     /* "end of file"  */
+    YYerror = 256,                 /* error  */
+    YYUNDEF = 257,                 /* "invalid token"  */
+    TOKEN_LETTER = 258,            /* TOKEN_LETTER  */
+    TOKEN_LITERAL = 259,           /* TOKEN_LITERAL  */
+    TOKEN_DIGIT = 260              /* TOKEN_DIGIT  */
+  };
+  typedef enum yytokentype yytoken_kind_t;
+#endif
+/* Token kinds.  */
+#define YYEMPTY -2
+#define YYEOF 0
+#define YYerror 256
+#define YYUNDEF 257
+#define TOKEN_LETTER 258
+#define TOKEN_LITERAL 259
+#define TOKEN_DIGIT 260
+
+/* Value type.  */
+#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
+union YYSTYPE  /* semantic value type; yylval below is declared with it */
+{
+#line 218 "../src/preproc/refer/label.ypp"
+
+  int num;
+  expression *expr;  /* pointer to a label-expression node */
+  struct { int ndigits; int val; } dig;  /* a digit count paired with a value */
+  struct { int start; int len; } str;  /* a start/length pair */
+
+#line 340 "src/preproc/refer/label.cpp"
+
+};
+typedef union YYSTYPE YYSTYPE;
+# define YYSTYPE_IS_TRIVIAL 1
+# define YYSTYPE_IS_DECLARED 1
+#endif
+
+
+extern YYSTYPE yylval;
+
+
+int yyparse (void);
+
+
+#endif /* !YY_YY_SRC_PREPROC_REFER_LABEL_HPP_INCLUDED  */
+/* Symbol kind.  */
+enum yysymbol_kind_t
+{
+  YYSYMBOL_YYEMPTY = -2,
+  YYSYMBOL_YYEOF = 0,                      /* "end of file"  */
+  YYSYMBOL_YYerror = 1,                    /* error  */
+  YYSYMBOL_YYUNDEF = 2,                    /* "invalid token"  */
+  YYSYMBOL_TOKEN_LETTER = 3,               /* TOKEN_LETTER  */
+  YYSYMBOL_TOKEN_LITERAL = 4,              /* TOKEN_LITERAL  */
+  YYSYMBOL_TOKEN_DIGIT = 5,                /* TOKEN_DIGIT  */
+  YYSYMBOL_6_ = 6,                         /* '?'  */
+  YYSYMBOL_7_ = 7,                         /* ':'  */
+  YYSYMBOL_8_ = 8,                         /* '|'  */
+  YYSYMBOL_9_ = 9,                         /* '&'  */
+  YYSYMBOL_10_ = 10,                       /* '~'  */
+  YYSYMBOL_11_ = 11,                       /* '@'  */
+  YYSYMBOL_12_ = 12,                       /* '%'  */
+  YYSYMBOL_13_ = 13,                       /* '.'  */
+  YYSYMBOL_14_ = 14,                       /* '+'  */
+  YYSYMBOL_15_ = 15,                       /* '-'  */
+  YYSYMBOL_16_ = 16,                       /* '*'  */
+  YYSYMBOL_17_ = 17,                       /* '('  */
+  YYSYMBOL_18_ = 18,                       /* ')'  */
+  YYSYMBOL_19_ = 19,                       /* '<'  */
+  YYSYMBOL_20_ = 20,                       /* '>'  */
+  YYSYMBOL_YYACCEPT = 21,                  /* $accept  */
+  YYSYMBOL_expr = 22,                      /* expr  */
+  YYSYMBOL_conditional = 23,               /* conditional  */
+  YYSYMBOL_optional_conditional = 24,      /* optional_conditional  */
+  YYSYMBOL_alternative = 25,               /* alternative  */
+  YYSYMBOL_list = 26,                      /* list  */
+  YYSYMBOL_substitute = 27,                /* substitute  */
+  YYSYMBOL_string = 28,                    /* string  */
+  YYSYMBOL_optional_number = 29,           /* optional_number  */
+  YYSYMBOL_number = 30,                    /* number  */
+  YYSYMBOL_digits = 31,                    /* digits  */
+  YYSYMBOL_flag = 32                       /* flag  */
+};
+typedef enum yysymbol_kind_t yysymbol_kind_t;
+
+
+
+
+#ifdef short
+# undef short
+#endif
+
+/* On compilers that do not define __PTRDIFF_MAX__ etc., make sure
+   <limits.h> and (if available) <stdint.h> are included
+   so that the code can choose integer types of a good width.  */
+
+#ifndef __PTRDIFF_MAX__
+# include <limits.h> /* INFRINGES ON USER NAME SPACE */
+# if defined __STDC_VERSION__ && 199901 <= __STDC_VERSION__
+#  include <stdint.h> /* INFRINGES ON USER NAME SPACE */
+#  define YY_STDINT_H
+# endif
+#endif
+
+/* Narrow types that promote to a signed type and that can represent a
+   signed or unsigned integer of at least N bits.  In tables they can
+   save space and decrease cache pressure.  Promoting to a signed type
+   helps avoid bugs in integer arithmetic.  */
+
+#ifdef __INT_LEAST8_MAX__
+typedef __INT_LEAST8_TYPE__ yytype_int8;
+#elif defined YY_STDINT_H
+typedef int_least8_t yytype_int8;
+#else
+typedef signed char yytype_int8;
+#endif
+
+#ifdef __INT_LEAST16_MAX__
+typedef __INT_LEAST16_TYPE__ yytype_int16;
+#elif defined YY_STDINT_H
+typedef int_least16_t yytype_int16;
+#else
+typedef short yytype_int16;
+#endif
+
+/* Work around bug in HP-UX 11.23, which defines these macros
+   incorrectly for preprocessor constants.  This workaround can likely
+   be removed in 2023, as HPE has promised support for HP-UX 11.23
+   (aka HP-UX 11i v2) only through the end of 2022; see Table 2 of
+   <https://h20195.www2.hpe.com/V2/getpdf.aspx/4AA4-7673ENW.pdf>.  */
+#ifdef __hpux
+# undef UINT_LEAST8_MAX
+# undef UINT_LEAST16_MAX
+# define UINT_LEAST8_MAX 255
+# define UINT_LEAST16_MAX 65535
+#endif
+
+#if defined __UINT_LEAST8_MAX__ && __UINT_LEAST8_MAX__ <= __INT_MAX__
+typedef __UINT_LEAST8_TYPE__ yytype_uint8;
+#elif (!defined __UINT_LEAST8_MAX__ && defined YY_STDINT_H \
+       && UINT_LEAST8_MAX <= INT_MAX)
+typedef uint_least8_t yytype_uint8;
+#elif !defined __UINT_LEAST8_MAX__ && UCHAR_MAX <= INT_MAX
+typedef unsigned char yytype_uint8;
+#else
+typedef short yytype_uint8;
+#endif
+
+#if defined __UINT_LEAST16_MAX__ && __UINT_LEAST16_MAX__ <= __INT_MAX__
+typedef __UINT_LEAST16_TYPE__ yytype_uint16;
+#elif (!defined __UINT_LEAST16_MAX__ && defined YY_STDINT_H \
+       && UINT_LEAST16_MAX <= INT_MAX)
+typedef uint_least16_t yytype_uint16;
+#elif !defined __UINT_LEAST16_MAX__ && USHRT_MAX <= INT_MAX
+typedef unsigned short yytype_uint16;
+#else
+typedef int yytype_uint16;
+#endif
+
+#ifndef YYPTRDIFF_T
+# if defined __PTRDIFF_TYPE__ && defined __PTRDIFF_MAX__
+#  define YYPTRDIFF_T __PTRDIFF_TYPE__
+#  define YYPTRDIFF_MAXIMUM __PTRDIFF_MAX__
+# elif defined PTRDIFF_MAX
+#  ifndef ptrdiff_t
+#   include <stddef.h> /* INFRINGES ON USER NAME SPACE */
+#  endif
+#  define YYPTRDIFF_T ptrdiff_t
+#  define YYPTRDIFF_MAXIMUM PTRDIFF_MAX
+# else
+#  define YYPTRDIFF_T long
+#  define YYPTRDIFF_MAXIMUM LONG_MAX
+# endif
+#endif
+
+#ifndef YYSIZE_T
+# ifdef __SIZE_TYPE__
+#  define YYSIZE_T __SIZE_TYPE__
+# elif defined size_t
+#  define YYSIZE_T size_t
+# elif defined __STDC_VERSION__ && 199901 <= __STDC_VERSION__
+#  include <stddef.h> /* INFRINGES ON USER NAME SPACE */
+#  define YYSIZE_T size_t
+# else
+#  define YYSIZE_T unsigned
+# endif
+#endif
+
+#define YYSIZE_MAXIMUM                                  \
+  YY_CAST (YYPTRDIFF_T,                                 \
+           (YYPTRDIFF_MAXIMUM < YY_CAST (YYSIZE_T, -1)  \
+            ? YYPTRDIFF_MAXIMUM                         \
+            : YY_CAST (YYSIZE_T, -1)))
+
+#define YYSIZEOF(X) YY_CAST (YYPTRDIFF_T, sizeof (X))
+
+
+/* Stored state numbers (used for stacks). */
+typedef yytype_int8 yy_state_t;
+
+/* State numbers in computations.  */
+typedef int yy_state_fast_t;
+
+#ifndef YY_
+# if defined YYENABLE_NLS && YYENABLE_NLS
+#  if ENABLE_NLS
+#   include <libintl.h> /* INFRINGES ON USER NAME SPACE */
+#   define YY_(Msgid) dgettext ("bison-runtime", Msgid)
+#  endif
+# endif
+# ifndef YY_
+#  define YY_(Msgid) Msgid
+# endif
+#endif
+
+
+#ifndef YY_ATTRIBUTE_PURE
+# if defined __GNUC__ && 2 < __GNUC__ + (96 <= __GNUC_MINOR__)
+#  define YY_ATTRIBUTE_PURE __attribute__ ((__pure__))
+# else
+#  define YY_ATTRIBUTE_PURE
+# endif
+#endif
+
+#ifndef YY_ATTRIBUTE_UNUSED
+# if defined __GNUC__ && 2 < __GNUC__ + (7 <= __GNUC_MINOR__)
+#  define YY_ATTRIBUTE_UNUSED __attribute__ ((__unused__))
+# else
+#  define YY_ATTRIBUTE_UNUSED
+# endif
+#endif
+
+/* Suppress unused-variable warnings by "using" E.  */
+#if ! defined lint || defined __GNUC__
+# define YY_USE(E) ((void) (E))
+#else
+# define YY_USE(E) /* empty */
+#endif
+
+/* Suppress an incorrect diagnostic about yylval being uninitialized.  */
+#if defined __GNUC__ && ! defined __ICC && 406 <= __GNUC__ * 100 + __GNUC_MINOR__
+# if __GNUC__ * 100 + __GNUC_MINOR__ < 407
+#  define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN                           \
+    _Pragma ("GCC diagnostic push")                                     \
+    _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"")
+# else
+#  define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN                           \
+    _Pragma ("GCC diagnostic push")                                     \
+    _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"")              \
+    _Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"")
+# endif
+# define YY_IGNORE_MAYBE_UNINITIALIZED_END      \
+    _Pragma ("GCC diagnostic pop")
+#else
+# define YY_INITIAL_VALUE(Value) Value
+#endif
+#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+# define YY_IGNORE_MAYBE_UNINITIALIZED_END
+#endif
+#ifndef YY_INITIAL_VALUE
+# define YY_INITIAL_VALUE(Value) /* Nothing. */
+#endif
+
+#if defined __cplusplus && defined __GNUC__ && ! defined __ICC && 6 <= __GNUC__
+# define YY_IGNORE_USELESS_CAST_BEGIN                          \
+    _Pragma ("GCC diagnostic push")                            \
+    _Pragma ("GCC diagnostic ignored \"-Wuseless-cast\"")
+# define YY_IGNORE_USELESS_CAST_END            \
+    _Pragma ("GCC diagnostic pop")
+#endif
+#ifndef YY_IGNORE_USELESS_CAST_BEGIN
+# define YY_IGNORE_USELESS_CAST_BEGIN
+# define YY_IGNORE_USELESS_CAST_END
+#endif
+
+
+#define YY_ASSERT(E) ((void) (0 && (E)))
+
+#if !defined yyoverflow
+
+/* The parser invokes alloca or malloc; define the necessary symbols.  */
+
+# ifdef YYSTACK_USE_ALLOCA
+#  if YYSTACK_USE_ALLOCA
+#   ifdef __GNUC__
+#    define YYSTACK_ALLOC __builtin_alloca
+#   elif defined __BUILTIN_VA_ARG_INCR
+#    include <alloca.h> /* INFRINGES ON USER NAME SPACE */
+#   elif defined _AIX
+#    define YYSTACK_ALLOC __alloca
+#   elif defined _MSC_VER
+#    include <malloc.h> /* INFRINGES ON USER NAME SPACE */
+#    define alloca _alloca
+#   else
+#    define YYSTACK_ALLOC alloca
+#    if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS
+#     include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+      /* Use EXIT_SUCCESS as a witness for stdlib.h.  */
+#     ifndef EXIT_SUCCESS
+#      define EXIT_SUCCESS 0
+#     endif
+#    endif
+#   endif
+#  endif
+# endif
+
+# ifdef YYSTACK_ALLOC
+   /* Pacify GCC's 'empty if-body' warning.  */
+#  define YYSTACK_FREE(Ptr) do { /* empty */; } while (0)
+#  ifndef YYSTACK_ALLOC_MAXIMUM
+    /* The OS might guarantee only one guard page at the bottom of the stack,
+       and a page size can be as small as 4096 bytes.  So we cannot safely
+       invoke alloca (N) if N exceeds 4096.  Use a slightly smaller number
+       to allow for a few compiler-allocated temporary stack slots.  */
+#   define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */
+#  endif
+# else
+#  define YYSTACK_ALLOC YYMALLOC
+#  define YYSTACK_FREE YYFREE
+#  ifndef YYSTACK_ALLOC_MAXIMUM
+#   define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM
+#  endif
+#  if (defined __cplusplus && ! defined EXIT_SUCCESS \
+       && ! ((defined YYMALLOC || defined malloc) \
+             && (defined YYFREE || defined free)))
+#   include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+#   ifndef EXIT_SUCCESS
+#    define EXIT_SUCCESS 0
+#   endif
+#  endif
+#  ifndef YYMALLOC
+#   define YYMALLOC malloc
+#   if ! defined malloc && ! defined EXIT_SUCCESS
+void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */
+#   endif
+#  endif
+#  ifndef YYFREE
+#   define YYFREE free
+#   if ! defined free && ! defined EXIT_SUCCESS
+void free (void *); /* INFRINGES ON USER NAME SPACE */
+#   endif
+#  endif
+# endif
+#endif /* !defined yyoverflow */
+
+#if (! defined yyoverflow \
+     && (! defined __cplusplus \
+         || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL)))
+
+/* A type that is properly aligned for any stack member.  */
+union yyalloc
+{
+  yy_state_t yyss_alloc;
+  YYSTYPE yyvs_alloc;
+};
+
+/* The size of the maximum gap between one aligned stack and the next.  */
+# define YYSTACK_GAP_MAXIMUM (YYSIZEOF (union yyalloc) - 1)
+
+/* The size of an array large enough to hold all stacks, each with
+   N elements.  */
+# define YYSTACK_BYTES(N) \
+     ((N) * (YYSIZEOF (yy_state_t) + YYSIZEOF (YYSTYPE)) \
+      + YYSTACK_GAP_MAXIMUM)
+
+# define YYCOPY_NEEDED 1
+
+/* Relocate STACK from its old location to the new one.  The
+   local variables YYSIZE and YYSTACKSIZE give the old and new number of
+   elements in the stack, and YYPTR gives the new location of the
+   stack.  Advance YYPTR to a properly aligned location for the next
+   stack.  */
+# define YYSTACK_RELOCATE(Stack_alloc, Stack)                           \
+    do                                                                  \
+      {                                                                 \
+        YYPTRDIFF_T yynewbytes;                                         \
+        YYCOPY (&yyptr->Stack_alloc, Stack, yysize);                    \
+        Stack = &yyptr->Stack_alloc;                                    \
+        yynewbytes = yystacksize * YYSIZEOF (*Stack) + YYSTACK_GAP_MAXIMUM; \
+        yyptr += yynewbytes / YYSIZEOF (*yyptr);                        \
+      }                                                                 \
+    while (0)
+
+#endif
+
+#if defined YYCOPY_NEEDED && YYCOPY_NEEDED
+/* Copy COUNT objects from SRC to DST.  The source and destination do
+   not overlap.  */
+# ifndef YYCOPY
+#  if defined __GNUC__ && 1 < __GNUC__
+#   define YYCOPY(Dst, Src, Count) \
+      __builtin_memcpy (Dst, Src, YY_CAST (YYSIZE_T, (Count)) * sizeof (*(Src)))
+#  else
+#   define YYCOPY(Dst, Src, Count)              \
+      do                                        \
+        {                                       \
+          YYPTRDIFF_T yyi;                      \
+          for (yyi = 0; yyi < (Count); yyi++)   \
+            (Dst)[yyi] = (Src)[yyi];            \
+        }                                       \
+      while (0)
+#  endif
+# endif
+#endif /* !YYCOPY_NEEDED */
+
+/* YYFINAL -- State number of the termination state.  */
+#define YYFINAL  21
+/* YYLAST -- Last index in YYTABLE.  */
+#define YYLAST   39
+
+/* YYNTOKENS -- Number of terminals.  */
+#define YYNTOKENS  21
+/* YYNNTS -- Number of nonterminals.  */
+#define YYNNTS  12
+/* YYNRULES -- Number of rules.  */
+#define YYNRULES  34
+/* YYNSTATES -- Number of states.  */
+#define YYNSTATES  49
+
+/* YYMAXUTOK -- Last valid token kind.  */
+#define YYMAXUTOK   260
+
+
+/* YYTRANSLATE(TOKEN-NUM) -- Symbol number corresponding to TOKEN-NUM
+   as returned by yylex, with out-of-bounds checking.  */
+#define YYTRANSLATE(YYX)                                \
+  (0 <= (YYX) && (YYX) <= YYMAXUTOK                     \
+   ? YY_CAST (yysymbol_kind_t, yytranslate[YYX])        \
+   : YYSYMBOL_YYUNDEF)
+
+/* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM
+   as returned by yylex.  */
+static const yytype_int8 yytranslate[] =
+{
+       0,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,    12,     9,     2,
+      17,    18,    16,    14,     2,    15,    13,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     7,     2,
+      19,     2,    20,     6,    11,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     8,     2,    10,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+       2,     2,     2,     2,     2,     2,     1,     2,     3,     4,
+       5
+};
+
+#if YYDEBUG
+/* YYRLINE[YYN] -- Source line where rule number YYN was defined.  */
+static const yytype_int16 yyrline[] =
+{
+       0,   246,   246,   251,   253,   259,   260,   265,   267,   269,
+     274,   276,   281,   283,   288,   290,   295,   297,   299,   315,
+     319,   350,   352,   354,   356,   358,   364,   365,   370,   372,
+     377,   379,   386,   387,   389
+};
+#endif
+
+/** Accessing symbol of state STATE.  */
+#define YY_ACCESSING_SYMBOL(State) YY_CAST (yysymbol_kind_t, yystos[State])
+
+#if YYDEBUG || 0
+/* The user-facing name of the symbol whose (internal) number is
+   YYSYMBOL.  No bounds checking.  */
+static const char *yysymbol_name (yysymbol_kind_t yysymbol) YY_ATTRIBUTE_UNUSED;
+
+/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
+   First, the terminals, then, starting at YYNTOKENS, nonterminals.  */
+static const char *const yytname[] =
+{
+  "\"end of file\"", "error", "\"invalid token\"", "TOKEN_LETTER",
+  "TOKEN_LITERAL", "TOKEN_DIGIT", "'?'", "':'", "'|'", "'&'", "'~'", "'@'",
+  "'%'", "'.'", "'+'", "'-'", "'*'", "'('", "')'", "'<'", "'>'", "$accept",
+  "expr", "conditional", "optional_conditional", "alternative", "list",
+  "substitute", "string", "optional_number", "number", "digits", "flag", YY_NULLPTR
+};
+
+static const char *
+yysymbol_name (yysymbol_kind_t yysymbol)
+{
+  return yytname[yysymbol];  /* direct index: a negative kind such as YYSYMBOL_YYEMPTY (-2) is out of range */
+}
+#endif
+
+#define YYPACT_NINF (-26)
+
+#define yypact_value_is_default(Yyn) \
+  ((Yyn) == YYPACT_NINF)
+
+#define YYTABLE_NINF (-1)
+
+#define yytable_value_is_error(Yyn) \
+  0
+
+/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
+   STATE-NUM.  */
+static const yytype_int8 yypact[] =
+{
+       2,    11,   -26,   -26,    12,     2,     2,    24,   -26,   -26,
+      21,     2,    18,    -6,   -26,    26,   -26,   -26,    27,    15,
+      14,   -26,     2,     2,     2,    18,     2,    -3,    11,    11,
+     -26,   -26,   -26,   -26,   -26,    28,     2,     2,    -6,   -26,
+     -26,    33,    26,    26,     2,    11,   -26,   -26,    26
+};
+
+/* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM.
+   Performed when YYTABLE does not specify something else to do.  Zero
+   means the default is an error.  */
+static const yytype_int8 yydefact[] =
+{
+       5,    16,    15,    14,     0,     5,     5,     0,     6,     2,
+       3,     7,    10,    12,    28,    17,    18,    30,    19,     0,
+       0,     1,     5,     0,     0,    11,     0,    32,     0,     0,
+      23,    29,    31,    24,    25,     0,     8,     9,    13,    33,
+      34,     0,    21,    22,     0,    26,     4,    20,    27
+};
+
+/* YYPGOTO[NTERM-NUM].  */
+static const yytype_int8 yypgoto[] =
+{
+     -26,   -26,    -7,    -4,   -26,    -1,   -11,    13,   -26,   -25,
+     -26,   -26
+};
+
+/* YYDEFGOTO[NTERM-NUM].  */
+static const yytype_int8 yydefgoto[] =
+{
+       0,     7,     8,     9,    10,    11,    12,    13,    47,    15,
+      18,    41
+};
+
+/* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM.  If
+   positive, shift that token.  If negative, reduce the rule whose
+   number is the opposite.  If YYTABLE_NINF, syntax error.  */
+static const yytype_int8 yytable[] =
+{
+      25,    19,    20,    42,    43,     1,     2,    27,    28,    29,
+      30,    39,    40,     3,     4,    16,    14,    17,    35,     5,
+      48,     6,    36,    37,    21,    25,    25,    22,    26,    23,
+      24,    31,    32,    33,    34,    44,    45,    46,     0,    38
+};
+
+static const yytype_int8 yycheck[] =
+{
+      11,     5,     6,    28,    29,     3,     4,    13,    14,    15,
+      16,    14,    15,    11,    12,     3,     5,     5,    22,    17,
+      45,    19,    23,    24,     0,    36,    37,     6,    10,     8,
+       9,     5,     5,    18,    20,     7,     3,    44,    -1,    26
+};
+
+/* YYSTOS[STATE-NUM] -- The symbol kind of the accessing symbol of
+   state STATE-NUM.  */
+static const yytype_int8 yystos[] =
+{
+       0,     3,     4,    11,    12,    17,    19,    22,    23,    24,
+      25,    26,    27,    28,     5,    30,     3,     5,    31,    24,
+      24,     0,     6,     8,     9,    27,    10,    13,    14,    15,
+      16,     5,     5,    18,    20,    24,    26,    26,    28,    14,
+      15,    32,    30,    30,     7,     3,    23,    29,    30
+};
+
+/* YYR1[RULE-NUM] -- Symbol kind of the left-hand side of rule RULE-NUM.  */
+static const yytype_int8 yyr1[] =
+{
+       0,    21,    22,    23,    23,    24,    24,    25,    25,    25,
+      26,    26,    27,    27,    28,    28,    28,    28,    28,    28,
+      28,    28,    28,    28,    28,    28,    29,    29,    30,    30,
+      31,    31,    32,    32,    32
+};
+
+/* YYR2[RULE-NUM] -- Number of symbols on the right-hand side of rule RULE-NUM.  */
+static const yytype_int8 yyr2[] =
+{
+       0,     2,     1,     1,     5,     0,     1,     1,     3,     3,
+       1,     2,     1,     3,     1,     1,     1,     2,     2,     2,
+       5,     3,     3,     2,     3,     3,     0,     1,     1,     2,
+       1,     2,     0,     1,     1
+};
+
+
+enum { YYENOMEM = -2 };
+
+#define yyerrok         (yyerrstatus = 0)  /* Leave error-recovery mode.  */
+#define yyclearin       (yychar = YYEMPTY)  /* Forget the current lookahead token.  */
+
+#define YYACCEPT        goto yyacceptlab  /* yyparse then returns 0.  */
+#define YYABORT         goto yyabortlab  /* yyparse then returns 1.  */
+#define YYERROR         goto yyerrorlab  /* Start error recovery from an action.  */
+#define YYNOMEM         goto yyexhaustedlab  /* Reports "memory exhausted"; yyparse returns 2.  */
+
+
+#define YYRECOVERING()  (!!yyerrstatus)  /* 1 while yyerrstatus is nonzero, else 0.  */
+
+#define YYBACKUP(Token, Value)                                    \
+  do                                                              \
+    if (yychar == YYEMPTY)                                        \
+      {                                                           \
+        yychar = (Token);                                         \
+        yylval = (Value);                                         \
+        YYPOPSTACK (yylen);                                       \
+        yystate = *yyssp;                                         \
+        goto yybackup;                                            \
+      }                                                           \
+    else                                                          \
+      {                                                           \
+        yyerror (YY_("syntax error: cannot back up")); \
+        YYERROR;                                                  \
+      }                                                           \
+  while (0)
+
+/* Backward compatibility with an undocumented macro.
+   Use YYerror or YYUNDEF.  All of the macros above refer to labels
+   and local variables of yyparse, so they are only meaningful inside
+   that function.  */
+#define YYERRCODE YYUNDEF
+
+
+/* Enable debugging if requested.  When YYDEBUG is nonzero the tracing
+   macros and helper functions that follow are compiled in; each macro
+   tests the run-time flag yydebug before printing.  The Location
+   parameter of YY_SYMBOL_PRINT is accepted but never expanded.  */
+#if YYDEBUG
+
+# ifndef YYFPRINTF
+#  include <stdio.h> /* INFRINGES ON USER NAME SPACE */
+#  define YYFPRINTF fprintf
+# endif
+
+# define YYDPRINTF(Args)                        \
+do {                                            \
+  if (yydebug)                                  \
+    YYFPRINTF Args;                             \
+} while (0)
+
+
+
+
+# define YY_SYMBOL_PRINT(Title, Kind, Value, Location)                    \
+do {                                                                      \
+  if (yydebug)                                                            \
+    {                                                                     \
+      YYFPRINTF (stderr, "%s ", Title);                                   \
+      yy_symbol_print (stderr,                                            \
+                  Kind, Value); \
+      YYFPRINTF (stderr, "\n");                                           \
+    }                                                                     \
+} while (0)
+
+
+/*-----------------------------------.
+| Print this symbol's value on YYO.  |
+| The body below contains no output  |
+| statement, so nothing is written   |
+| for any symbol kind.               |
+`-----------------------------------*/
+
+static void
+yy_symbol_value_print (FILE *yyo,
+                       yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep)
+{
+  FILE *yyoutput = yyo;
+  YY_USE (yyoutput);
+  if (!yyvaluep)
+    return;
+  YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+  YY_USE (yykind);
+  YY_IGNORE_MAYBE_UNINITIALIZED_END
+}
+
+
+/* Write symbol YYKIND to YYO in the form "token NAME (...)" or
+   "nterm NAME (...)".  Whatever belongs between the parentheses is
+   left to yy_symbol_value_print.  */
+
+static void
+yy_symbol_print (FILE *yyo,
+                 yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep)
+{
+  /* Symbol kinds below YYNTOKENS are labelled as tokens, all others
+     as nonterminals.  */
+  const char *yyclass = "nterm";
+  if (yykind < YYNTOKENS)
+    yyclass = "token";
+
+  YYFPRINTF (yyo, "%s %s (", yyclass, yysymbol_name (yykind));
+  yy_symbol_value_print (yyo, yykind, yyvaluep);
+  YYFPRINTF (yyo, ")");
+}
+
+/* yy_stack_print -- write "Stack now" followed by every state number
+   from YYBOTTOM up to and including YYTOP, then a newline, to stderr.  */
+
+static void
+yy_stack_print (yy_state_t *yybottom, yy_state_t *yytop)
+{
+  yy_state_t *yyp = yybottom;
+
+  YYFPRINTF (stderr, "Stack now");
+  while (yyp <= yytop)
+    {
+      /* Copy into an int so the argument matches the %d conversion.  */
+      const int yyentry = *yyp++;
+      YYFPRINTF (stderr, " %d", yyentry);
+    }
+  YYFPRINTF (stderr, "\n");
+}
+
+# define YY_STACK_PRINT(Bottom, Top)                            \
+do {                                                            \
+  if (yydebug)                                                  \
+    yy_stack_print ((Bottom), (Top));                           \
+} while (0)
+
+
+/* yy_reduce_print -- announce on stderr that rule YYRULE is about to
+   be reduced, then list each right-hand-side symbol as $1, $2, ...
+   using the entries at the top of the state and value stacks.  */
+
+static void
+yy_reduce_print (yy_state_t *yyssp, YYSTYPE *yyvsp,
+                 int yyrule)
+{
+  const int yynrhs = yyr2[yyrule];
+  int yyk;
+
+  YYFPRINTF (stderr, "Reducing stack by rule %d (line %d):\n",
+             yyrule - 1, yyrline[yyrule]);
+  /* $yyk sits yynrhs - yyk entries below the top of both stacks.  */
+  for (yyk = 1; yyk <= yynrhs; yyk++)
+    {
+      const int yyoff = yyk - yynrhs;
+      YYFPRINTF (stderr, "   $%d = ", yyk);
+      yy_symbol_print (stderr,
+                       YY_ACCESSING_SYMBOL (+yyssp[yyoff]),
+                       &yyvsp[yyoff]);
+      YYFPRINTF (stderr, "\n");
+    }
+}
+
+# define YY_REDUCE_PRINT(Rule)          \
+do {                                    \
+  if (yydebug)                          \
+    yy_reduce_print (yyssp, yyvsp, Rule); \
+} while (0)
+
+/* Nonzero means print parse trace.  It is left uninitialized so that
+   multiple parsers can coexist.  When YYDEBUG is off, the tracing
+   macros below expand to nothing (YYDPRINTF to a void expression), so
+   their arguments are never evaluated.  */
+int yydebug;
+#else /* !YYDEBUG */
+# define YYDPRINTF(Args) ((void) 0)
+# define YY_SYMBOL_PRINT(Title, Kind, Value, Location)
+# define YY_STACK_PRINT(Bottom, Top)
+# define YY_REDUCE_PRINT(Rule)
+#endif /* !YYDEBUG */
+
+
+/* YYINITDEPTH -- initial size of the parser's stacks.  yyparse
+   declares its state stack and its semantic value stack as local
+   arrays of this many elements.  */
+#ifndef YYINITDEPTH
+# define YYINITDEPTH 200
+#endif
+
+/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only
+   if the built-in stack extension method is used).
+
+   Do not make this value too large; the results are undefined if
+   YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH)
+   evaluated with infinite-precision integer arithmetic.  */
+
+#ifndef YYMAXDEPTH
+# define YYMAXDEPTH 10000
+#endif
+
+
+
+
+
+
+/*-----------------------------------------------.
+| Release the memory associated to this symbol.  |
+| The body holds no per-symbol cleanup code: it  |
+| only emits the optional trace line and leaves  |
+| *YYVALUEP untouched.  NOTE(review): objects    |
+| that the semantic actions create with 'new'    |
+| are therefore not deleted by this function.    |
+`-----------------------------------------------*/
+
+static void
+yydestruct (const char *yymsg,
+            yysymbol_kind_t yykind, YYSTYPE *yyvaluep)
+{
+  YY_USE (yyvaluep);
+  if (!yymsg)
+    yymsg = "Deleting";
+  YY_SYMBOL_PRINT (yymsg, yykind, yyvaluep, yylocationp);  /* yylocationp is never expanded: the macro ignores its Location parameter.  */
+
+  YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+  YY_USE (yykind);
+  YY_IGNORE_MAYBE_UNINITIALIZED_END
+}
+
+
+/* Lookahead token kind.  yyparse sets it to YYEMPTY whenever a new
+   token has to be fetched with yylex.  */
+int yychar;
+
+/* The semantic value of the lookahead symbol.  yylex stores into it
+   before returning a token that carries a value.  */
+YYSTYPE yylval;
+/* Number of syntax errors so far.  yyparse increments it when it
+   reports a syntax error and when an action raises one with YYERROR.  */
+int yynerrs;
+
+
+
+
+/*-----------------------------------------------------------------.
+| yyparse -- parse the label specification supplied by yylex.      |
+| Reducing rule 2 stores the finished expression in parse_result.  |
+| Syntax errors are reported through yyerror.  The result is 0     |
+| when the input is accepted, 1 when parsing is aborted, and 2     |
+| after "memory exhausted" has been reported.                      |
+`-----------------------------------------------------------------*/
+
+int
+yyparse (void)
+{
+    yy_state_fast_t yystate = 0;
+    /* Number of tokens to shift before error messages enabled.  */
+    int yyerrstatus = 0;
+
+    /* Refer to the stacks through separate pointers, to allow yyoverflow
+       to reallocate them elsewhere.  */
+
+    /* Their size.  */
+    YYPTRDIFF_T yystacksize = YYINITDEPTH;
+
+    /* The state stack: array, bottom, top.  */
+    yy_state_t yyssa[YYINITDEPTH];
+    yy_state_t *yyss = yyssa;
+    yy_state_t *yyssp = yyss;
+
+    /* The semantic value stack: array, bottom, top.  */
+    YYSTYPE yyvsa[YYINITDEPTH];
+    YYSTYPE *yyvs = yyvsa;
+    YYSTYPE *yyvsp = yyvs;
+
+  int yyn;
+  /* The return value of yyparse.  */
+  int yyresult;
+  /* Lookahead symbol kind.  */
+  yysymbol_kind_t yytoken = YYSYMBOL_YYEMPTY;
+  /* The variables used to return semantic value and location from the
+     action routines.  */
+  YYSTYPE yyval;
+
+
+
+#define YYPOPSTACK(N)   (yyvsp -= (N), yyssp -= (N))
+
+  /* The number of symbols on the RHS of the reduced rule.
+     Keep to zero when no symbol should be popped.  */
+  int yylen = 0;
+
+  YYDPRINTF ((stderr, "Starting parse\n"));
+
+  yychar = YYEMPTY; /* Cause a token to be read.  */
+
+  goto yysetstate;
+
+
+/*------------------------------------------------------------.
+| yynewstate -- push a new state, which is found in yystate.  |
+`------------------------------------------------------------*/
+yynewstate:
+  /* In all cases, when you get here, the value and location stacks
+     have just been pushed.  So pushing a state here evens the stacks.  */
+  yyssp++;
+
+
+/*--------------------------------------------------------------------.
+| yysetstate -- set current state (the top of the stack) to yystate.  |
+`--------------------------------------------------------------------*/
+yysetstate:
+  YYDPRINTF ((stderr, "Entering state %d\n", yystate));
+  YY_ASSERT (0 <= yystate && yystate < YYNSTATES);
+  YY_IGNORE_USELESS_CAST_BEGIN
+  *yyssp = YY_CAST (yy_state_t, yystate);
+  YY_IGNORE_USELESS_CAST_END
+  YY_STACK_PRINT (yyss, yyssp);
+
+  if (yyss + yystacksize - 1 <= yyssp)
+#if !defined yyoverflow && !defined YYSTACK_RELOCATE
+    YYNOMEM;
+#else
+    {
+      /* Get the current used size of the three stacks, in elements.  */
+      YYPTRDIFF_T yysize = yyssp - yyss + 1;
+
+# if defined yyoverflow
+      {
+        /* Give user a chance to reallocate the stack.  Use copies of
+           these so that the &'s don't force the real ones into
+           memory.  */
+        yy_state_t *yyss1 = yyss;
+        YYSTYPE *yyvs1 = yyvs;
+
+        /* Each stack pointer address is followed by the size of the
+           data in use in that stack, in bytes.  This used to be a
+           conditional around just the two extra args, but that might
+           be undefined if yyoverflow is a macro.  */
+        yyoverflow (YY_("memory exhausted"),
+                    &yyss1, yysize * YYSIZEOF (*yyssp),
+                    &yyvs1, yysize * YYSIZEOF (*yyvsp),
+                    &yystacksize);
+        yyss = yyss1;
+        yyvs = yyvs1;
+      }
+# else /* defined YYSTACK_RELOCATE */
+      /* Extend the stack our own way.  */
+      if (YYMAXDEPTH <= yystacksize)
+        YYNOMEM;
+      yystacksize *= 2;
+      if (YYMAXDEPTH < yystacksize)
+        yystacksize = YYMAXDEPTH;
+
+      {
+        yy_state_t *yyss1 = yyss;
+        union yyalloc *yyptr =
+          YY_CAST (union yyalloc *,
+                   YYSTACK_ALLOC (YY_CAST (YYSIZE_T, YYSTACK_BYTES (yystacksize))));
+        if (! yyptr)
+          YYNOMEM;
+        YYSTACK_RELOCATE (yyss_alloc, yyss);
+        YYSTACK_RELOCATE (yyvs_alloc, yyvs);
+#  undef YYSTACK_RELOCATE
+        if (yyss1 != yyssa)
+          YYSTACK_FREE (yyss1);
+      }
+# endif
+
+      yyssp = yyss + yysize - 1;
+      yyvsp = yyvs + yysize - 1;
+
+      YY_IGNORE_USELESS_CAST_BEGIN
+      YYDPRINTF ((stderr, "Stack size increased to %ld\n",
+                  YY_CAST (long, yystacksize)));
+      YY_IGNORE_USELESS_CAST_END
+
+      if (yyss + yystacksize - 1 <= yyssp)
+        YYABORT;
+    }
+#endif /* !defined yyoverflow && !defined YYSTACK_RELOCATE */
+
+
+  if (yystate == YYFINAL)
+    YYACCEPT;
+
+  goto yybackup;
+
+
+/*-----------.
+| yybackup.  |
+`-----------*/
+yybackup:
+  /* Do appropriate processing given the current state.  Read a
+     lookahead token if we need one and don't already have one.  */
+
+  /* First try to decide what to do without reference to lookahead token.  */
+  yyn = yypact[yystate];
+  if (yypact_value_is_default (yyn))
+    goto yydefault;
+
+  /* Not known => get a lookahead token if don't already have one.  */
+
+  /* YYCHAR is either empty, or end-of-input, or a valid lookahead.  */
+  if (yychar == YYEMPTY)
+    {
+      YYDPRINTF ((stderr, "Reading a token\n"));
+      yychar = yylex ();
+    }
+
+  if (yychar <= YYEOF)
+    {
+      yychar = YYEOF;
+      yytoken = YYSYMBOL_YYEOF;
+      YYDPRINTF ((stderr, "Now at end of input.\n"));
+    }
+  else if (yychar == YYerror)
+    {
+      /* The scanner already issued an error message, process directly
+         to error recovery.  But do not keep the error token as
+         lookahead, it is too special and may lead us to an endless
+         loop in error recovery. */
+      yychar = YYUNDEF;
+      yytoken = YYSYMBOL_YYerror;
+      goto yyerrlab1;
+    }
+  else
+    {
+      yytoken = YYTRANSLATE (yychar);
+      YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc);  /* &yylloc is never expanded: the macro ignores its Location parameter.  */
+    }
+
+  /* If the proper action on seeing token YYTOKEN is to reduce or to
+     detect an error, take that action.  */
+  yyn += yytoken;
+  if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken)
+    goto yydefault;
+  yyn = yytable[yyn];
+  if (yyn <= 0)
+    {
+      if (yytable_value_is_error (yyn))
+        goto yyerrlab;
+      yyn = -yyn;
+      goto yyreduce;
+    }
+
+  /* Count tokens shifted since error; after three, turn off error
+     status.  */
+  if (yyerrstatus)
+    yyerrstatus--;
+
+  /* Shift the lookahead token.  */
+  YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc);
+  yystate = yyn;
+  YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+  *++yyvsp = yylval;
+  YY_IGNORE_MAYBE_UNINITIALIZED_END
+
+  /* Discard the shifted token.  */
+  yychar = YYEMPTY;
+  goto yynewstate;
+
+
+/*-----------------------------------------------------------.
+| yydefault -- do the default action for the current state.  |
+`-----------------------------------------------------------*/
+yydefault:
+  yyn = yydefact[yystate];
+  if (yyn == 0)
+    goto yyerrlab;
+  goto yyreduce;
+
+
+/*-----------------------------.
+| yyreduce -- do a reduction.  |
+`-----------------------------*/
+yyreduce:
+  /* yyn is the number of a rule to reduce with.  */
+  yylen = yyr2[yyn];
+
+  /* If YYLEN is nonzero, implement the default value of the action:
+     '$$ = $1'.
+
+     Otherwise, the following line sets YYVAL to garbage.
+     This behavior is undocumented and Bison
+     users should not rely upon it.  Assigning to YYVAL
+     unconditionally makes the parser a bit smaller, and it avoids a
+     GCC warning that YYVAL may be used uninitialized.  */
+  yyval = yyvsp[1-yylen];
+
+
+  YY_REDUCE_PRINT (yyn);
+  switch (yyn)
+    {
+  case 2: /* expr: optional_conditional  */
+#line 247 "../src/preproc/refer/label.ypp"
+                { parse_result = ((yyvsp[0].expr) ? new analyzed_expr((yyvsp[0].expr)) : 0); }
+#line 1370 "src/preproc/refer/label.cpp"
+    break;
+
+  case 3: /* conditional: alternative  */
+#line 252 "../src/preproc/refer/label.ypp"
+                { (yyval.expr) = (yyvsp[0].expr); }
+#line 1376 "src/preproc/refer/label.cpp"
+    break;
+
+  case 4: /* conditional: alternative '?' optional_conditional ':' conditional  */
+#line 254 "../src/preproc/refer/label.ypp"
+                { (yyval.expr) = new conditional_expr((yyvsp[-4].expr), (yyvsp[-2].expr), (yyvsp[0].expr)); }
+#line 1382 "src/preproc/refer/label.cpp"
+    break;
+
+  case 5: /* optional_conditional: %empty  */
+#line 259 "../src/preproc/refer/label.ypp"
+                { (yyval.expr) = 0; }
+#line 1388 "src/preproc/refer/label.cpp"
+    break;
+
+  case 6: /* optional_conditional: conditional  */
+#line 261 "../src/preproc/refer/label.ypp"
+                { (yyval.expr) = (yyvsp[0].expr); }
+#line 1394 "src/preproc/refer/label.cpp"
+    break;
+
+  case 7: /* alternative: list  */
+#line 266 "../src/preproc/refer/label.ypp"
+                { (yyval.expr) = (yyvsp[0].expr); }
+#line 1400 "src/preproc/refer/label.cpp"
+    break;
+
+  case 8: /* alternative: alternative '|' list  */
+#line 268 "../src/preproc/refer/label.ypp"
+                { (yyval.expr) = new alternative_expr((yyvsp[-2].expr), (yyvsp[0].expr)); }
+#line 1406 "src/preproc/refer/label.cpp"
+    break;
+
+  case 9: /* alternative: alternative '&' list  */
+#line 270 "../src/preproc/refer/label.ypp"
+                { (yyval.expr) = new conditional_expr((yyvsp[-2].expr), (yyvsp[0].expr), 0); }
+#line 1412 "src/preproc/refer/label.cpp"
+    break;
+
+  case 10: /* list: substitute  */
+#line 275 "../src/preproc/refer/label.ypp"
+                { (yyval.expr) = (yyvsp[0].expr); }
+#line 1418 "src/preproc/refer/label.cpp"
+    break;
+
+  case 11: /* list: list substitute  */
+#line 277 "../src/preproc/refer/label.ypp"
+                { (yyval.expr) = new list_expr((yyvsp[-1].expr), (yyvsp[0].expr)); }
+#line 1424 "src/preproc/refer/label.cpp"
+    break;
+
+  case 12: /* substitute: string  */
+#line 282 "../src/preproc/refer/label.ypp"
+                { (yyval.expr) = (yyvsp[0].expr); }
+#line 1430 "src/preproc/refer/label.cpp"
+    break;
+
+  case 13: /* substitute: substitute '~' string  */
+#line 284 "../src/preproc/refer/label.ypp"
+                { (yyval.expr) = new substitute_expr((yyvsp[-2].expr), (yyvsp[0].expr)); }
+#line 1436 "src/preproc/refer/label.cpp"
+    break;
+
+  case 14: /* string: '@'  */
+#line 289 "../src/preproc/refer/label.ypp"
+                { (yyval.expr) = new at_expr; }
+#line 1442 "src/preproc/refer/label.cpp"
+    break;
+
+  case 15: /* string: TOKEN_LITERAL  */
+#line 291 "../src/preproc/refer/label.ypp"
+                {
+		  (yyval.expr) = new literal_expr(literals.contents() + (yyvsp[0].str).start,
+					(yyvsp[0].str).len);
+		}
+#line 1451 "src/preproc/refer/label.cpp"
+    break;
+
+  case 16: /* string: TOKEN_LETTER  */
+#line 296 "../src/preproc/refer/label.ypp"
+                { (yyval.expr) = new field_expr((yyvsp[0].num), 0); }
+#line 1457 "src/preproc/refer/label.cpp"
+    break;
+
+  case 17: /* string: TOKEN_LETTER number  */
+#line 298 "../src/preproc/refer/label.ypp"
+                { (yyval.expr) = new field_expr((yyvsp[-1].num), (yyvsp[0].num) - 1); }
+#line 1463 "src/preproc/refer/label.cpp"
+    break;
+
+  case 18: /* string: '%' TOKEN_LETTER  */
+#line 300 "../src/preproc/refer/label.ypp"
+                {
+		  switch ((yyvsp[0].num)) {
+		  case 'I':
+		  case 'i':
+		  case 'A':
+		  case 'a':
+		    (yyval.expr) = new format_expr((yyvsp[0].num));
+		    break;
+		  default:
+		    command_error("unrecognized format '%1'", char((yyvsp[0].num)));
+		    (yyval.expr) = new format_expr('a');
+		    break;
+		  }
+		}
+#line 1482 "src/preproc/refer/label.cpp"
+    break;
+
+  case 19: /* string: '%' digits  */
+#line 316 "../src/preproc/refer/label.ypp"
+                {
+		  (yyval.expr) = new format_expr('0', (yyvsp[0].dig).ndigits, (yyvsp[0].dig).val);
+		}
+#line 1490 "src/preproc/refer/label.cpp"
+    break;
+
+  case 20: /* string: string '.' flag TOKEN_LETTER optional_number  */
+#line 320 "../src/preproc/refer/label.ypp"
+                {
+		  switch ((yyvsp[-1].num)) {
+		  case 'l':
+		    (yyval.expr) = new map_expr((yyvsp[-4].expr), lowercase);
+		    break;
+		  case 'u':
+		    (yyval.expr) = new map_expr((yyvsp[-4].expr), uppercase);
+		    break;
+		  case 'c':
+		    (yyval.expr) = new map_expr((yyvsp[-4].expr), capitalize);
+		    break;
+		  case 'r':
+		    (yyval.expr) = new map_expr((yyvsp[-4].expr), reverse_name);
+		    break;
+		  case 'a':
+		    (yyval.expr) = new map_expr((yyvsp[-4].expr), abbreviate_name);
+		    break;
+		  case 'y':
+		    (yyval.expr) = new extractor_expr((yyvsp[-4].expr), find_year, (yyvsp[-2].num));
+		    break;
+		  case 'n':
+		    (yyval.expr) = new extractor_expr((yyvsp[-4].expr), find_last_name, (yyvsp[-2].num));
+		    break;
+		  default:
+		    (yyval.expr) = (yyvsp[-4].expr);
+		    command_error("unknown function '%1'", char((yyvsp[-1].num)));
+		    break;
+		  }
+		}
+#line 1524 "src/preproc/refer/label.cpp"
+    break;
+
+  case 21: /* string: string '+' number  */
+#line 351 "../src/preproc/refer/label.ypp"
+                { (yyval.expr) = new truncate_expr((yyvsp[-2].expr), (yyvsp[0].num)); }
+#line 1530 "src/preproc/refer/label.cpp"
+    break;
+
+  case 22: /* string: string '-' number  */
+#line 353 "../src/preproc/refer/label.ypp"
+                { (yyval.expr) = new truncate_expr((yyvsp[-2].expr), -(yyvsp[0].num)); }
+#line 1536 "src/preproc/refer/label.cpp"
+    break;
+
+  case 23: /* string: string '*'  */
+#line 355 "../src/preproc/refer/label.ypp"
+                { (yyval.expr) = new star_expr((yyvsp[-1].expr)); }
+#line 1542 "src/preproc/refer/label.cpp"
+    break;
+
+  case 24: /* string: '(' optional_conditional ')'  */
+#line 357 "../src/preproc/refer/label.ypp"
+                { (yyval.expr) = (yyvsp[-1].expr); }
+#line 1548 "src/preproc/refer/label.cpp"
+    break;
+
+  case 25: /* string: '<' optional_conditional '>'  */
+#line 359 "../src/preproc/refer/label.ypp"
+                { (yyval.expr) = new separator_expr((yyvsp[-1].expr)); }
+#line 1554 "src/preproc/refer/label.cpp"
+    break;
+
+  case 26: /* optional_number: %empty  */
+#line 364 "../src/preproc/refer/label.ypp"
+                { (yyval.num) = -1; }
+#line 1560 "src/preproc/refer/label.cpp"
+    break;
+
+  case 27: /* optional_number: number  */
+#line 366 "../src/preproc/refer/label.ypp"
+                { (yyval.num) = (yyvsp[0].num); }
+#line 1566 "src/preproc/refer/label.cpp"
+    break;
+
+  case 28: /* number: TOKEN_DIGIT  */
+#line 371 "../src/preproc/refer/label.ypp"
+                { (yyval.num) = (yyvsp[0].num); }
+#line 1572 "src/preproc/refer/label.cpp"
+    break;
+
+  case 29: /* number: number TOKEN_DIGIT  */
+#line 373 "../src/preproc/refer/label.ypp"
+                { (yyval.num) = (yyvsp[-1].num)*10 + (yyvsp[0].num); }
+#line 1578 "src/preproc/refer/label.cpp"
+    break;
+
+  case 30: /* digits: TOKEN_DIGIT  */
+#line 378 "../src/preproc/refer/label.ypp"
+                { (yyval.dig).ndigits = 1; (yyval.dig).val = (yyvsp[0].num); }
+#line 1584 "src/preproc/refer/label.cpp"
+    break;
+
+  case 31: /* digits: digits TOKEN_DIGIT  */
+#line 380 "../src/preproc/refer/label.ypp"
+                { (yyval.dig).ndigits = (yyvsp[-1].dig).ndigits + 1; (yyval.dig).val = (yyvsp[-1].dig).val*10 + (yyvsp[0].num); }
+#line 1590 "src/preproc/refer/label.cpp"
+    break;
+
+  case 32: /* flag: %empty  */
+#line 386 "../src/preproc/refer/label.ypp"
+                { (yyval.num) = 0; }
+#line 1596 "src/preproc/refer/label.cpp"
+    break;
+
+  case 33: /* flag: '+'  */
+#line 388 "../src/preproc/refer/label.ypp"
+                { (yyval.num) = 1; }
+#line 1602 "src/preproc/refer/label.cpp"
+    break;
+
+  case 34: /* flag: '-'  */
+#line 390 "../src/preproc/refer/label.ypp"
+                { (yyval.num) = -1; }
+#line 1608 "src/preproc/refer/label.cpp"
+    break;
+
+
+#line 1612 "src/preproc/refer/label.cpp"
+
+      default: break;
+    }
+  /* User semantic actions sometimes alter yychar, and that requires
+     that yytoken be updated with the new translation.  We take the
+     approach of translating immediately before every use of yytoken.
+     One alternative is translating here after every semantic action,
+     but that translation would be missed if the semantic action invokes
+     YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or
+     if it invokes YYBACKUP.  In the case of YYABORT or YYACCEPT, an
+     incorrect destructor might then be invoked immediately.  In the
+     case of YYERROR or YYBACKUP, subsequent parser actions might lead
+     to an incorrect destructor call or verbose syntax error message
+     before the lookahead is translated.  */
+  YY_SYMBOL_PRINT ("-> $$ =", YY_CAST (yysymbol_kind_t, yyr1[yyn]), &yyval, &yyloc);
+
+  YYPOPSTACK (yylen);
+  yylen = 0;
+
+  *++yyvsp = yyval;
+
+  /* Now 'shift' the result of the reduction.  Determine what state
+     that goes to, based on the state we popped back to and the rule
+     number reduced by.  */
+  {
+    const int yylhs = yyr1[yyn] - YYNTOKENS;
+    const int yyi = yypgoto[yylhs] + *yyssp;
+    yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyssp
+               ? yytable[yyi]
+               : yydefgoto[yylhs]);
+  }
+
+  goto yynewstate;
+
+
+/*--------------------------------------.
+| yyerrlab -- here on detecting error.  |
+`--------------------------------------*/
+yyerrlab:
+  /* Make sure we have latest lookahead translation.  See comments at
+     user semantic actions for why this is necessary.  */
+  yytoken = yychar == YYEMPTY ? YYSYMBOL_YYEMPTY : YYTRANSLATE (yychar);
+  /* If not already recovering from an error, report this error.  */
+  if (!yyerrstatus)
+    {
+      ++yynerrs;
+      yyerror (YY_("syntax error"));
+    }
+
+  if (yyerrstatus == 3)
+    {
+      /* If just tried and failed to reuse lookahead token after an
+         error, discard it.  */
+
+      if (yychar <= YYEOF)
+        {
+          /* Return failure if at end of input.  */
+          if (yychar == YYEOF)
+            YYABORT;
+        }
+      else
+        {
+          yydestruct ("Error: discarding",
+                      yytoken, &yylval);
+          yychar = YYEMPTY;
+        }
+    }
+
+  /* Else will try to reuse lookahead token after shifting the error
+     token.  */
+  goto yyerrlab1;
+
+
+/*---------------------------------------------------.
+| yyerrorlab -- error raised explicitly by YYERROR.  |
+`---------------------------------------------------*/
+yyerrorlab:
+  /* Pacify compilers when the user code never invokes YYERROR and the
+     label yyerrorlab therefore never appears in user code.  */
+  if (0)
+    YYERROR;
+  ++yynerrs;
+
+  /* Do not reclaim the symbols of the rule whose action triggered
+     this YYERROR.  */
+  YYPOPSTACK (yylen);
+  yylen = 0;
+  YY_STACK_PRINT (yyss, yyssp);
+  yystate = *yyssp;
+  goto yyerrlab1;
+
+
+/*-------------------------------------------------------------.
+| yyerrlab1 -- common code for both syntax error and YYERROR.  |
+`-------------------------------------------------------------*/
+yyerrlab1:
+  yyerrstatus = 3;      /* Each real token shifted decrements this.  */
+
+  /* Pop stack until we find a state that shifts the error token.  */
+  for (;;)
+    {
+      yyn = yypact[yystate];
+      if (!yypact_value_is_default (yyn))
+        {
+          yyn += YYSYMBOL_YYerror;
+          if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYSYMBOL_YYerror)
+            {
+              yyn = yytable[yyn];
+              if (0 < yyn)
+                break;
+            }
+        }
+
+      /* Pop the current state because it cannot handle the error token.  */
+      if (yyssp == yyss)
+        YYABORT;
+
+
+      yydestruct ("Error: popping",
+                  YY_ACCESSING_SYMBOL (yystate), yyvsp);
+      YYPOPSTACK (1);
+      yystate = *yyssp;
+      YY_STACK_PRINT (yyss, yyssp);
+    }
+
+  YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+  *++yyvsp = yylval;  /* The value slot pushed for the error token is a copy of the current yylval.  */
+  YY_IGNORE_MAYBE_UNINITIALIZED_END
+
+
+  /* Shift the error token.  */
+  YY_SYMBOL_PRINT ("Shifting", YY_ACCESSING_SYMBOL (yyn), yyvsp, yylsp);
+
+  yystate = yyn;
+  goto yynewstate;
+
+
+/*-------------------------------------.
+| yyacceptlab -- YYACCEPT comes here.  |
+`-------------------------------------*/
+yyacceptlab:
+  yyresult = 0;
+  goto yyreturnlab;
+
+
+/*-----------------------------------.
+| yyabortlab -- YYABORT comes here.  |
+`-----------------------------------*/
+yyabortlab:
+  yyresult = 1;
+  goto yyreturnlab;
+
+
+/*-----------------------------------------------------------.
+| yyexhaustedlab -- YYNOMEM (memory exhaustion) comes here.  |
+`-----------------------------------------------------------*/
+yyexhaustedlab:
+  yyerror (YY_("memory exhausted"));
+  yyresult = 2;
+  goto yyreturnlab;
+
+
+/*----------------------------------------------------------.
+| yyreturnlab -- parsing is finished, clean up and return.  |
+`----------------------------------------------------------*/
+yyreturnlab:
+  if (yychar != YYEMPTY)
+    {
+      /* Make sure we have latest lookahead translation.  See comments at
+         user semantic actions for why this is necessary.  */
+      yytoken = YYTRANSLATE (yychar);
+      yydestruct ("Cleanup: discarding lookahead",
+                  yytoken, &yylval);
+    }
+  /* Do not reclaim the symbols of the rule whose action triggered
+     this YYABORT or YYACCEPT.  */
+  YYPOPSTACK (yylen);
+  YY_STACK_PRINT (yyss, yyssp);
+  while (yyssp != yyss)
+    {
+      yydestruct ("Cleanup: popping",
+                  YY_ACCESSING_SYMBOL (+*yyssp), yyvsp);
+      YYPOPSTACK (1);
+    }
+#ifndef yyoverflow
+  if (yyss != yyssa)
+    YYSTACK_FREE (yyss);
+#endif
+
+  return yyresult;
+}
+
+#line 393 "../src/preproc/refer/label.ypp"
+
+
+/* bison defines const to be empty unless __STDC__ is defined, which it
+isn't under cfront */
+
+#ifdef const
+#undef const
+#endif
+
+const char *spec_ptr;	// next unread character of the specification being scanned
+const char *spec_end;	// end of the specification (its terminating null byte)
+const char *spec_cur;	// where yylex began its latest token; yyerror quotes from here
+
+static char uppercase_array[] = {	// indexed 0..25 by format_serial for 'A' numbering
+  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
+  'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
+  'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
+  'Y', 'Z',
+};
+  
+static char lowercase_array[] = {	// indexed 0..25 by format_serial for 'a' numbering
+  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
+  'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
+  'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
+  'y', 'z',
+};
+
+// Lexer for label specifications.  Skips white space, records the
+// start of the token in spec_cur, and returns 0 once the end of the
+// specification is reached.  A letter yields TOKEN_LETTER and a digit
+// TOKEN_DIGIT, with the character code or the digit's value left in
+// yylval.num.  A single-quoted run yields TOKEN_LITERAL: its text is
+// appended to 'literals' (two adjacent quotes stand for one quote)
+// and yylval.str records the start offset and length of that text.
+// A literal without a closing quote runs to the end of the
+// specification.  Any other character is returned as its own code.
+int yylex()
+{
+  for (; spec_ptr < spec_end; spec_ptr++)
+    if (!csspace(*spec_ptr))
+      break;
+  spec_cur = spec_ptr;
+  if (spec_ptr >= spec_end)
+    return 0;
+  const unsigned char ch = *spec_ptr++;
+  if (csalpha(ch)) {
+    yylval.num = ch;
+    return TOKEN_LETTER;
+  }
+  if (csdigit(ch)) {
+    yylval.num = ch - '0';
+    return TOKEN_DIGIT;
+  }
+  if (ch != '\'')
+    return ch;
+  yylval.str.start = literals.length();
+  while (spec_ptr < spec_end) {
+    const char cur = *spec_ptr++;
+    if (cur != '\'') {
+      literals += cur;
+      continue;
+    }
+    // A quote: followed by another quote it is literal text,
+    // otherwise it closes the literal.
+    if (spec_ptr < spec_end && *spec_ptr == '\'') {
+      literals += '\'';
+      spec_ptr++;
+    }
+    else
+      break;
+  }
+  yylval.str.len = literals.length() - yylval.str.start;
+  return TOKEN_LITERAL;
+}
+
+// Parse LABEL_SPEC and, when yyparse accepts it, replace parsed_label
+// with the new expression.  Returns 1 on success; on failure returns
+// 0 and leaves parsed_label as it was.
+int set_label_spec(const char *label_spec)
+{
+  // Point the lexer at the new text and drop old literal storage.
+  spec_ptr = label_spec;
+  spec_cur = label_spec;
+  spec_end = strchr(label_spec, '\0');
+  literals.clear();
+  const int failed = yyparse();
+  if (!failed) {
+    delete parsed_label;
+    parsed_label = parse_result;
+  }
+  return failed ? 0 : 1;
+}
+
+// Parse LABEL_SPEC and, when yyparse accepts it, replace
+// parsed_date_label with the new expression.  Returns 1 on success;
+// on failure returns 0 and leaves parsed_date_label as it was.
+int set_date_label_spec(const char *label_spec)
+{
+  // Point the lexer at the new text and drop old literal storage.
+  spec_ptr = label_spec;
+  spec_cur = label_spec;
+  spec_end = strchr(label_spec, '\0');
+  literals.clear();
+  const int failed = yyparse();
+  if (!failed) {
+    delete parsed_date_label;
+    parsed_date_label = parse_result;
+  }
+  return failed ? 0 : 1;
+}
+
+// Parse LABEL_SPEC and, when yyparse accepts it, replace
+// parsed_short_label with the new expression.  Returns 1 on success;
+// on failure returns 0 and leaves parsed_short_label as it was.
+int set_short_label_spec(const char *label_spec)
+{
+  // Point the lexer at the new text and drop old literal storage.
+  spec_ptr = label_spec;
+  spec_cur = label_spec;
+  spec_end = strchr(label_spec, '\0');
+  literals.clear();
+  const int failed = yyparse();
+  if (!failed) {
+    delete parsed_short_label;
+    parsed_short_label = parse_result;
+  }
+  return failed ? 0 : 1;
+}
+
+// Report a parser diagnostic through command_error.  When part of the
+// specification remains unread, the text starting at the current
+// token is quoted in the message.
+void yyerror(const char *message)
+{
+  if (spec_cur >= spec_end) {
+    // spec_cur is passed as well, although this format only
+    // references %1.
+    command_error("label specification %1 at end of string",
+                  message, spec_cur);
+    return;
+  }
+  command_error("label specification %1 before '%2'", message, spec_cur);
+}
+
+// '@' expression.  A tentative evaluation asks the reference for its
+// canonicalized authors; otherwise the span returned by get_authors
+// is appended to RESULT, if there is one.
+void at_expr::evaluate(int tentative, const reference &ref,
+                       string &result, substring_position &)
+{
+  if (tentative) {
+    ref.canonicalize_authors(result);
+    return;
+  }
+  const char *end;
+  const char *start = ref.get_authors(&end);
+  if (start == 0)
+    return;
+  result.append(start, end - start);
+}
+
+// '%' expression: append the reference's serial number to RESULT.
+// Nothing is appended during a tentative evaluation.  The number is
+// the count from the reference's label_info when it has one, and
+// ref.get_number() otherwise.  For type '0' the number, offset by
+// first_number, is written in decimal and left-padded with zeros to
+// 'width' characters; every other type is handed to format_serial
+// with the number plus one.
+void format_expr::evaluate(int tentative, const reference &ref,
+                           string &result, substring_position &)
+{
+  if (tentative)
+    return;
+  const label_info *lp = ref.get_label_ptr();
+  const int num = lp == 0 ? ref.get_number() : lp->count;
+  if (type != '0') {
+    result += format_serial(type, num + 1);
+    return;
+  }
+  const char *ptr = i_to_a(num + first_number);
+  // Subtract in int.  Mixing in the size_t from strlen would make a
+  // number wider than 'width' depend on unsigned wraparound followed
+  // by a narrowing conversion to get its negative pad count.
+  // (Assumes 'width' is a signed int member -- confirm in the class.)
+  int pad = width - int(strlen(ptr));
+  while (--pad >= 0)
+    result += '0';
+  result += ptr;
+}
+
+// Format the positive serial number N in the style selected by C:
+// 'i' and 'I' give lower- and upper-case Roman numerals, 'a' and 'A'
+// give lower- and upper-case letter sequences (a..z, aa, ab, ...).
+// The text is built in a static buffer that the next call overwrites;
+// Roman requests of 40000 or more return i_to_a(n) instead.
+static const char *format_serial(char c, int n)
+{
+  assert(n > 0);
+  static char buf[128]; // more than enough.
+  char *out = buf;
+  if (c == 'i' || c == 'I') {
+    if (n >= 40000)
+      return i_to_a(n);
+    // troff uses z and w to represent 10000 and 5000 in Roman
+    // numerals; I can find no historical basis for this usage
+    const char *sym = (c == 'i') ? "zwmdclxvi" : "ZWMDCLXVI";
+    for (; n >= 10000; n -= 10000)
+      *out++ = sym[0];
+    // For each decimal place sym[2] is the unit symbol, sym[1] the
+    // five and sym[0] the ten; sym advances two symbols per place.
+    for (int place = 1000; place > 0; place /= 10, sym += 2) {
+      int digit = n / place;
+      n -= digit * place;
+      if (digit == 9) {
+        *out++ = sym[2];
+        *out++ = sym[0];
+      }
+      else if (digit == 4) {
+        *out++ = sym[2];
+        *out++ = sym[1];
+      }
+      else {
+        if (digit >= 5) {
+          *out++ = sym[1];
+          digit -= 5;
+        }
+        while (digit-- > 0)
+          *out++ = sym[2];
+      }
+    }
+    *out = 0;
+  }
+  else if (c == 'a' || c == 'A') {
+    const char *letters = (c == 'a') ? lowercase_array : uppercase_array;
+    // this is derived from troff/reg.c
+    while (n > 0) {
+      int d = n % 26;
+      if (d == 0)
+        d = 26;
+      n = (n - d) / 26;
+      *out++ = letters[d - 1];
+    }
+    *out = 0;
+    // The letters were produced last one first; swap them into
+    // reading order.
+    for (char *lo = buf, *hi = out - 1; lo < hi; ++lo, --hi) {
+      const char held = *lo;
+      *lo = *hi;
+      *hi = held;
+    }
+  }
+  else
+    assert(0);
+  return buf;
+}
+
+// Append field NAME of REF to RESULT, narrowed by nth_field() using
+// NUMBER.  Nothing is appended when either lookup returns null.
+void field_expr::evaluate(int, const reference &ref,
+                          string &result, substring_position &)
+{
+  const char *stop;
+  const char *text = ref.get_field(name, &stop);
+  if (text == 0)
+    return;
+  text = nth_field(number, text, &stop);
+  if (text == 0)
+    return;
+  result.append(text, stop - text);
+}
+
+// Append the stored literal text to RESULT; the tentative flag, the
+// reference and the separator position are all ignored.
+void literal_expr::evaluate(int, const reference &,
+			    string &result, substring_position &)
+{
+  result += s;
+}
+
+// Wrap E and compute its analysis flags once, up front; a null E
+// yields flags of 0.
+analyzed_expr::analyzed_expr(expression *e)
+: unary_expr(e)
+{
+  flags = (e != 0) ? e->analyze() : 0;
+}
+
+// Forward evaluation unchanged to the wrapped expression, if any.
+void analyzed_expr::evaluate(int tentative, const reference &ref,
+                             string &result, substring_position &pos)
+{
+  if (expr == 0)
+    return;
+  expr->evaluate(tentative, ref, result, pos);
+}
+
+// Evaluate the operand only on the final (non-tentative) pass, and
+// only when the reference has no label_info or its label_info records
+// a total greater than one.
+void star_expr::evaluate(int tentative, const reference &ref,
+                         string &result, substring_position &pos)
+{
+  const label_info *info = ref.get_label_ptr();
+  if (tentative || expr == 0)
+    return;
+  if (info != 0 && info->total <= 1)
+    return;
+  expr->evaluate(tentative, ref, result, pos);
+}
+
+// Evaluate the operand and, if POS had not been set yet on entry
+// (start < 0), record in POS the span of RESULT that the operand
+// produced.
+void separator_expr::evaluate(int tentative, const reference &ref,
+                              string &result, substring_position &pos)
+{
+  const int before = result.length();
+  // Sample this before evaluating: the operand may itself set POS.
+  const int unset_on_entry = pos.start < 0;
+  if (expr != 0)
+    expr->evaluate(tentative, ref, result, pos);
+  if (!unset_on_entry)
+    return;
+  pos.start = before;
+  pos.length = result.length() - before;
+}
+
+// Evaluate the operand into a scratch string, then let FUNC append
+// its transformation of that text to RESULT.
+void map_expr::evaluate(int tentative, const reference &ref,
+                        string &result, substring_position &)
+{
+  if (expr == 0)
+    return;
+  string text;
+  substring_position unused_pos;
+  expr->evaluate(tentative, ref, text, unused_pos);
+  const char *text_begin = text.contents();
+  (*func)(text_begin, text_begin + text.length(), result);
+}
+
+// Evaluate the operand into a scratch string, ask FUNC to locate a
+// match inside it, and append the piece selected by PART:
+//   BEFORE - text preceding the match (the whole text if no match)
+//   MATCH  - the match itself (nothing if no match)
+//   AFTER  - text following the match (nothing if no match)
+void extractor_expr::evaluate(int tentative, const reference &ref,
+                              string &result, substring_position &)
+{
+  if (expr == 0)
+    return;
+  string text;
+  substring_position unused_pos;
+  expr->evaluate(tentative, ref, text, unused_pos);
+  const char *text_begin = text.contents();
+  const char *text_end = text_begin + text.length();
+  const char *match_end;
+  const char *match = (*func)(text_begin, text_end, &match_end);
+  if (part == BEFORE) {
+    if (match != 0)
+      result.append(text_begin, match - text_begin);
+    else
+      result += text;
+  }
+  else if (part == MATCH) {
+    if (match != 0)
+      result.append(match, match_end - match);
+  }
+  else if (part == AFTER) {
+    if (match != 0)
+      result.append(match_end, text_end - match_end);
+  }
+  else
+    assert(0);
+}
+
+// Copy tokens from [PTR, END) to RESULT until LEN counting tokens
+// have been copied.  A token counts when sortify_non_empty() is
+// nonzero for it; non-counting tokens are copied only if they are
+// accents, and are dropped otherwise.
+static void first_part(int len, const char *ptr, const char *end,
+                       string &result)
+{
+  const char *tok = ptr;
+  while (get_token(&ptr, end)) {
+    const token_info *info = lookup_token(tok, ptr);
+    int counts = info->sortify_non_empty(tok, ptr);
+    if (counts) {
+      if (--len < 0)
+        break;
+      result.append(tok, ptr - tok);
+    }
+    else if (info->is_accent())
+      result.append(tok, ptr - tok);
+    tok = ptr;
+  }
+}
+
+// Append to RESULT the last LEN counting tokens of [PTR, END), where
+// a token counts when sortify_non_empty() is nonzero for it.  A first
+// pass counts such tokens; a second pass skips the surplus leading
+// ones; first_part() then copies what remains.
+static void last_part(int len, const char *ptr, const char *end,
+		      string &result)
+{
+  const char *start = ptr;
+  int count = 0;
+  for (;;) {
+    const char *token_start = ptr;
+    if (!get_token(&ptr, end))
+      break;
+    const token_info *ti = lookup_token(token_start, ptr);
+    if (ti->sortify_non_empty(token_start, ptr))
+      count++;
+  }
+  ptr = start;
+  int skip = count - len;
+  if (skip > 0) {
+    for (;;) {
+      const char *token_start = ptr;
+      if (!get_token(&ptr, end)) {
+	// The first pass found more than SKIP counting tokens, so this
+	// cannot happen.  Break explicitly so that a build with NDEBUG
+	// (where the assert vanishes) cannot spin here forever.
+	assert(0);
+	break;
+      }
+      const token_info *ti = lookup_token(token_start, ptr);
+      // Stop at the first counting token after SKIP of them have been
+      // passed, and rewind to its start so first_part() copies it.
+      if (ti->sortify_non_empty(token_start, ptr) && --skip < 0) {
+	ptr = token_start;
+	break;
+      }
+    }
+  }
+  first_part(len, ptr, end, result);
+}
+
+// Evaluate the operand into a scratch string and append part of it:
+// the leading N counting tokens when N > 0, the trailing -N when
+// N < 0, and nothing when N is 0.
+void truncate_expr::evaluate(int tentative, const reference &ref,
+                             string &result, substring_position &)
+{
+  if (expr == 0)
+    return;
+  string text;
+  substring_position unused_pos;
+  expr->evaluate(tentative, ref, text, unused_pos);
+  const char *text_begin = text.contents();
+  const char *text_end = text_begin + text.length();
+  if (n < 0)
+    last_part(-n, text_begin, text_end, result);
+  else if (n > 0)
+    first_part(n, text_begin, text_end, result);
+}
+
+// Evaluate the first operand; fall back to the second only if the
+// first appended nothing to RESULT.
+void alternative_expr::evaluate(int tentative, const reference &ref,
+                                string &result, substring_position &pos)
+{
+  const int before = result.length();
+  if (expr1 != 0)
+    expr1->evaluate(tentative, ref, result, pos);
+  if (expr2 == 0 || result.length() != before)
+    return;
+  expr2->evaluate(tentative, ref, result, pos);
+}
+
+// Concatenation: evaluate both operands, in order, into RESULT.
+void list_expr::evaluate(int tentative, const reference &ref,
+                         string &result, substring_position &pos)
+{
+  expression *const parts[2] = { expr1, expr2 };
+  for (int k = 0; k < 2; k++) {
+    if (parts[k] != 0)
+      parts[k]->evaluate(tentative, ref, result, pos);
+  }
+}
+
+// Evaluate the first operand; if it appended text ending in '-',
+// drop that '-' and evaluate the second operand in its place.
+void substitute_expr::evaluate(int tentative, const reference &ref,
+                               string &result, substring_position &pos)
+{
+  const int before = result.length();
+  if (expr1 != 0)
+    expr1->evaluate(tentative, ref, result, pos);
+  const int after = result.length();
+  if (after <= before || result[after - 1] != '-')
+    return;
+  // ought to see if pos covers the -
+  result.set_length(after - 1);
+  if (expr2 != 0)
+    expr2->evaluate(tentative, ref, result, pos);
+}
+
+// Evaluate the condition (expr1) into a scratch string; if it
+// produced any text evaluate expr2 into RESULT, otherwise expr3.
+void conditional_expr::evaluate(int tentative, const reference &ref,
+                                string &result, substring_position &pos)
+{
+  string probe;
+  substring_position probe_pos;
+  if (expr1 != 0)
+    expr1->evaluate(tentative, ref, probe, probe_pos);
+  expression *chosen = (probe.length() > 0) ? expr2 : expr3;
+  if (chosen != 0)
+    chosen->evaluate(tentative, ref, result, pos);
+}
+
+// If the label specification depends on the reference
+// (CONTAINS_VARIABLE), evaluate it tentatively and look the
+// tentative label up in the label table.
+void reference::pre_compute_label()
+{
+  if (parsed_label == 0)
+    return;
+  if ((parsed_label->analyze() & expression::CONTAINS_VARIABLE) == 0)
+    return;
+  label.clear();
+  substring_position unused_pos;
+  parsed_label->evaluate(1, *this, label, unused_pos);
+  label_ptr = lookup_label(label);
+}
+
+// Final (non-tentative) evaluation: build the label, the short label
+// when short_label_flag is set, and, when date_as_label is set, a
+// replacement date.  Finally bump the count of this reference's
+// label_info, if it has one.
+void reference::compute_label()
+{
+  label.clear();
+  if (parsed_label != 0)
+    parsed_label->evaluate(0, *this, label, separator_pos);
+  if (short_label_flag) {
+    if (parsed_short_label != 0)
+      parsed_short_label->evaluate(0, *this, short_label,
+                                   short_separator_pos);
+  }
+  if (date_as_label) {
+    string date_text;
+    if (parsed_date_label != 0) {
+      substring_position unused_pos;
+      parsed_date_label->evaluate(0, *this, date_text, unused_pos);
+    }
+    set_date(date_text);
+  }
+  if (label_ptr != 0)
+    ++label_ptr->count;
+}
+
+// Compute the label right away.  Setting total to 2 first makes
+// star_expr (which requires total > 1) evaluate its operand.
+void reference::immediate_compute_label()
+{
+  if (label_ptr)
+    label_ptr->total = 2;	// force use of disambiguator
+  compute_label();
+}
+
+// Dispatch to the numeric-range or the shared-prefix merge strategy
+// according to abbreviate_label_ranges; returns whatever the chosen
+// strategy returns.
+int reference::merge_labels(reference **v, int n, label_type type,
+                            string &result)
+{
+  return abbreviate_label_ranges
+         ? merge_labels_by_number(v, n, type, result)
+         : merge_labels_by_parts(v, n, type, result);
+}
+
+// If this reference and the leading entries of V carry consecutive
+// numbers, store "<this label><range indicator><last label>" in
+// RESULT and return how many entries of V the range absorbed.
+// Returns 0 (RESULT untouched) unless at least the first two entries
+// of V continue the run.
+int reference::merge_labels_by_number(reference **v, int n, label_type type,
+                                      string &result)
+{
+  if (n <= 1)
+    return 0;
+  const int base = get_number();
+  // Only merge three or more labels.
+  const int starts_run = (v[0]->get_number() == base + 1
+                          && v[1]->get_number() == base + 2);
+  if (!starts_run)
+    return 0;
+  int run = 2;
+  while (run < n && v[run]->get_number() == base + run + 1)
+    run++;
+  result = get_label(type);
+  result += label_range_indicator;
+  result += v[run - 1]->get_label(type);
+  return run;
+}
+
+// Separator position of the short label when one is requested and
+// short_label_flag is set, otherwise that of the ordinary label.
+const substring_position &reference::get_separator_pos(label_type type) const
+{
+  const int want_short = (type == SHORT_LABEL && short_label_flag);
+  return want_short ? short_separator_pos : separator_pos;
+}
+
+// The short label when one is requested and short_label_flag is set,
+// otherwise the ordinary label.
+const string &reference::get_label(label_type type) const
+{
+  const int want_short = (type == SHORT_LABEL && short_label_flag);
+  return want_short ? short_label : label;
+}
+
+// Merge this label with the leading entries of V whose labels share
+// this label's text before the separator.  RESULT becomes this label
+// followed, for each merged entry, by separate_label_second_parts
+// and that entry's text after its separator.  Returns the number of
+// entries merged; 0 leaves RESULT untouched.
+int reference::merge_labels_by_parts(reference **v, int n, label_type type,
+                                     string &result)
+{
+  if (n <= 0)
+    return 0;
+  const string &own_label = get_label(type);
+  const substring_position &own_sep = get_separator_pos(type);
+  if (own_sep.start < 0)
+    return 0;
+  int merged = 0;
+  for (; merged < n; merged++) {
+    const reference *other = v[merged];
+    // The entry must have its separator at the same offset and the
+    // same text in front of it.
+    if (own_sep.start != other->get_separator_pos(type).start)
+      break;
+    if (memcmp(own_label.contents(), other->get_label(type).contents(),
+               own_sep.start) != 0)
+      break;
+    if (merged == 0)
+      result = own_label;
+    result += separate_label_second_parts;
+    const substring_position &other_sep = other->get_separator_pos(type);
+    const int tail_at = other_sep.start + other_sep.length;
+    result.append(other->get_label(type).contents() + tail_at,
+                  other->get_label(type).length() - tail_at);
+  }
+  return merged;
+}
+
+// Backing store for the text of every label in the label table;
+// each label_info refers to its text by offset and length.
+string label_pool;
+
+// Record S at the current end of label_pool and append it there.
+// A new entry starts with count 0 and total 1.
+label_info::label_info(const string &s)
+{
+  start = label_pool.length();	// must be read before the append
+  length = s.length();
+  count = 0;
+  total = 1;
+  label_pool += s;
+}
+
+// Open-addressing hash table of label_info pointers; empty slots are
+// null.  It is created on first use by lookup_label().
+static label_info **label_table = 0;
+static int label_table_size = 0;
+static int label_table_used = 0;
+
+// Return the label_info for LABEL, creating it if necessary.  Finding
+// an existing entry increments its total (note that lookup therefore
+// has a side effect); a new entry is created by the label_info
+// constructor, which appends the text to label_pool.
+label_info *lookup_label(const string &label)
+{
+  if (label_table == 0) {
+    label_table = new label_info *[17];
+    label_table_size = 17;
+    for (int i = 0; i < 17; i++)
+      label_table[i] = 0;
+  }
+  unsigned h = hash_string(label.contents(), label.length()) % label_table_size;
+  label_info **ptr;
+  // Probe downwards from the hash slot, wrapping from the first slot
+  // to the last, until the label or an empty slot is found.
+  for (ptr = label_table + h;
+       *ptr != 0;
+       (ptr == label_table)
+       ? (ptr = label_table + label_table_size - 1)
+       : ptr--)
+    if ((*ptr)->length == label.length()
+	&& memcmp(label_pool.contents() + (*ptr)->start, label.contents(),
+		  label.length()) == 0) {
+      (*ptr)->total += 1;
+      return *ptr;
+    }
+  label_info *result = *ptr = new label_info(label);
+  // Grow once the table is more than half full, so a probe always
+  // reaches an empty slot.
+  if (++label_table_used * 2 > label_table_size) {
+    // Rehash the table.
+    label_info **old_table = label_table;
+    int old_size = label_table_size;
+    label_table_size = next_size(label_table_size);
+    label_table = new label_info *[label_table_size];
+    int i;
+    for (i = 0; i < label_table_size; i++)
+      label_table[i] = 0;
+    for (i = 0; i < old_size; i++)
+      if (old_table[i]) {
+	h = hash_string(label_pool.contents() + old_table[i]->start,
+			old_table[i]->length);
+	label_info **p;
+	// Same downward, wrapping probe; only an empty slot is sought.
+	for (p = label_table + (h % label_table_size);
+	     *p != 0;
+	     (p == label_table)
+	     ? (p = label_table + label_table_size - 1)
+	     : --p)
+	    ;
+	*p = old_table[i];
+	}
+    delete[] old_table;
+  }
+  return result;
+}
+
+// Delete every label_info, leaving the (still allocated) table empty,
+// and discard the pooled label text.
+void clear_labels()
+{
+  int slot = label_table_size;
+  while (--slot >= 0) {
+    delete label_table[slot];
+    label_table[slot] = 0;
+  }
+  label_pool.clear();
+  label_table_used = 0;
+}
+
+static void consider_authors(reference **start, reference **end, int i);
+
+// Compute the final label of each of the N references in V.  When the
+// label specification uses '@' (CONTAINS_AT) and sort_fields begins
+// with "A+", consider_authors() is run over V first.
+void compute_labels(reference **v, int n)
+{
+  if (parsed_label != 0
+      && (parsed_label->analyze() & expression::CONTAINS_AT) != 0) {
+    const int sorted_by_authors = (sort_fields.length() >= 2
+                                   && sort_fields[0] == 'A'
+                                   && sort_fields[1] == '+');
+    if (sorted_by_authors)
+      consider_authors(v, v + n, 0);
+  }
+  int i = 0;
+  while (i < n)
+    v[i++]->compute_label();
+}
+
+
+/* A reference with a list of authors <A0,A1,...,AN> _needs_ author i
+where 0 <= i <= N if there exists a reference with a list of authors
+<B0,B1,...,BM> such that <A0,A1,...,AN> != <B0,B1,...,BM> and M >= i
+and Aj = Bj for 0 <= j < i.  In this case we can't say "A0,
+A1,...,A(i-1) et al" because this would match both <A0,A1,...,AN> and
+<B0,B1,...,BM>.  If a reference needs author i we only have to call
+need_author(j) for some j >= i such that the reference also needs
+author j. */
+
+/* This function handles 2 tasks:
+determine which authors are needed (cannot be elided with et al.);
+determine which authors can have only last names in the labels.
+
+References >= start and < end have the same first i author names.
+Also they're sorted by A+.
+
+The function recurses on i: each run of references agreeing on the
+full name of author i is handed to consider_authors(..., i + 1). */
+
+static void consider_authors(reference **start, reference **end, int i)
+{
+  if (start >= end)
+    return;
+  reference **p = start;
+  // Skip the leading references that have no author i at all.
+  if (i >= (*p)->get_nauthors()) {
+    for (++p; p < end && i >= (*p)->get_nauthors(); p++)
+      ;
+    if (p < end && i > 0) {
+      // If we have an author list <A B C> and an author list <A B C D>,
+      // then both lists need C.
+      for (reference **q = start; q < end; q++)
+	(*q)->need_author(i - 1);
+    }
+    start = p;
+  }
+  // Each pass handles one run of references sharing author i's last
+  // name; within it, name_start tracks sub-runs sharing the full name.
+  while (p < end) {
+    reference **last_name_start = p;
+    reference **name_start = p;
+    for (++p;
+	 p < end && i < (*p)->get_nauthors()
+	 && same_author_last_name(**last_name_start, **p, i);
+	 p++) {
+      if (!same_author_name(**name_start, **p, i)) {
+	consider_authors(name_start, p, i + 1);
+	name_start = p;
+      }
+    }
+    consider_authors(name_start, p, i + 1);
+    // Only one full name occurred for this last name, so the last
+    // name alone identifies the author.
+    if (last_name_start == name_start) {
+      for (reference **q = last_name_start; q < p; q++)
+	(*q)->set_last_name_unambiguous(i);
+    }
+    // If we have an author list <A B C D> and <A B C E>, then the lists
+    // need author D and E respectively.
+    if (name_start > start || p < end) {
+      for (reference **q = last_name_start; q < p; q++)
+	(*q)->need_author(i);
+    }
+  }
+}
+
+// Nonzero if the sort-field text obtained with get_sort_field(0, n,
+// 0, ...) is byte-for-byte the same for R1 and R2, or is missing
+// from both.
+int same_author_last_name(const reference &r1, const reference &r2, int n)
+{
+  const char *end1;
+  const char *end2;
+  const char *beg1 = r1.get_sort_field(0, n, 0, &end1);
+  const char *beg2 = r2.get_sort_field(0, n, 0, &end2);
+  if (beg1 == 0 || beg2 == 0)
+    return beg1 == beg2;	// equal only if both are missing
+  if (end1 - beg1 != end2 - beg2)
+    return 0;
+  return memcmp(beg1, beg2, end1 - beg1) == 0;
+}
+
+// Nonzero if the sort-field text obtained with get_sort_field(0, n,
+// -1, ...) is byte-for-byte the same for R1 and R2, or is missing
+// from both.
+int same_author_name(const reference &r1, const reference &r2, int n)
+{
+  const char *end1;
+  const char *end2;
+  const char *beg1 = r1.get_sort_field(0, n, -1, &end1);
+  const char *beg2 = r2.get_sort_field(0, n, -1, &end2);
+  if (beg1 == 0 || beg2 == 0)
+    return beg1 == beg2;	// equal only if both are missing
+  if (end1 - beg1 != end2 - beg2)
+    return 0;
+  return memcmp(beg1, beg2, end1 - beg1) == 0;
+}
+
+
+// Add I to the set: bit (I & 7) of byte (I >> 3) of V.  V is grown
+// with zero bytes when I lies beyond its current end.
+void int_set::set(int i)
+{
+  assert(i >= 0);
+  const int slot = i >> 3;
+  const int have = v.length();
+  if (slot >= have) {
+    v.set_length(slot + 1);
+    for (int k = have; k < slot + 1; k++)
+      v[k] = 0;
+  }
+  const int mask = 1 << (i & 7);
+  v[slot] |= mask;
+}
+
+// Return 1 if I is in the set, 0 otherwise; anything beyond the end
+// of V is not in the set.
+int int_set::get(int i) const
+{
+  assert(i >= 0);
+  const int slot = i >> 3;
+  if (slot >= v.length())
+    return 0;
+  const int mask = 1 << (i & 7);
+  return (v[slot] & mask) != 0;
+}
+
+// Record that author I may be shown by last name alone; get_authors()
+// consults this set when building the author string.
+void reference::set_last_name_unambiguous(int i)
+{
+  last_name_unambiguous.set(i);
+}
+
+// Raise last_needed_author to N; it is never lowered.
+void reference::need_author(int n)
+{
+  if (last_needed_author >= n)
+    return;
+  last_needed_author = n;
+}
+
+// Return the author string used in labels, storing its end in *END.
+// It is built on the first call and cached in `authors'.  Each
+// author appears by last name only when flagged in
+// last_name_unambiguous, otherwise in full.  After author
+// last_needed_author the rest are replaced by et_al, provided et_al
+// is non-empty and the et_al_min_elide / et_al_min_total thresholds
+// are met.  Authors are joined with the join_authors_* strings.
+const char *reference::get_authors(const char **end) const
+{
+  if (!computed_authors) {
+    // The cache fields are filled in from a const member function.
+    reference *self = const_cast<reference *>(this);
+    self->computed_authors = 1;
+    string &out = self->authors;
+    const int total = get_nauthors();
+    out.clear();
+    // None of these operands changes while the string is being built.
+    const int may_elide = (et_al.length() > 0
+                           && et_al_min_elide > 0
+                           && last_needed_author + et_al_min_elide < total
+                           && total >= et_al_min_total);
+    for (int i = 0; i < total; i++) {
+      const char *name_end;
+      const char *name = last_name_unambiguous.get(i)
+                         ? get_author_last_name(i, &name_end)
+                         : get_author(i, &name_end);
+      assert(name != 0);
+      out.append(name, name_end - name);
+      if (may_elide && i == last_needed_author) {
+        out += et_al;
+        break;
+      }
+      if (i == total - 1)
+        continue;		// no joiner after the final author
+      if (total == 2)
+        out += join_authors_exactly_two;
+      else if (i < total - 2)
+        out += join_authors_default;
+      else
+        out += join_authors_last_two;
+    }
+  }
+  const char *start = authors.contents();
+  *end = start + authors.length();
+  return start;
+}
+
+// Number of authors, counted on first use (while nauthors is still
+// negative) by calling get_author() with increasing indices until it
+// returns null, and cached afterwards.
+int reference::get_nauthors() const
+{
+  if (nauthors >= 0)
+    return nauthors;
+  const char *unused_end;
+  int found = 0;
+  while (get_author(found, &unused_end) != 0)
+    found++;
+  const_cast<reference *>(this)->nauthors = found;
+  return nauthors;
+}
+
+// Local Variables:
+// fill-column: 72
+// mode: C++
+// End:
+// vim: set cindent noexpandtab shiftwidth=2 textwidth=72:
diff --git a/src/preproc/refer/label.hpp b/src/preproc/refer/label.hpp
new file mode 100644
index 0000000..9f79fd2
--- /dev/null
+++ b/src/preproc/refer/label.hpp
@@ -0,0 +1,98 @@
+/* A Bison parser, made by GNU Bison 3.8.2.  */
+
+/* Bison interface for Yacc-like parsers in C
+
+   Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2021 Free Software Foundation,
+   Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* As a special exception, you may create a larger work that contains
+   part or all of the Bison parser skeleton and distribute that work
+   under terms of your choice, so long as that work isn't itself a
+   parser generator using the skeleton or a modified version thereof
+   as a parser skeleton.  Alternatively, if you modify or redistribute
+   the parser skeleton itself, you may (at your option) remove this
+   special exception, which will cause the skeleton and the resulting
+   Bison output files to be licensed under the GNU General Public
+   License without this special exception.
+
+   This special exception was added by the Free Software Foundation in
+   version 2.2 of Bison.  */
+
+/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual,
+   especially those whose name start with YY_ or yy_.  They are
+   private implementation details that can be changed or removed.  */
+
+#ifndef YY_YY_SRC_PREPROC_REFER_LABEL_HPP_INCLUDED
+# define YY_YY_SRC_PREPROC_REFER_LABEL_HPP_INCLUDED
+/* Debug traces.  */
+#ifndef YYDEBUG
+# define YYDEBUG 0
+#endif
+#if YYDEBUG
+extern int yydebug;
+#endif
+
+/* Token kinds.  */
+#ifndef YYTOKENTYPE
+# define YYTOKENTYPE
+  enum yytokentype
+  {
+    YYEMPTY = -2,
+    YYEOF = 0,                     /* "end of file"  */
+    YYerror = 256,                 /* error  */
+    YYUNDEF = 257,                 /* "invalid token"  */
+    TOKEN_LETTER = 258,            /* TOKEN_LETTER  */
+    TOKEN_LITERAL = 259,           /* TOKEN_LITERAL  */
+    TOKEN_DIGIT = 260              /* TOKEN_DIGIT  */
+  };
+  typedef enum yytokentype yytoken_kind_t;
+#endif
+/* Token kinds.  */
+#define YYEMPTY -2
+#define YYEOF 0
+#define YYerror 256
+#define YYUNDEF 257
+#define TOKEN_LETTER 258
+#define TOKEN_LITERAL 259
+#define TOKEN_DIGIT 260
+
+/* Value type.  */
+#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
+union YYSTYPE
+{
+#line 218 "../src/preproc/refer/label.ypp"
+
+  int num;
+  expression *expr;
+  struct { int ndigits; int val; } dig;
+  struct { int start; int len; } str;
+
+#line 84 "src/preproc/refer/label.hpp"
+
+};
+typedef union YYSTYPE YYSTYPE;
+# define YYSTYPE_IS_TRIVIAL 1
+# define YYSTYPE_IS_DECLARED 1
+#endif
+
+
+extern YYSTYPE yylval;
+
+
+int yyparse (void);
+
+
+#endif /* !YY_YY_SRC_PREPROC_REFER_LABEL_HPP_INCLUDED  */
diff --git a/src/preproc/refer/label.ypp b/src/preproc/refer/label.ypp
new file mode 100644
index 0000000..f5210d5
--- /dev/null
+++ b/src/preproc/refer/label.ypp
@@ -0,0 +1,1195 @@
+/* Copyright (C) 1989-2020 Free Software Foundation, Inc.
+     Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+%{
+
+#include "refer.h"
+#include "refid.h"
+#include "ref.h"
+#include "token.h"
+
+int yylex();
+void yyerror(const char *);
+
+static const char *format_serial(char c, int n);
+
+// Bookkeeping for one distinct label text; instances are created and
+// returned by lookup_label().
+struct label_info {
+  int start;	// offset of the text (set by the constructor)
+  int length;	// length of the text
+  int count;	// read by format_expr::evaluate() as the serial number
+  int total;	// star_expr::evaluate() tests total > 1
+  label_info(const string &);
+};
+
+label_info *lookup_label(const string &label);
+
+// Abstract node of a parsed label specification.
+struct expression {
+  // Bit flags returned by analyze().
+  enum {
+    // Does the tentative label depend on the reference?
+    CONTAINS_VARIABLE = 01, 
+    CONTAINS_STAR = 02,
+    CONTAINS_FORMAT = 04,
+    CONTAINS_AT = 010
+  };
+  virtual ~expression() { }
+  // Append this node's text for the given reference to the string.
+  // The int is the `tentative' flag; the substring_position is
+  // filled in by separator_expr.
+  virtual void evaluate(int, const reference &, string &,
+			substring_position &) = 0;
+  // OR of the CONTAINS_* flags that apply to this subtree.
+  virtual unsigned analyze() { return 0; }
+};
+
+// Node for '@': the authors of the reference.  Its evaluate() uses
+// canonicalize_authors() on the tentative pass and get_authors()
+// otherwise.
+class at_expr : public expression {
+public:
+  at_expr() { }
+  void evaluate(int, const reference &, string &, substring_position &);
+  unsigned analyze() { return CONTAINS_VARIABLE|CONTAINS_AT; }
+};
+
+// Node for '%' formats: a serial number rendered according to `type'
+// ('i', 'I', 'a', 'A', or '0' for zero-padded decimal).
+class format_expr : public expression {
+  char type;
+  int width;		// minimum width, used by the '0' format only
+  int first_number;	// added to the serial, used by the '0' format only
+public:
+  format_expr(char c, int w = 0, int f = 1)
+    : type(c), width(w), first_number(f) { }
+  void evaluate(int, const reference &, string &, substring_position &);
+  unsigned analyze() { return CONTAINS_FORMAT; }
+};
+
+// Node for a field letter, optionally followed by a number: field
+// `name' of the reference, narrowed with nth_field(number, ...).
+class field_expr : public expression {
+  int number;	// the grammar passes 0 or (written number - 1)
+  char name;
+public:
+  field_expr(char nm, int num) : number(num), name(nm) { }
+  void evaluate(int, const reference &, string &, substring_position &);
+  unsigned analyze() { return CONTAINS_VARIABLE; }
+};
+
+// Node for a quoted literal; holds its own copy of the text.  It
+// inherits analyze(), which returns 0.
+class literal_expr : public expression {
+  string s;
+public:
+  literal_expr(const char *ptr, int len) : s(ptr, len) { }
+  void evaluate(int, const reference &, string &, substring_position &);
+};
+
+// Base class for nodes with one operand.  The node owns `expr' and
+// deletes it in its destructor; `expr' may be null.
+class unary_expr : public expression {
+  // Copying would leave two nodes deleting the same operand, so it
+  // is disabled (declared private, never defined).
+  unary_expr(const unary_expr &);
+  unary_expr &operator=(const unary_expr &);
+protected:
+  expression *expr;
+public:
+  unary_expr(expression *e) : expr(e) { }
+  ~unary_expr() { delete expr; }
+  void evaluate(int, const reference &, string &, substring_position &) = 0;
+  // By default a unary node has the flags of its operand.
+  unsigned analyze() { return expr ? expr->analyze() : 0; }
+};
+
+// This caches the analysis of an expression: the flags are computed
+// once by the constructor, and analyze() returns the stored value.
+
+class analyzed_expr : public unary_expr {
+  unsigned flags;
+public:
+  analyzed_expr(expression *);
+  void evaluate(int, const reference &, string &, substring_position &);
+  unsigned analyze() { return flags; }
+};
+
+// Node for the postfix '*' operator.
+class star_expr : public unary_expr {
+public:
+  star_expr(expression *e) : unary_expr(e) { }
+  void evaluate(int, const reference &, string &, substring_position &);
+  // The operand's flags with CONTAINS_VARIABLE cleared, plus
+  // CONTAINS_STAR.
+  unsigned analyze() {
+    return ((expr ? (expr->analyze() & ~CONTAINS_VARIABLE) : 0)
+	    | CONTAINS_STAR);
+  }
+};
+
+// A map_func receives the start and end of a text and a string; the
+// functions used (lowercase, uppercase, ...) are declared elsewhere.
+typedef void map_func(const char *, const char *, string &);
+
+// Node for the '.l', '.u', '.c', '.r' and '.a' postfix functions:
+// applies `func' to the operand's text.
+class map_expr : public unary_expr {
+  map_func *func;
+public:
+  map_expr(expression *e, map_func *f) : unary_expr(e), func(f) { }
+  void evaluate(int, const reference &, string &, substring_position &);
+};
+  
+// An extractor_func receives the start and end of a text and returns
+// the start of a match (null if none), storing the match's end
+// through the third argument.
+typedef const char *extractor_func(const char *, const char *, const char **);
+
+// Node for the '.y' and '.n' postfix functions: appends the text
+// before, inside or after the match found by `func', as selected by
+// `part'.
+class extractor_expr : public unary_expr {
+  int part;
+  extractor_func *func;
+public:
+  // These values equal what the grammar's `flag' rule yields for
+  // '+', nothing and '-' respectively.
+  enum { BEFORE = +1, MATCH = 0, AFTER = -1 };
+  extractor_expr(expression *e, extractor_func *f, int pt)
+    : unary_expr(e), part(pt), func(f) { }
+  void evaluate(int, const reference &, string &, substring_position &);
+};
+
+// Node for the postfix '+N' and '-N' operators; the grammar passes
+// N for '+' and -N for '-'.
+class truncate_expr : public unary_expr {
+  int n;
+public:
+  truncate_expr(expression *e, int i) : unary_expr(e), n(i) { } 
+  void evaluate(int, const reference &, string &, substring_position &);
+};
+
+// Node for '<...>': evaluates its operand and reports, through the
+// substring_position argument, where in the result its text lies.
+class separator_expr : public unary_expr {
+public:
+  separator_expr(expression *e) : unary_expr(e) { }
+  void evaluate(int, const reference &, string &, substring_position &);
+};
+
+// Base class for nodes with two operands.  The node owns both and
+// deletes them in its destructor; either may be null.
+class binary_expr : public expression {
+  // Copying would leave two nodes deleting the same operands, so it
+  // is disabled (declared private, never defined).
+  binary_expr(const binary_expr &);
+  binary_expr &operator=(const binary_expr &);
+protected:
+  expression *expr1;
+  expression *expr2;
+public:
+  binary_expr(expression *e1, expression *e2) : expr1(e1), expr2(e2) { }
+  ~binary_expr() { delete expr1; delete expr2; }
+  void evaluate(int, const reference &, string &, substring_position &) = 0;
+  // The union of both operands' flags.
+  unsigned analyze() {
+    return (expr1 ? expr1->analyze() : 0) | (expr2 ? expr2->analyze() : 0);
+  }
+};
+
+// Node for 'a|b'.
+class alternative_expr : public binary_expr {
+public:
+  alternative_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
+  void evaluate(int, const reference &, string &, substring_position &);
+};
+
+// Node for juxtaposition ('a b').
+class list_expr : public binary_expr {
+public:
+  list_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
+  void evaluate(int, const reference &, string &, substring_position &);
+};
+
+// Node for 'a~b'.
+class substitute_expr : public binary_expr {
+public:
+  substitute_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
+  void evaluate(int, const reference &, string &, substring_position &);
+};
+
+// Base class for nodes with three operands.  The node owns all of
+// them and deletes them in its destructor; any may be null.
+class ternary_expr : public expression {
+  // Copying would leave two nodes deleting the same operands, so it
+  // is disabled (declared private, never defined).
+  ternary_expr(const ternary_expr &);
+  ternary_expr &operator=(const ternary_expr &);
+protected:
+  expression *expr1;
+  expression *expr2;
+  expression *expr3;
+public:
+  ternary_expr(expression *e1, expression *e2, expression *e3)
+    : expr1(e1), expr2(e2), expr3(e3) { }
+  ~ternary_expr() { delete expr1; delete expr2; delete expr3; }
+  void evaluate(int, const reference &, string &, substring_position &) = 0;
+  // The union of all three operands' flags.
+  unsigned analyze() {
+    return ((expr1 ? expr1->analyze() : 0)
+	    | (expr2 ? expr2->analyze() : 0)
+	    | (expr3 ? expr3->analyze() : 0));
+  }
+};
+
+// Node for 'a?b:c'; the grammar also builds it for 'a&b', with a
+// null third operand.
+class conditional_expr : public ternary_expr {
+public:
+  conditional_expr(expression *e1, expression *e2, expression *e3)
+    : ternary_expr(e1, e2, e3) { }
+  void evaluate(int, const reference &, string &, substring_position &);
+};
+
+// The currently installed specifications, replaced by
+// set_label_spec(), set_date_label_spec() and set_short_label_spec()
+// respectively; null until one has been parsed successfully.
+static expression *parsed_label = 0;
+static expression *parsed_date_label = 0;
+static expression *parsed_short_label = 0;
+
+// Where the grammar's start rule leaves the tree it has just built.
+static expression *parse_result;
+
+// Text of the quoted literals collected by yylex(); a TOKEN_LITERAL
+// value refers to it by start offset and length.
+string literals;
+
+%}
+
+/* Semantic value types; see the %token and %type declarations. */
+%union {
+  int num;	/* letter code, digit value, number or flag */
+  expression *expr;	/* expression subtree */
+  struct { int ndigits; int val; } dig;	/* digit count and value */
+  struct { int start; int len; } str;	/* slice of `literals' */
+}
+
+/* uppercase or lowercase letter */
+%token <num> TOKEN_LETTER
+/* literal characters */
+%token <str> TOKEN_LITERAL
+/* digit */
+%token <num> TOKEN_DIGIT
+
+%type <expr> conditional
+%type <expr> alternative
+%type <expr> list
+%type <expr> string
+%type <expr> substitute
+%type <expr> optional_conditional
+%type <num> number
+%type <dig> digits
+%type <num> optional_number
+%type <num> flag
+
+%%
+
+/* Start symbol.  The tree is left in parse_result, wrapped in an
+   analyzed_expr; an empty specification leaves a null pointer. */
+expr:
+	optional_conditional
+		{ parse_result = ($1 ? new analyzed_expr($1) : 0); }
+	;
+
+/* a ? b : c, where b may be empty and c may itself be a
+   conditional. */
+conditional:
+	alternative
+		{ $$ = $1; }
+	| alternative '?' optional_conditional ':' conditional
+		{ $$ = new conditional_expr($1, $3, $5); }
+	;
+
+/* A conditional, or nothing (represented by a null pointer). */
+optional_conditional:
+	/* empty */
+		{ $$ = 0; }
+	| conditional
+		{ $$ = $1; }
+	;
+
+/* Left-recursive chains of '|' and '&'.  'a & b' is built as a
+   conditional_expr with a null third operand. */
+alternative:
+	list
+		{ $$ = $1; }
+	| alternative '|' list
+		{ $$ = new alternative_expr($1, $3); }
+	| alternative '&' list
+		{ $$ = new conditional_expr($1, $3, 0); }
+	;	
+
+/* Juxtaposed items, combined pairwise into list_expr nodes. */
+list:
+	substitute
+		{ $$ = $1; }
+	| list substitute
+		{ $$ = new list_expr($1, $2); }
+	;
+
+/* Left-recursive chains of '~'. */
+substitute:
+	string
+		{ $$ = $1; }
+	| substitute '~' string
+		{ $$ = new substitute_expr($1, $3); }
+	;
+
+/* Primary items and the postfix operators that apply to them. */
+string:
+	'@'
+		{ $$ = new at_expr; }
+	| TOKEN_LITERAL
+		{
+		  $$ = new literal_expr(literals.contents() + $1.start,
+					$1.len);
+		}
+	| TOKEN_LETTER
+		{ $$ = new field_expr($1, 0); }
+	/* The written number is one more than what field_expr gets. */
+	| TOKEN_LETTER number
+		{ $$ = new field_expr($1, $2 - 1); }
+	/* An unrecognized format letter is reported and treated as 'a'. */
+	| '%' TOKEN_LETTER
+		{
+		  switch ($2) {
+		  case 'I':
+		  case 'i':
+		  case 'A':
+		  case 'a':
+		    $$ = new format_expr($2);
+		    break;
+		  default:
+		    command_error("unrecognized format '%1'", char($2));
+		    $$ = new format_expr('a');
+		    break;
+		  }
+		}
+	
+	/* The digit count becomes the width and the value the first
+	   number of the '0' format. */
+	| '%' digits
+		{
+		  $$ = new format_expr('0', $2.ndigits, $2.val);
+		}
+	/* Postfix function.  The flag ($3) is used only by 'y' and 'n';
+	   the trailing optional_number ($5) is parsed but not used.  An
+	   unknown letter is reported and the operand kept unchanged. */
+	| string '.' flag TOKEN_LETTER optional_number
+		{
+		  switch ($4) {
+		  case 'l':
+		    $$ = new map_expr($1, lowercase);
+		    break;
+		  case 'u':
+		    $$ = new map_expr($1, uppercase);
+		    break;
+		  case 'c':
+		    $$ = new map_expr($1, capitalize);
+		    break;
+		  case 'r':
+		    $$ = new map_expr($1, reverse_name);
+		    break;
+		  case 'a':
+		    $$ = new map_expr($1, abbreviate_name);
+		    break;
+		  case 'y':
+		    $$ = new extractor_expr($1, find_year, $3);
+		    break;
+		  case 'n':
+		    $$ = new extractor_expr($1, find_last_name, $3);
+		    break;
+		  default:
+		    $$ = $1;
+		    command_error("unknown function '%1'", char($4));
+		    break;
+		  }
+		}
+
+	| string '+' number
+		{ $$ = new truncate_expr($1, $3); }
+	| string '-' number
+		{ $$ = new truncate_expr($1, -$3); }
+	| string '*'
+		{ $$ = new star_expr($1); }
+	/* Parentheses only group; no node is created for them. */
+	| '(' optional_conditional ')'
+		{ $$ = $2; }
+	| '<' optional_conditional '>'
+		{ $$ = new separator_expr($2); }
+	;
+
+/* A number, or -1 when none is written. */
+optional_number:
+	/* empty */
+		{ $$ = -1; }
+	| number
+		{ $$ = $1; }
+	;
+
+/* A run of digits, accumulated as a decimal value. */
+number:
+	TOKEN_DIGIT
+		{ $$ = $1; }
+	| number TOKEN_DIGIT
+		{ $$ = $1*10 + $2; }
+	;
+
+/* Like number, but also counts how many digits were written. */
+digits:
+	TOKEN_DIGIT
+		{ $$.ndigits = 1; $$.val = $1; }
+	| digits TOKEN_DIGIT
+		{ $$.ndigits = $1.ndigits + 1; $$.val = $1.val*10 + $2; }
+	;
+	
+      
+/* Optional sign after '.': 0 when absent, 1 for '+', -1 for '-'.
+   These equal extractor_expr's MATCH, BEFORE and AFTER. */
+flag:
+	/* empty */
+		{ $$ = 0; }
+	| '+'
+		{ $$ = 1; }
+	| '-'
+		{ $$ = -1; }
+	;
+
+%%
+
+/* bison defines const to be empty unless __STDC__ is defined, which it
+isn't under cfront */
+
+#ifdef const
+#undef const
+#endif
+
+// Lexer cursor over the specification being parsed; all three are
+// initialized by the set_*_spec() functions.
+const char *spec_ptr;	// next character yylex() will read
+const char *spec_end;	// the terminating null of the specification
+const char *spec_cur;	// start of the token yylex() last scanned
+
+// Letters indexed 0..25; format_serial() uses these tables for the
+// 'A' and 'a' formats.
+static char uppercase_array[] = {
+  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
+  'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
+  'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
+  'Y', 'Z',
+};
+  
+static char lowercase_array[] = {
+  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
+  'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
+  'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
+  'y', 'z',
+};
+
+// Lexer for label specifications.  Skips white space, then returns
+// 0 at the end of input, TOKEN_LETTER or TOKEN_DIGIT for a single
+// letter or digit, TOKEN_LITERAL for a quoted literal (its text is
+// appended to `literals'), or otherwise the character itself.
+int yylex()
+{
+  for (; spec_ptr < spec_end; spec_ptr++) {
+    if (!csspace(*spec_ptr))
+      break;
+  }
+  spec_cur = spec_ptr;
+  if (spec_ptr >= spec_end)
+    return 0;
+  unsigned char c = *spec_ptr++;
+  if (csalpha(c)) {
+    yylval.num = c;
+    return TOKEN_LETTER;
+  }
+  if (csdigit(c)) {
+    yylval.num = c - '0';
+    return TOKEN_DIGIT;
+  }
+  if (c != '\'')
+    return c;
+  // Quoted literal.  A doubled quote stands for one quote character;
+  // a single quote, or the end of the input, ends the literal.
+  yylval.str.start = literals.length();
+  while (spec_ptr < spec_end) {
+    const char ch = *spec_ptr++;
+    if (ch != '\'') {
+      literals += ch;
+      continue;
+    }
+    if (spec_ptr < spec_end && *spec_ptr == '\'') {
+      literals += '\'';
+      spec_ptr++;
+    }
+    else
+      break;
+  }
+  yylval.str.len = literals.length() - yylval.str.start;
+  return TOKEN_LITERAL;
+}
+
+// Parse LABEL_SPEC and, if that succeeds, install it as the label
+// specification in place of the previous one.  Returns 1 on success;
+// on a parse error returns 0 and keeps the previous specification.
+int set_label_spec(const char *label_spec)
+{
+  spec_ptr = label_spec;
+  spec_cur = label_spec;
+  spec_end = label_spec + strlen(label_spec);
+  literals.clear();
+  if (yyparse() != 0)
+    return 0;
+  delete parsed_label;
+  parsed_label = parse_result;
+  return 1;
+}
+
+// Parse LABEL_SPEC and, if that succeeds, install it as the date
+// label specification in place of the previous one.  Returns 1 on
+// success; on a parse error returns 0 and keeps the previous one.
+int set_date_label_spec(const char *label_spec)
+{
+  spec_ptr = label_spec;
+  spec_cur = label_spec;
+  spec_end = label_spec + strlen(label_spec);
+  literals.clear();
+  if (yyparse() != 0)
+    return 0;
+  delete parsed_date_label;
+  parsed_date_label = parse_result;
+  return 1;
+}
+
+// Parse LABEL_SPEC and, if that succeeds, install it as the short
+// label specification in place of the previous one.  Returns 1 on
+// success; on a parse error returns 0 and keeps the previous one.
+int set_short_label_spec(const char *label_spec)
+{
+  spec_ptr = label_spec;
+  spec_cur = label_spec;
+  spec_end = label_spec + strlen(label_spec);
+  literals.clear();
+  if (yyparse() != 0)
+    return 0;
+  delete parsed_short_label;
+  parsed_short_label = parse_result;
+  return 1;
+}
+
+void yyerror(const char *message)
+{
+  if (spec_cur < spec_end)
+    command_error("label specification %1 before '%2'", message, spec_cur);
+  else
+    command_error("label specification %1 at end of string",
+		  message, spec_cur);
+}
+
+// Append the authors of REF to RESULT: in the form produced by
+// canonicalize_authors() on the tentative pass, and as the string
+// from get_authors() on the final pass.
+void at_expr::evaluate(int tentative, const reference &ref,
+                       string &result, substring_position &)
+{
+  if (tentative) {
+    ref.canonicalize_authors(result);
+    return;
+  }
+  const char *stop;
+  const char *text = ref.get_authors(&stop);
+  if (text != 0)
+    result.append(text, stop - text);
+}
+
+// Append the reference's serial number in the format selected by
+// 'type'.  Nothing is appended during a tentative evaluation.  The
+// number is the label's running count when the reference has label
+// information, otherwise the reference number.
+void format_expr::evaluate(int tentative, const reference &ref,
+			   string &result, substring_position &)
+{
+  if (tentative)
+    return;
+  const label_info *info = ref.get_label_ptr();
+  int num = info != 0 ? info->count : ref.get_number();
+  if (type == '0') {
+    // Decimal, padded on the left with zeros up to 'width' characters.
+    const char *digits = i_to_a(num + first_number);
+    for (int pad = width - strlen(digits); pad > 0; pad--)
+      result += '0';
+    result += digits;
+  }
+  else
+    result += format_serial(type, num + 1);
+}
+
+// Format the positive number n according to the format character c:
+// 'i'/'I' produce lower-/uppercase Roman numerals and 'a'/'A' produce
+// lower-/uppercase alphabetic numbering.  The result is built in a
+// static buffer, except that numbers too large for Roman numerals are
+// returned as formatted by i_to_a().
+static const char *format_serial(char c, int n)
+{
+  assert(n > 0);
+  static char buf[128]; // more than enough.
+  char *out = buf;
+  if (c == 'i' || c == 'I') {
+    // troff uses z and w to represent 10000 and 5000 in Roman
+    // numerals; I can find no historical basis for this usage
+    const char *sym = c == 'i' ? "zwmdclxvi" : "ZWMDCLXVI";
+    if (n >= 40000)
+      return i_to_a(n);
+    for (; n >= 10000; n -= 10000)
+      *out++ = sym[0];
+    // Within each decimal place, sym[0], sym[1] and sym[2] are the
+    // symbols worth ten, five and one units of that place.
+    for (int unit = 1000; unit > 0; unit /= 10, sym += 2) {
+      int digit = n/unit;
+      n -= digit*unit;
+      if (digit == 9) {
+	*out++ = sym[2];
+	*out++ = sym[0];
+      }
+      else if (digit == 4) {
+	*out++ = sym[2];
+	*out++ = sym[1];
+      }
+      else {
+	if (digit >= 5) {
+	  *out++ = sym[1];
+	  digit -= 5;
+	}
+	while (digit-- > 0)
+	  *out++ = sym[2];
+      }
+    }
+    *out = 0;
+  }
+  else if (c == 'a' || c == 'A') {
+    // this is derived from troff/reg.c
+    // Letters are produced least significant first ...
+    while (n > 0) {
+      int d = n % 26;
+      if (d == 0)
+	d = 26;
+      n = (n - d)/26;
+      *out++ = c == 'a' ? lowercase_array[d - 1] :
+			  uppercase_array[d - 1];
+    }
+    *out = 0;
+    // ... and then reversed in place.
+    for (char *lo = buf, *hi = out - 1; lo < hi; ++lo, --hi) {
+      char swapped = *lo;
+      *lo = *hi;
+      *hi = swapped;
+    }
+  }
+  else
+    assert(0);
+  return buf;
+}
+
+// Append occurrence 'number' (as selected by nth_field()) of the field
+// called 'name'; nothing is appended when the field or the requested
+// occurrence is missing.
+void field_expr::evaluate(int, const reference &ref,
+			  string &result, substring_position &)
+{
+  const char *field_end;
+  const char *field_start = ref.get_field(name, &field_end);
+  if (field_start == 0)
+    return;
+  field_start = nth_field(number, field_start, &field_end);
+  if (field_start == 0)
+    return;
+  result.append(field_start, field_end - field_start);
+}
+
+// Append the stored literal text 's'; the reference and the tentative
+// flag are ignored.
+void literal_expr::evaluate(int, const reference &,
+			    string &result, substring_position &)
+{
+  result += s;
+}
+
+// Wrap expression e and record the flags returned by its analyze()
+// at construction time (0 for a null expression).
+analyzed_expr::analyzed_expr(expression *e)
+: unary_expr(e), flags(e ? e->analyze() : 0)
+{
+}
+
+// Forward the evaluation to the wrapped expression, if there is one.
+void analyzed_expr::evaluate(int tentative, const reference &ref,
+			     string &result, substring_position &pos)
+{
+  if (expr == 0)
+    return;
+  expr->evaluate(tentative, ref, result, pos);
+}
+
+// Evaluate the operand only in a final (non-tentative) evaluation, and
+// only if the reference has no label information or its label is
+// shared (total > 1).
+void star_expr::evaluate(int tentative, const reference &ref,
+			 string &result, substring_position &pos)
+{
+  const label_info *info = ref.get_label_ptr();
+  if (tentative)
+    return;
+  if (info != 0 && info->total <= 1)
+    return;
+  if (expr != 0)
+    expr->evaluate(tentative, ref, result, pos);
+}
+
+// Evaluate the operand and, if no separator position has been recorded
+// yet (pos.start < 0 on entry), record the span of text the operand
+// appended to result.
+void separator_expr::evaluate(int tentative, const reference &ref,
+			      string &result, substring_position &pos)
+{
+  const int offset = result.length();
+  const int record_position = pos.start < 0;
+  if (expr != 0)
+    expr->evaluate(tentative, ref, result, pos);
+  if (!record_position)
+    return;
+  pos.start = offset;
+  pos.length = result.length() - offset;
+}
+
+// Evaluate the operand into a scratch string and let the mapping
+// function 'func' append its transformation of that text to result.
+void map_expr::evaluate(int tentative, const reference &ref,
+			string &result, substring_position &)
+{
+  if (expr == 0)
+    return;
+  string operand;
+  substring_position unused_pos;
+  expr->evaluate(tentative, ref, operand, unused_pos);
+  const char *text = operand.contents();
+  (*func)(text, text + operand.length(), result);
+}
+
+// Evaluate the operand into a scratch string, let 'func' locate a
+// match inside it, and append the text before, inside or after that
+// match according to 'part'.  Without a match, BEFORE yields the whole
+// operand while MATCH and AFTER yield nothing.
+void extractor_expr::evaluate(int tentative, const reference &ref,
+			      string &result, substring_position &)
+{
+  if (expr == 0)
+    return;
+  string operand;
+  substring_position unused_pos;
+  expr->evaluate(tentative, ref, operand, unused_pos);
+  const char *text = operand.contents();
+  const char *text_end = text + operand.length();
+  const char *match_end;
+  const char *match = (*func)(text, text_end, &match_end);
+  switch (part) {
+  case BEFORE:
+    if (match == 0)
+      result += operand;
+    else
+      result.append(text, match - text);
+    break;
+  case MATCH:
+    if (match != 0)
+      result.append(match, match_end - match);
+    break;
+  case AFTER:
+    if (match != 0)
+      result.append(match_end, text_end - match_end);
+    break;
+  default:
+    assert(0);
+  }
+}
+
+// Append to result the leading tokens of [ptr, end) up to, but not
+// including, token number len + 1 among those whose sortified form is
+// non-empty.  Tokens that do not count are copied only if they are
+// accents.
+static void first_part(int len, const char *ptr, const char *end,
+			  string &result)
+{
+  const char *tok = ptr;
+  while (get_token(&ptr, end)) {
+    const token_info *info = lookup_token(tok, ptr);
+    const int counts = info->sortify_non_empty(tok, ptr);
+    if (counts && --len < 0)
+      return;
+    if (counts || info->is_accent())
+      result.append(tok, ptr - tok);
+    tok = ptr;
+  }
+}
+
+// Append to result the tail of [ptr, end) that holds the last len
+// tokens whose sortified form is non-empty.
+static void last_part(int len, const char *ptr, const char *end,
+		      string &result)
+{
+  // First pass: count the tokens that matter.
+  int significant = 0;
+  const char *scan = ptr;
+  for (const char *tok = scan; get_token(&scan, end); tok = scan)
+    if (lookup_token(tok, scan)->sortify_non_empty(tok, scan))
+      significant++;
+  // Second pass: step over the surplus leading tokens, stopping at the
+  // start of the first significant token that is to be kept.
+  int skip = significant - len;
+  if (skip > 0) {
+    for (;;) {
+      const char *tok = ptr;
+      if (!get_token(&ptr, end))
+	assert(0);
+      const token_info *info = lookup_token(tok, ptr);
+      if (info->sortify_non_empty(tok, ptr) && --skip < 0) {
+	ptr = tok;
+	break;
+      }
+    }
+  }
+  first_part(len, ptr, end, result);
+}
+
+// Evaluate the operand into a scratch string and append its first n
+// significant tokens (n > 0) or its last -n significant tokens
+// (n < 0).  Nothing is appended when n is zero.
+void truncate_expr::evaluate(int tentative, const reference &ref,
+			     string &result, substring_position &)
+{
+  if (expr == 0)
+    return;
+  string operand;
+  substring_position unused_pos;
+  expr->evaluate(tentative, ref, operand, unused_pos);
+  const char *text = operand.contents();
+  const char *text_end = text + operand.length();
+  if (n < 0)
+    last_part(-n, text, text_end, result);
+  else if (n > 0)
+    first_part(n, text, text_end, result);
+}
+
+// Evaluate the first operand; if it appended nothing to result, fall
+// back to the second operand.
+void alternative_expr::evaluate(int tentative, const reference &ref,
+				string &result, substring_position &pos)
+{
+  const int length_before = result.length();
+  if (expr1 != 0)
+    expr1->evaluate(tentative, ref, result, pos);
+  const int first_was_empty = result.length() == length_before;
+  if (first_was_empty && expr2 != 0)
+    expr2->evaluate(tentative, ref, result, pos);
+}
+
+// Concatenation: evaluate the first operand and then the second, both
+// appending to the same result and sharing the same position record.
+void list_expr::evaluate(int tentative, const reference &ref,
+			 string &result, substring_position &pos)
+{
+  if (expr1)
+    expr1->evaluate(tentative, ref, result, pos);
+  if (expr2)
+    expr2->evaluate(tentative, ref, result, pos);
+}
+
+// Evaluate the first operand; if it appended text that ends in '-',
+// drop that '-' and evaluate the second operand in its place.
+void substitute_expr::evaluate(int tentative, const reference &ref,
+			       string &result, substring_position &pos)
+{
+  const int length_before = result.length();
+  if (expr1 != 0)
+    expr1->evaluate(tentative, ref, result, pos);
+  const int length_after = result.length();
+  if (length_after <= length_before || result[length_after - 1] != '-')
+    return;
+  // ought to see if pos covers the -
+  result.set_length(length_after - 1);
+  if (expr2 != 0)
+    expr2->evaluate(tentative, ref, result, pos);
+}
+
+// Evaluate the condition (expr1) into a scratch string.  If it
+// produced any text, evaluate expr2 into result, otherwise expr3.  The
+// condition's own text is discarded.
+void conditional_expr::evaluate(int tentative, const reference &ref,
+				string &result, substring_position &pos)
+{
+  string condition;
+  substring_position unused_pos;
+  if (expr1 != 0)
+    expr1->evaluate(tentative, ref, condition, unused_pos);
+  if (condition.length() > 0) {
+    if (expr2 != 0)
+      expr2->evaluate(tentative, ref, result, pos);
+    return;
+  }
+  if (expr3 != 0)
+    expr3->evaluate(tentative, ref, result, pos);
+}
+
+// If the label specification contains a variable part, evaluate it
+// tentatively and look the resulting label up in the label table,
+// remembering the table entry in label_ptr.
+void reference::pre_compute_label()
+{
+  if (parsed_label == 0)
+    return;
+  if (!(parsed_label->analyze() & expression::CONTAINS_VARIABLE))
+    return;
+  label.clear();
+  substring_position unused_pos;
+  parsed_label->evaluate(1, *this, label, unused_pos);
+  label_ptr = lookup_label(label);
+}
+
+// Perform the final (non-tentative) evaluation of the label
+// specifications: rebuild the label, build the short label when short
+// labels are enabled, replace the date when dates are used as labels,
+// and finally bump the usage count of this reference's label entry.
+void reference::compute_label()
+{
+  label.clear();
+  if (parsed_label != 0)
+    parsed_label->evaluate(0, *this, label, separator_pos);
+  if (short_label_flag && parsed_short_label != 0)
+    parsed_short_label->evaluate(0, *this, short_label, short_separator_pos);
+  if (date_as_label) {
+    // The date becomes empty when there is no date label specification.
+    string replacement_date;
+    if (parsed_date_label != 0) {
+      substring_position unused_pos;
+      parsed_date_label->evaluate(0, *this, replacement_date, unused_pos);
+    }
+    set_date(replacement_date);
+  }
+  if (label_ptr != 0)
+    label_ptr->count++;
+}
+
+// Compute the label right away.  When the reference has a label table
+// entry, its total is set to 2 first so that expressions depending on
+// total > 1 (see star_expr::evaluate) take effect.
+void reference::immediate_compute_label()
+{
+  if (label_ptr)
+    label_ptr->total = 2;	// force use of disambiguator
+  compute_label();
+}
+
+// Try to merge this reference's label with those of the n references
+// in v, using range abbreviation when abbreviate_label_ranges is set
+// and merging by common first parts otherwise.  Returns the value of
+// the chosen merge routine.
+int reference::merge_labels(reference **v, int n, label_type type,
+			    string &result)
+{
+  return abbreviate_label_ranges
+	 ? merge_labels_by_number(v, n, type, result)
+	 : merge_labels_by_parts(v, n, type, result);
+}
+
+// Collapse a run of consecutively numbered references into a range.
+// The run is this reference followed by the leading elements of v
+// whose numbers continue the sequence.  On success result receives
+// "<this label><range indicator><last label>" and the number of
+// elements of v consumed is returned; otherwise 0 is returned and
+// result is left alone.
+int reference::merge_labels_by_number(reference **v, int n, label_type type,
+				      string &result)
+{
+  if (n <= 1)
+    return 0;
+  const int base = get_number();
+  int run = 0;
+  while (run < n && v[run]->get_number() == base + run + 1)
+    run++;
+  // Only merge three or more labels.
+  if (run < 2)
+    return 0;
+  result = get_label(type);
+  result += label_range_indicator;
+  result += v[run - 1]->get_label(type);
+  return run;
+}
+
+// Return the separator position that belongs to the label selected by
+// type: the short label's when a short label is requested and short
+// labels are enabled, the ordinary label's otherwise.
+const substring_position &reference::get_separator_pos(label_type type) const
+{
+  const int use_short = type == SHORT_LABEL && short_label_flag;
+  return use_short ? short_separator_pos : separator_pos;
+}
+
+// Return the short label when one is requested and short labels are
+// enabled; otherwise return the ordinary label.
+const string &reference::get_label(label_type type) const
+{
+  const int use_short = type == SHORT_LABEL && short_label_flag;
+  return use_short ? short_label : label;
+}
+
+// Merge labels that share the same text before the separator.  The
+// result is this reference's label followed, for each leading element
+// of v with an identical first part, by separate_label_second_parts
+// and that element's text after the separator.  Returns the number of
+// elements of v merged; 0 means result was not touched.
+int reference::merge_labels_by_parts(reference **v, int n, label_type type,
+				     string &result)
+{
+  if (n <= 0)
+    return 0;
+  const string &own_label = get_label(type);
+  const int prefix_len = get_separator_pos(type).start;
+  if (prefix_len < 0)
+    return 0;
+  int merged = 0;
+  for (;;) {
+    const string &other_label = v[merged]->get_label(type);
+    const substring_position &other_sep = v[merged]->get_separator_pos(type);
+    if (other_sep.start != prefix_len
+	|| memcmp(own_label.contents(), other_label.contents(),
+		  prefix_len) != 0)
+      break;
+    if (merged == 0)
+      result = own_label;
+    result += separate_label_second_parts;
+    const int second_part = other_sep.start + other_sep.length;
+    result.append(other_label.contents() + second_part,
+		  other_label.length() - second_part);
+    if (++merged >= n)
+      break;
+  }
+  return merged;
+}
+
+// Storage for the text of every label in the label table; a
+// label_info refers to its text by offset and length into this pool.
+string label_pool;
+
+// Record a new label: remember where its text will start in
+// label_pool (the pool's current length) and its length, then append
+// the text.  count starts at 0 and total at 1.
+label_info::label_info(const string &s)
+: start(label_pool.length()), length(s.length()), count(0), total(1)
+{
+  label_pool += s;
+}
+
+// Open-addressed hash table of labels.  Collisions are resolved by
+// probing backwards through the table, wrapping at the start.
+static label_info **label_table = 0;
+static int label_table_size = 0;
+static int label_table_used = 0;
+
+// Return the slot probed after SLOT: the previous one, wrapping from
+// the first slot of label_table to the last.
+static label_info **previous_label_slot(label_info **slot)
+{
+  if (slot == label_table)
+    return label_table + label_table_size - 1;
+  return slot - 1;
+}
+
+// Return the table entry for label.  If the label is already present
+// its total is incremented; otherwise a new entry is created.  The
+// table is grown and rehashed once it becomes more than half full.
+label_info *lookup_label(const string &label)
+{
+  if (label_table == 0) {
+    label_table_size = 17;
+    label_table = new label_info *[label_table_size];
+    for (int i = 0; i < label_table_size; i++)
+      label_table[i] = 0;
+  }
+  unsigned h = hash_string(label.contents(), label.length()) % label_table_size;
+  label_info **slot = label_table + h;
+  while (*slot != 0) {
+    label_info *candidate = *slot;
+    if (candidate->length == label.length()
+	&& memcmp(label_pool.contents() + candidate->start, label.contents(),
+		  label.length()) == 0) {
+      candidate->total += 1;
+      return candidate;
+    }
+    slot = previous_label_slot(slot);
+  }
+  label_info *result = new label_info(label);
+  *slot = result;
+  label_table_used++;
+  if (label_table_used * 2 <= label_table_size)
+    return result;
+  // Rehash the table.
+  label_info **old_table = label_table;
+  const int old_size = label_table_size;
+  label_table_size = next_size(label_table_size);
+  label_table = new label_info *[label_table_size];
+  int i;
+  for (i = 0; i < label_table_size; i++)
+    label_table[i] = 0;
+  for (i = 0; i < old_size; i++) {
+    label_info *entry = old_table[i];
+    if (entry == 0)
+      continue;
+    h = hash_string(label_pool.contents() + entry->start, entry->length);
+    label_info **free_slot = label_table + (h % label_table_size);
+    while (*free_slot != 0)
+      free_slot = previous_label_slot(free_slot);
+    *free_slot = entry;
+  }
+  delete[] old_table;
+  return result;
+}
+
+// Delete every entry of the label table and empty the label pool.  The
+// table itself is kept at its current size for reuse.
+void clear_labels()
+{
+  label_info **slot = label_table;
+  for (int remaining = label_table_size; remaining > 0; remaining--, slot++) {
+    delete *slot;
+    *slot = 0;
+  }
+  label_table_used = 0;
+  label_pool.clear();
+}
+
+static void consider_authors(reference **start, reference **end, int i);
+
+// Compute the final labels of the n references in v.  When the label
+// specification uses the author list and the sort specification begins
+// with "A+", first work out which authors each reference needs and
+// which last names are unambiguous.
+void compute_labels(reference **v, int n)
+{
+  const int label_uses_authors
+    = parsed_label != 0
+      && (parsed_label->analyze() & expression::CONTAINS_AT) != 0;
+  const int sorted_by_all_authors
+    = sort_fields.length() >= 2
+      && sort_fields[0] == 'A'
+      && sort_fields[1] == '+';
+  if (label_uses_authors && sorted_by_all_authors)
+    consider_authors(v, v + n, 0);
+  reference **const last = v + n;
+  for (reference **r = v; r < last; r++)
+    (*r)->compute_label();
+}
+
+
+/* A reference with a list of authors <A0,A1,...,AN> _needs_ author i
+where 0 <= i <= N if there exists a reference with a list of authors
+<B0,B1,...,BM> such that <A0,A1,...,AN> != <B0,B1,...,BM> and M >= i
+and Aj = Bj for 0 <= j < i.  In this case we can't say "A0,
+A1,...,A(i-1) et al" because this would match both <A0,A1,...,AN> and
+<B0,B1,...,BM>.  If a reference needs author i we only have to call
+need_author(j) for some j >= i such that the reference also needs
+author j. */
+
+/* This function handles 2 tasks:
+determine which authors are needed (cannot be elided with et al.);
+determine which authors can have only last names in the labels.
+
+References >= start and < end have the same first i author names.
+Also they're sorted by A+. */
+
+static void consider_authors(reference **start, reference **end, int i)
+{
+  if (start >= end)
+    return;
+  reference **p = start;
+  // Step over the leading references that have no author number i.
+  if (i >= (*p)->get_nauthors()) {
+    for (++p; p < end && i >= (*p)->get_nauthors(); p++)
+      ;
+    if (p < end && i > 0) {
+      // If we have an author list <A B C> and an author list <A B C D>,
+      // then both lists need C.
+      for (reference **q = start; q < end; q++)
+	(*q)->need_author(i - 1);
+    }
+    start = p;
+  }
+  // Each iteration handles one run of references whose author i has
+  // the same last name.
+  while (p < end) {
+    reference **last_name_start = p;
+    reference **name_start = p;
+    // Inside the run, split off sub-runs sharing the full name of
+    // author i and recurse on each of them for author i + 1.
+    for (++p;
+	 p < end && i < (*p)->get_nauthors()
+	 && same_author_last_name(**last_name_start, **p, i);
+	 p++) {
+      if (!same_author_name(**name_start, **p, i)) {
+	consider_authors(name_start, p, i + 1);
+	name_start = p;
+      }
+    }
+    consider_authors(name_start, p, i + 1);
+    // The run was never split, so everyone with this last name has the
+    // same full name.
+    if (last_name_start == name_start) {
+      for (reference **q = last_name_start; q < p; q++)
+	(*q)->set_last_name_unambiguous(i);
+    }
+    // If we have an author list <A B C D> and <A B C E>, then the lists
+    // need author D and E respectively.
+    // The test is true unless the last sub-run spans everything from
+    // start to end.
+    if (name_start > start || p < end) {
+      for (reference **q = last_name_start; q < p; q++)
+	(*q)->need_author(i);
+    }
+  }
+}
+
+// Return 1 if author n of r1 and of r2 have byte-identical sort-key
+// last-name parts (sub-sub-field 0), or if neither reference has that
+// part; return 0 otherwise.
+int same_author_last_name(const reference &r1, const reference &r2, int n)
+{
+  const char *end1;
+  const char *end2;
+  const char *start1 = r1.get_sort_field(0, n, 0, &end1);
+  const char *start2 = r2.get_sort_field(0, n, 0, &end2);
+  if (start1 == 0 || start2 == 0)
+    return start1 == start2;	// the same only if both are missing
+  if (end1 - start1 != end2 - start2)
+    return 0;
+  return memcmp(start1, start2, end1 - start1) == 0;
+}
+
+// Return 1 if author n of r1 and of r2 have byte-identical sort keys
+// (sub-sub-field selector -1), or if neither reference has such an
+// author; return 0 otherwise.
+int same_author_name(const reference &r1, const reference &r2, int n)
+{
+  const char *end1;
+  const char *end2;
+  const char *start1 = r1.get_sort_field(0, n, -1, &end1);
+  const char *start2 = r2.get_sort_field(0, n, -1, &end2);
+  if (start1 == 0 || start2 == 0)
+    return start1 == start2;	// the same only if both are missing
+  if (end1 - start1 != end2 - start2)
+    return 0;
+  return memcmp(start1, start2, end1 - start1) == 0;
+}
+
+
+// Add the non-negative integer i to the set.  The set is a bit vector
+// stored in v, eight members per byte; v is extended with zero bytes
+// as needed.
+void int_set::set(int i)
+{
+  assert(i >= 0);
+  const int byte_index = i / 8;
+  const int old_length = v.length();
+  if (byte_index >= old_length) {
+    v.set_length(byte_index + 1);
+    for (int j = old_length; j <= byte_index; j++)
+      v[j] = 0;
+  }
+  v[byte_index] |= 1 << (i % 8);
+}
+
+// Return 1 if the non-negative integer i is in the set, 0 otherwise.
+// Integers beyond the bytes currently stored are not members.
+int int_set::get(int i) const
+{
+  assert(i >= 0);
+  const int byte_index = i / 8;
+  if (byte_index >= v.length())
+    return 0;
+  return (v[byte_index] & (1 << (i % 8))) != 0;
+}
+
+// Mark author i as one for whom get_authors() may use the last name
+// alone.
+void reference::set_last_name_unambiguous(int i)
+{
+  last_name_unambiguous.set(i);
+}
+
+// Record that author n is needed; only the largest index requested so
+// far is kept in last_needed_author.
+void reference::need_author(int n)
+{
+  if (n > last_needed_author)
+    last_needed_author = n;
+}
+
+// Return the formatted author list, storing its end in *end.  The list
+// is built on the first call and cached in 'authors' (hence the casts
+// away from const).  Authors whose last name is unambiguous are given
+// by last name only.  After the last needed author the rest are
+// replaced by et_al, provided the et-al settings allow it; otherwise
+// authors are joined with the configured join strings.
+const char *reference::get_authors(const char **end) const
+{
+  if (!computed_authors) {
+    reference *self = (reference *)this;
+    self->computed_authors = 1;
+    string &list = self->authors;
+    const int total = get_nauthors();
+    list.clear();
+    for (int i = 0; i < total; i++) {
+      const char *name_end;
+      const char *name = last_name_unambiguous.get(i)
+			 ? get_author_last_name(i, &name_end)
+			 : get_author(i, &name_end);
+      assert(name != 0);
+      list.append(name, name_end - name);
+      const int elide_rest = i == last_needed_author
+			     && et_al.length() > 0
+			     && et_al_min_elide > 0
+			     && last_needed_author + et_al_min_elide < total
+			     && total >= et_al_min_total;
+      if (elide_rest) {
+	list += et_al;
+	break;
+      }
+      if (i >= total - 1)
+	continue;		// no joiner after the final author
+      if (total == 2)
+	list += join_authors_exactly_two;
+      else if (i < total - 2)
+	list += join_authors_default;
+      else
+	list += join_authors_last_two;
+    }
+  }
+  const char *start = authors.contents();
+  *end = start + authors.length();
+  return start;
+}
+
+// Return the number of authors, counting them with get_author() on the
+// first call and caching the answer in nauthors (negative means "not
+// counted yet").
+int reference::get_nauthors() const
+{
+  if (nauthors >= 0)
+    return nauthors;
+  const char *unused_end;
+  int count = 0;
+  while (get_author(count, &unused_end) != 0)
+    count++;
+  ((reference *)this)->nauthors = count;
+  return nauthors;
+}
+
+// Local Variables:
+// fill-column: 72
+// mode: C++
+// End:
+// vim: set cindent noexpandtab shiftwidth=2 textwidth=72:
diff --git a/src/preproc/refer/ref.cpp b/src/preproc/refer/ref.cpp
new file mode 100644
index 0000000..9e1b5e7
--- /dev/null
+++ b/src/preproc/refer/ref.cpp
@@ -0,0 +1,1161 @@
+// -*- C++ -*-
+/* Copyright (C) 1989-2020 Free Software Foundation, Inc.
+Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+     
+#include "refer.h"
+#include "refid.h"
+#include "ref.h"
+#include "token.h"
+
+// Helpers defined later in this file.
+static const char *find_day(const char *, const char *, const char **);
+static int find_month(const char *start, const char *end);
+static void abbreviate_names(string &);
+
+// The default articles, each terminated by a NUL.
+#define DEFAULT_ARTICLES "the\000a\000an"
+     
+// Using sizeof keeps the final terminating NUL, so every article in
+// the list can be walked as a C string (see sortify_title).
+string articles(DEFAULT_ARTICLES, sizeof(DEFAULT_ARTICLES));
+
+// Multiple occurrences of fields are separated by FIELD_SEPARATOR.
+const char FIELD_SEPARATOR = '\0';
+
+// Fields whose repeated occurrences are accumulated rather than
+// replaced when a reference is read.
+const char MULTI_FIELD_NAMES[] = "AE";
+// Fields tried in order by sortify_authors() as the author field.
+const char *AUTHOR_FIELDS = "AQ";
+
+enum { OTHER, JOURNAL_ARTICLE, BOOK, ARTICLE_IN_BOOK, TECH_REPORT, BELL_TM };
+
+// Names for the reference types; listed in the same order as the
+// enumerators above (presumably indexed by them -- confirm at the use
+// site).
+const char *reference_types[] = {
+  "other",
+  "journal-article",
+  "book",
+  "article-in-book",
+  "tech-report",
+  "bell-tm",
+};
+
+// Scratch strings, one per possible field character, used while
+// building or merging a reference.
+static string temp_fields[256];
+
+// Build a reference from the len bytes of text at start.  Each field
+// begins with '%' followed by the field character.  Field text is
+// collected in temp_fields, indexed by field character, and then
+// packed into the 'field' array with field_index mapping each field
+// character to its slot.  A null start or non-positive len yields an
+// empty reference.  If ridp is non-null, *ridp is copied into rid.
+reference::reference(const char *start, int len, reference_id *ridp)
+: h(0), merged(0), no(-1), field(0), nfields(0), label_ptr(0),
+  computed_authors(0), last_needed_author(-1), nauthors(-1)
+{
+  int i;
+  for (i = 0; i < 256; i++)
+    field_index[i] = NULL_FIELD_INDEX;
+  if (ridp)
+    rid = *ridp;
+  if (start == 0)
+    return;
+  if (len <= 0)
+    return;
+  const char *end = start + len;
+  const char *ptr = start;
+  assert(*ptr == '%');
+  while (ptr < end) {
+    // Case 1: text kept verbatim, newlines included.  This is either
+    // the annotation field, or a "%%X" field whose character X is not
+    // in discard_fields.
+    if (ptr + 1 < end && ptr[1] != '\0'
+	&& ((ptr[1] != '%' && ptr[1] == annotation_field)
+	    || (ptr + 2 < end && ptr[1] == '%' && ptr[2] != '\0'
+		&& discard_fields.search(ptr[2]) < 0))) {
+      if (ptr[1] == '%')
+	ptr++;
+      string &f = temp_fields[(unsigned char)ptr[1]];
+      ptr += 2;
+      while (ptr < end && csspace(*ptr))
+	ptr++;
+      // Copy whole lines until the text ends or a line starts with '%'.
+      for (;;) {
+	for (;;) {
+	  if (ptr >= end) {
+	    f += '\n';
+	    break;
+	  }
+	  f += *ptr;
+	  if (*ptr++ == '\n')
+	    break;
+	}
+	if (ptr >= end || *ptr == '%')
+	  break;
+      }
+    }
+    // Case 2: an ordinary "%X" field that is not discarded.
+    else if (ptr + 1 < end && ptr[1] != '\0' && ptr[1] != '%'
+	     && discard_fields.search(ptr[1]) < 0) {
+      string &f = temp_fields[(unsigned char)ptr[1]];
+      // A repeated field is appended after a separator if it is a
+      // multi-valued field; otherwise the later occurrence wins.
+      if (f.length() > 0) {
+	if (strchr(MULTI_FIELD_NAMES, ptr[1]) != 0)
+	  f += FIELD_SEPARATOR;
+	else
+	  f.clear();
+      }
+      ptr += 2;
+      if (ptr < end) {
+	if (*ptr == ' ')
+	  ptr++;
+	// Continuation lines are joined with a single space.
+	for (;;) {
+	  const char *p = ptr;
+	  while (ptr < end && *ptr != '\n')
+	    ptr++;
+	  // strip trailing white space
+	  const char *q = ptr;
+	  while (q > p && q[-1] != '\n' && csspace(q[-1]))
+	    q--;
+	  while (p < q)
+	    f += *p++;
+	  if (ptr >= end)
+	    break;
+	  ptr++;
+	  if (ptr >= end)
+	    break;
+	  if (*ptr == '%')
+	    break;
+	  f += ' ';
+	}
+      }
+    }
+    else {
+      // skip this field
+      for (;;) {
+	while (ptr < end && *ptr++ != '\n')
+	  ;
+	if (ptr >= end || *ptr == '%')
+	  break;
+      }
+    }
+  }
+  // Pack the non-empty scratch strings into 'field', in order of field
+  // character, leaving temp_fields empty again.
+  for (i = 0; i < 256; i++)
+    if (temp_fields[i].length() > 0)
+      nfields++;
+  field = new string[nfields];
+  int j = 0;
+  for (i = 0; i < 256; i++)
+    if (temp_fields[i].length() > 0) {
+      field[j].move(temp_fields[i]);
+      if (abbreviate_fields.search(i) >= 0)
+	abbreviate_names(field[j]);
+      field_index[i] = j;
+      j++;
+    }
+}
+
+// Release the field array.
+// NOTE(review): a zero-length array allocated while nfields is 0 is
+// not freed by this test -- confirm whether that matters.
+reference::~reference()
+{
+  if (nfields > 0)
+    delete[] field;
+}
+
+// ref is the inline, this is the database ref
+
+// Merge the fields of ref into this reference.  Where both have a
+// field, ref's value replaces this reference's.  The fields of ref
+// are moved out of it in the process.
+void reference::merge(reference &ref)
+{
+  const int old_nfields = nfields;
+  nfields = 0;
+  int c;
+  // Gather the surviving text of every field in temp_fields, counting
+  // the non-empty ones and clearing the index as we go.
+  for (c = 0; c < 256; c++) {
+    if (field_index[c] != NULL_FIELD_INDEX)
+      temp_fields[c].move(field[field_index[c]]);
+    if (ref.field_index[c] != NULL_FIELD_INDEX)
+      temp_fields[c].move(ref.field[ref.field_index[c]]);
+    field_index[c] = NULL_FIELD_INDEX;
+    if (temp_fields[c].length() > 0)
+      nfields++;
+  }
+  // The old array is reused when the number of fields is unchanged.
+  if (nfields != old_nfields) {
+    if (old_nfields > 0)
+      delete[] field;
+    field = new string[nfields];
+  }
+  int slot = 0;
+  for (c = 0; c < 256; c++) {
+    if (temp_fields[c].length() <= 0)
+      continue;
+    field[slot].move(temp_fields[c]);
+    field_index[c] = slot++;
+  }
+  merged = 1;
+}
+
+// Give field c the contents of s, which must not be empty; s is moved
+// from.  An existing field is overwritten in place.  A new field is
+// inserted so that 'field' stays ordered by field character, and the
+// indexes of all later fields are shifted up by one.
+void reference::insert_field(unsigned char c, string &s)
+{
+  assert(s.length() > 0);
+  if (field_index[c] != NULL_FIELD_INDEX) {
+    field[field_index[c]].move(s);
+    return;
+  }
+  assert(field_index[c] == NULL_FIELD_INDEX);
+  // The new slot comes after every existing field with a smaller
+  // field character.
+  int pos = 0;
+  int ch;
+  for (ch = 0; ch < int(c); ch++)
+    if (field_index[ch] != NULL_FIELD_INDEX)
+      pos++;
+  string *grown = new string[nfields + 1];
+  for (int src = 0, dst = 0; src < nfields; src++, dst++) {
+    if (dst == pos)
+      dst++;			// leave the gap for the new field
+    grown[dst].move(field[src]);
+  }
+  grown[pos].move(s);
+  if (nfields > 0)
+    delete[] field;
+  field = grown;
+  nfields++;
+  field_index[c] = pos;
+  for (ch = c + 1; ch < 256; ch++)
+    if (field_index[ch] != NULL_FIELD_INDEX)
+      field_index[ch] += 1;
+}
+
+// Remove field c, if present: rebuild 'field' without its slot and
+// shift the indexes of all later fields down by one.
+void reference::delete_field(unsigned char c)
+{
+  if (field_index[c] == NULL_FIELD_INDEX)
+    return;
+  const int removed = int(field_index[c]);
+  string *shrunk = new string[nfields - 1];
+  for (int src = 0, dst = 0; src < nfields; src++) {
+    if (src == removed)
+      continue;
+    shrunk[dst++].move(field[src]);
+  }
+  if (nfields > 0)
+    delete[] field;
+  field = shrunk;
+  nfields--;
+  field_index[c] = NULL_FIELD_INDEX;
+  for (int ch = c + 1; ch < 256; ch++)
+    if (field_index[ch] != NULL_FIELD_INDEX)
+      field_index[ch] -= 1;
+}
+
+// Compute the hash code h.  A reference with a non-null id hashes by
+// that id; otherwise the hashes of all non-empty fields are combined.
+void reference::compute_hash_code()
+{
+  if (!rid.is_null()) {
+    h = rid.hash();
+    return;
+  }
+  h = 0;
+  for (int i = 0; i < nfields; i++) {
+    const string &f = field[i];
+    if (f.length() <= 0)
+      continue;
+    h <<= 4;
+    h ^= hash_string(f.contents(), f.length());
+  }
+}
+
+// Set the reference number 'no'.  compare_reference() uses it to
+// break ties between equal sort keys and asserts it is non-negative.
+void reference::set_number(int n)
+{
+  no = n;
+}
+
+// Separators used inside sort keys.  SORT_SEP goes between the keys of
+// successive sort fields, SORT_SUB_SEP between the occurrences of one
+// field, and SORT_SUB_SUB_SEP between the parts of a name.
+const char SORT_SEP = '\001';
+const char SORT_SUB_SEP = '\002';
+const char SORT_SUB_SUB_SEP = '\003';
+
+// sep specifies additional word separators
+
+// Append to result the sortified form of the words in [s, end).  A
+// word separator is a space, a newline, "\ " or any character in sep
+// (which may be null).  Each run of separators between two non-empty
+// sortified tokens becomes a single space; leading and trailing
+// separators produce nothing.
+void sortify_words(const char *s, const char *end, const char *sep,
+		   string &result)
+{
+  int seen_word = 0;
+  int space_pending = 0;
+  const char *tok = s;
+  for (; get_token(&s, end); tok = s) {
+    const int tok_len = s - tok;
+    int is_separator = 0;
+    if (tok_len == 1)
+      is_separator = *tok == ' '
+		     || *tok == '\n'
+		     || (sep && *tok != '\0' && strchr(sep, *tok) != 0);
+    else if (tok_len == 2)
+      is_separator = tok[0] == '\\' && tok[1] == ' ';
+    if (is_separator) {
+      if (seen_word)
+	space_pending = 1;
+      continue;
+    }
+    const token_info *info = lookup_token(tok, s);
+    if (!info->sortify_non_empty(tok, s))
+      continue;
+    if (space_pending) {
+      result += ' ';
+      space_pending = 0;
+    }
+    info->sortify(tok, s, result);
+    seen_word = 1;
+  }
+}
+
+// Append to result the sortified form of every token in [s, end),
+// without any treatment of word separators.
+void sortify_word(const char *s, const char *end, string &result)
+{
+  const char *tok = s;
+  while (get_token(&s, end)) {
+    lookup_token(tok, s)->sortify(tok, s, result);
+    tok = s;
+  }
+}
+
+// Sort-key builder used by sortify_field() for fields with no special
+// treatment: sortify the words with no extra word separators.
+void sortify_other(const char *s, int len, string &key)
+{
+  sortify_words(s, s + len, 0, key);
+}
+
+// Sort-key builder for titles: skip leading blanks and, if the first
+// word is one of the articles (compared after lowercasing the title
+// text) and is followed by more text, skip it as well; then sortify
+// the remaining words.
+void sortify_title(const char *s, int len, string &key)
+{
+  const char *end = s + len;
+  while (s < end && (*s == ' ' || *s == '\n'))
+    s++;
+  // Advance ptr just past the blank that ends the first word.
+  const char *ptr = s;
+  const char *tok = ptr;
+  while (get_token(&ptr, end)) {
+    if (ptr - tok == 1 && (*tok == ' ' || *tok == '\n'))
+      break;
+    tok = ptr;
+  }
+  if (ptr < end) {
+    const unsigned int first_word_len = ptr - s - 1;
+    const char *articles_end = articles.contents() + articles.length();
+    // The articles are stored one after another, each NUL-terminated.
+    for (const char *a = articles.contents();
+	 a < articles_end;
+	 a += strlen(a) + 1) {
+      if (strlen(a) != first_word_len)
+	continue;
+      unsigned int matched = 0;
+      while (matched < first_word_len && a[matched] == cmlower(s[matched]))
+	matched++;
+      if (matched < first_word_len)
+	continue;
+      // Drop the article and the blanks after it.
+      s = ptr;
+      while (s < end && (*s == ' ' || *s == '\n'))
+	s++;
+      break;
+    }
+  }
+  sortify_words(s, end, 0, key);
+}
+
+// Sort-key builder for personal names.  The key is the last name, then
+// whatever precedes it, then whatever follows it, with
+// SORT_SUB_SUB_SEP between the three parts.
+void sortify_name(const char *s, int len, string &key)
+{
+  const char *name_end = s + len;
+  const char *last_end;
+  const char *last_start = find_last_name(s, name_end, &last_end);
+  sortify_word(last_start, last_end, key);
+  key += SORT_SUB_SUB_SEP;
+  if (s < last_start)
+    sortify_words(s, last_start, ".", key);
+  key += SORT_SUB_SUB_SEP;
+  if (last_end < name_end)
+    sortify_words(last_end, name_end, ".,", key);
+}
+
+// Sort-key builder for dates.  The key is the year zero-padded to at
+// least four characters, then, if a month is found, a letter for the
+// month, then, if a day is found, the day padded to two digits.  A
+// date without a year gets 'A' followed by its sortified words.
+void sortify_date(const char *s, int len, string &key)
+{
+  const char *date_end = s + len;
+  const char *year_end;
+  const char *year = find_year(s, date_end, &year_end);
+  if (year == 0) {
+    // Things without years are often 'forthcoming', so it makes sense
+    // that they sort after things with explicit years.
+    key += 'A';
+    sortify_words(s, date_end, 0, key);
+    return;
+  }
+  for (int digits = year_end - year; digits < 4; digits++)
+    key += '0';
+  for (; year < year_end; year++)
+    key += *year;
+  const int month = find_month(s, date_end);
+  if (month < 0)
+    return;
+  key += 'A' + month;
+  const char *day_end;
+  const char *day = find_day(s, date_end, &day_end);
+  if (day == 0)
+    return;
+  if (day_end - day == 1)
+    key += '0';
+  for (; day < day_end; day++)
+    key += *day;
+}
+
+// SORT_{SUB,SUB_SUB}_SEP can creep in from use of @ in label specification.
+
+// Sort-key builder for a label: sortify the words of each stretch of
+// text between SORT_SUB_SEP / SORT_SUB_SUB_SEP characters and copy
+// those separator characters through unchanged.
+void sortify_label(const char *s, int len, string &key)
+{
+  const char *end = s + len;
+  for (;;) {
+    const char *run_end = s;
+    while (run_end < end
+	   && *run_end != SORT_SUB_SEP && *run_end != SORT_SUB_SUB_SEP)
+      run_end++;
+    if (run_end > s)
+      sortify_words(s, run_end, 0, key);
+    if (run_end >= end)
+      break;
+    key += *run_end;
+    s = run_end + 1;
+  }
+}
+
+// Append this reference's sort key to sort_key, following the
+// specification in sort_fields.  Each item of the specification is a
+// field character optionally followed by '+' (use every occurrence of
+// the field) or by a decimal count of occurrences; the default count
+// is 1.  '.' stands for the label and the first character of
+// AUTHOR_FIELDS for the authors.  The keys of successive items are
+// separated by SORT_SEP.
+void reference::compute_sort_key()
+{
+  if (sort_fields.length() == 0)
+    return;
+  // Temporarily NUL-terminate the specification so that it can be
+  // scanned as a C string; the terminator is removed again below.
+  sort_fields += '\0';
+  const char *sf = sort_fields.contents();
+  int first_time = 1;
+  while (*sf != '\0') {
+    if (!first_time)
+      sort_key += SORT_SEP;
+    first_time = 0;
+    char f = *sf++;
+    int n = 1;
+    if (*sf == '+') {
+      n = INT_MAX;
+      sf++;
+    }
+    else if (csdigit(*sf)) {
+      char *ptr;
+      long l = strtol(sf, &ptr, 10);
+      if (ptr != sf) {
+	sf = ptr;
+	if (l < 0)
+	  n = 1;
+	else if (l > INT_MAX)
+	  // A count too large for an int means "every occurrence";
+	  // clamp it rather than narrow it to an arbitrary value.
+	  n = INT_MAX;
+	else
+	  n = int(l);
+      }
+    }
+    if (f == '.')
+      sortify_label(label.contents(), label.length(), sort_key);
+    else if (f == AUTHOR_FIELDS[0])
+      sortify_authors(n, sort_key);
+    else
+      sortify_field(f, n, sort_key);
+  }
+  sort_fields.set_length(sort_fields.length() - 1);
+}
+
+// Append the sort key of up to n authors to result.  The authors come
+// from the first field listed in AUTHOR_FIELDS that this reference
+// contains; if it contains none, the first listed field is used.
+void reference::sortify_authors(int n, string &result) const
+{
+  const char *candidate = AUTHOR_FIELDS;
+  while (*candidate != '\0' && !contains_field(*candidate))
+    candidate++;
+  if (*candidate == '\0')
+    candidate = AUTHOR_FIELDS;
+  sortify_field(*candidate, n, result);
+}
+
+// Append the sort key of all authors to result, followed by
+// SORT_SUB_SEP if that key was not empty.
+void reference::canonicalize_authors(string &result) const
+{
+  const int length_before = result.length();
+  sortify_authors(INT_MAX, result);
+  const int appended = result.length() > length_before;
+  if (appended)
+    result += SORT_SUB_SEP;
+}
+
+// Append to result the sort key of at most n occurrences of field f,
+// separated by SORT_SUB_SEP.  Names (A, E), dates (D) and titles
+// (B, J, T) each have their own key builder; every other field uses
+// sortify_other.  Nothing is appended if the reference lacks field f.
+void reference::sortify_field(unsigned char f, int n, string &result) const
+{
+  typedef void (*sortify_t)(const char *, int, string &);
+  sortify_t sortifier;
+  if (f == 'A' || f == 'E')
+    sortifier = sortify_name;
+  else if (f == 'D')
+    sortifier = sortify_date;
+  else if (f == 'B' || f == 'J' || f == 'T')
+    sortifier = sortify_title;
+  else
+    sortifier = sortify_other;
+  const int slot = field_index[(unsigned char)f];
+  if (slot == NULL_FIELD_INDEX)
+    return;
+  string &text = field[slot];
+  const char *cursor = text.contents();
+  const char *text_end = cursor + text.length();
+  for (int occurrence = 0; occurrence < n && cursor < text_end; occurrence++) {
+    const char *item = cursor;
+    while (cursor < text_end && *cursor != FIELD_SEPARATOR)
+      cursor++;
+    if (occurrence > 0)
+      result += SORT_SUB_SEP;
+    (*sortifier)(item, cursor - item, result);
+    // Step over the separator, if there is one.
+    if (cursor < text_end)
+      cursor++;
+  }
+}
+
+// Three-way comparison of two references by sort key: bytes are
+// compared as unsigned chars, a key that is a proper prefix of the
+// other sorts first, and equal keys are ordered by reference number.
+// Both references must have been numbered.
+int compare_reference(const reference &r1, const reference &r2)
+{
+  assert(r1.no >= 0);
+  assert(r2.no >= 0);
+  const char *key1 = r1.sort_key.contents();
+  const char *key2 = r2.sort_key.contents();
+  const int len1 = r1.sort_key.length();
+  const int len2 = r2.sort_key.length();
+  const int common = len1 < len2 ? len1 : len2;
+  for (int k = 0; k < common; k++)
+    if (key1[k] != key2[k])
+      return (int)(unsigned char)key1[k] - (int)(unsigned char)key2[k];
+  if (len1 < len2)
+    return -1;
+  if (len1 > len2)
+    return 1;
+  return r1.no - r2.no;
+}
+
+// Return 1 if r1 and r2 denote the same reference, 0 otherwise.  Two
+// references are the same if they share a non-null id, or if their
+// hash codes, field counts, field indexes and field contents all
+// agree.
+int same_reference(const reference &r1, const reference &r2)
+{
+  if (!r1.rid.is_null() && r1.rid == r2.rid)
+    return 1;
+  if (r1.h != r2.h)
+    return 0;
+  if (r1.nfields != r2.nfields)
+    return 0;
+  int i = 0; 
+  // Compare the index entries themselves.  Comparing the arrays
+  // (r1.field_index != r2.field_index) would compare their addresses,
+  // which always differ for two distinct objects.
+  for (i = 0; i < 256; i++)
+    if (r1.field_index[i] != r2.field_index[i])
+      return 0;
+  for (i = 0; i < r1.nfields; i++)
+    if (r1.field[i] != r2.field[i])
+      return 0;
+  return 1;
+}
+
+// Locate the last name within the name [start, end).  If the name
+// contains a comma, the last name is the word that ends at the first
+// comma; otherwise it is the final word.  Returns the start of the
+// last name and stores its end in *endp.
+const char *find_last_name(const char *start, const char *end,
+			   const char **endp)
+{
+  const char *word_start = start;
+  const char *ptr = start;
+  const char *tok = ptr;
+  for (; get_token(&ptr, end); tok = ptr) {
+    if (ptr - tok != 1)
+      continue;
+    const char c = *tok;
+    if (c == ',') {
+      *endp = tok;
+      return word_start;
+    }
+    // A blank followed by a non-blank starts a new word.
+    if ((c == ' ' || c == '\n')
+	&& ptr < end && *ptr != ' ' && *ptr != '\n')
+      word_start = ptr;
+  }
+  *endp = end;
+  return word_start;
+}
+
+// Append to result an abbreviated form of the name [ptr, end).
+// The part of the name before the last name (as located by
+// find_last_name()) is scanned token by token.  A token for which
+// is_upper() holds starts an initial: the token itself is kept and the
+// tokens following it up to the next space (or "\ ") are dropped,
+// except as described in the inner loop below.  One of the
+// period_before_* strings is inserted after each initial, chosen
+// according to what follows it.  The last name and anything after it
+// are appended unchanged.
+void abbreviate_name(const char *ptr, const char *end, string &result)
+{
+  const char *last_name_end;
+  const char *last_name_start = find_last_name(ptr, end, &last_name_end);
+  // non-zero when an initial has been output and the string that
+  // follows it has not yet been chosen
+  int need_period = 0;
+  for (;;) {
+    const char *token_start = ptr;
+    if (!get_token(&ptr, last_name_start))
+      break;
+    const token_info *ti = lookup_token(token_start, ptr);
+    if (need_period) {
+      // skip spaces (" " or "\ ") between an initial and what follows
+      if ((ptr - token_start == 1 && *token_start == ' ')
+	  || (ptr - token_start == 2 && token_start[0] == '\\'
+	      && token_start[1] == ' '))
+	continue;
+      if (ti->is_upper())
+	result += period_before_initial;
+      else
+	result += period_before_other;
+      need_period = 0;
+    }
+    result.append(token_start, ptr - token_start);
+    if (ti->is_upper()) {
+      // start of the tokens seen since the last one that was output
+      const char *lower_ptr = ptr;
+      int first_token = 1;
+      // consume the rest of this word
+      for (;;) {
+	token_start = ptr;
+	if (!get_token(&ptr, last_name_start))
+	  break;
+	if ((ptr - token_start == 1 && *token_start == ' ')
+	    || (ptr - token_start == 2 && token_start[0] == '\\'
+		&& token_start[1] == ' '))
+	  break;
+	ti = lookup_token(token_start, ptr);
+	if (ti->is_hyphen()) {
+	  // a hyphen followed by an upper case token: keep both,
+	  // preceded by period_before_hyphen
+	  const char *ptr1 = ptr;
+	  if (get_token(&ptr1, last_name_start)) {
+	    ti = lookup_token(ptr, ptr1);
+	    if (ti->is_upper()) {
+	      result += period_before_hyphen;
+	      result.append(token_start, ptr1 - token_start);
+	      ptr = ptr1;
+	    }
+	  }
+	}
+	else if (ti->is_upper()) {
+	  // MacDougal -> MacD.
+	  result.append(lower_ptr, ptr - lower_ptr);
+	  lower_ptr = ptr;
+	  first_token = 1;
+	}
+	else if (first_token && ti->is_accent()) {
+	  // keep an accent token that directly follows the initial
+	  result.append(token_start, ptr - token_start);
+	  lower_ptr = ptr;
+	}
+	first_token = 0;
+      }
+      need_period = 1;
+    }
+  }
+  if (need_period)
+    result += period_before_last_name;
+  result.append(last_name_start, end - last_name_start);
+}
+
+// Abbreviate every name in result.  Names are separated by
+// FIELD_SEPARATOR.  The old contents are moved out of result, which is
+// then rebuilt by applying abbreviate_name() to each name in turn,
+// keeping the separators.
+static void abbreviate_names(string &result)
+{
+  string input;
+  input.move(result);
+  const char *cur = input.contents();
+  const char *const limit = cur + input.length();
+  for (;;) {
+    if (cur >= limit)
+      return;
+    const char *sep = (char *)memchr(cur, FIELD_SEPARATOR, limit - cur);
+    const char *stop = sep ? sep : limit;
+    abbreviate_name(cur, stop, result);
+    if (stop >= limit)
+      return;
+    result += FIELD_SEPARATOR;
+    cur = stop + 1;
+  }
+}
+
+// Append to result the name [ptr, name_end) with the last name moved to
+// the front: first the last name (as located by find_last_name()), then
+// ", " and whatever preceded the last name with trailing spaces and
+// newlines removed, and finally anything that followed the last name.
+void reverse_name(const char *ptr, const char *name_end, string &result)
+{
+  const char *surname_end;
+  const char *surname = find_last_name(ptr, name_end, &surname_end);
+  result.append(surname, surname_end - surname);
+  // Find the end of the part before the last name, ignoring the white
+  // space that separates the two.
+  const char *given_end = surname;
+  for (; given_end > ptr; given_end--) {
+    const char prev = given_end[-1];
+    if (prev != ' ' && prev != '\n')
+      break;
+  }
+  if (given_end > ptr) {
+    result += ", ";
+    result.append(ptr, given_end - ptr);
+  }
+  if (surname_end < name_end)
+    result.append(surname_end, name_end - surname_end);
+}
+
+// Reverse (see reverse_name()) the first n names in result.  Names are
+// separated by FIELD_SEPARATOR.  Names after the first n are copied
+// unchanged.  Nothing is done if n is not positive.
+void reverse_names(string &result, int n)
+{
+  if (n <= 0)
+    return;
+  string input;
+  input.move(result);
+  const char *cur = input.contents();
+  const char *const limit = cur + input.length();
+  for (int remaining = n; cur < limit; remaining--) {
+    if (remaining <= 0) {
+      // the quota is used up: copy the rest verbatim
+      result.append(cur, limit - cur);
+      return;
+    }
+    const char *sep = (char *)memchr(cur, FIELD_SEPARATOR, limit - cur);
+    const char *stop = sep ? sep : limit;
+    reverse_name(cur, stop, result);
+    if (stop >= limit)
+      return;
+    result += FIELD_SEPARATOR;
+    cur = stop + 1;
+  }
+}
+
+// Replace each FIELD_SEPARATOR in f by the appropriate join string:
+// join_authors_exactly_two if there is only one separator, otherwise
+// join_authors_last_two for the final separator and
+// join_authors_default for all the others.  f is left untouched if it
+// contains no separator.  Return the number of field separators.
+
+int join_fields(string &f)
+{
+  const char *const begin = f.contents();
+  const char *const stop = begin + f.length();
+  int total = 0;
+  const char *p;
+  for (p = begin; p < stop; p++) {
+    if (*p == FIELD_SEPARATOR)
+      total++;
+  }
+  if (total == 0)
+    return 0;
+  string joined;
+  int seen = 0;
+  for (p = begin; p < stop; p++) {
+    if (*p != FIELD_SEPARATOR) {
+      joined += *p;
+      continue;
+    }
+    seen++;
+    if (total == 1)
+      joined += join_authors_exactly_two;
+    else if (seen == total)
+      joined += join_authors_last_two;
+    else
+      joined += join_authors_default;
+  }
+  f = joined;
+  return total;
+}
+
+// Append to result the upper case form of [start, end), obtained by
+// applying token_info::upper_case() to each token in turn.
+void uppercase(const char *start, const char *end, string &result)
+{
+  const char *tok = start;
+  while (get_token(&start, end)) {
+    lookup_token(tok, start)->upper_case(tok, start, result);
+    tok = start;
+  }
+}
+
+// Append to result the lower case form of [start, end), obtained by
+// applying token_info::lower_case() to each token in turn.
+void lowercase(const char *start, const char *end, string &result)
+{
+  const char *tok = start;
+  while (get_token(&start, end)) {
+    lookup_token(tok, start)->lower_case(tok, start, result);
+    tok = start;
+  }
+}
+
+// Append to result a "caps and small caps" rendering of [ptr, end).
+// Each lower case token is converted to upper case, and every run of
+// converted tokens is bracketed by "\s-2" and "\s+2".  All other tokens
+// are copied unchanged.  An accent token directly following a lower or
+// upper case token is kept together with that token.
+void capitalize(const char *ptr, const char *end, string &result)
+{
+  int shrunk = 0;		// non-zero while "\s-2" is in effect
+  const char *tok = ptr;
+  while (get_token(&ptr, end)) {
+    const token_info *info = lookup_token(tok, ptr);
+    const char *const letter_end = ptr;
+    const int lower = info->is_lower();
+    // Look one token ahead for an accent; back up if there isn't one.
+    if ((lower || info->is_upper()) && get_token(&ptr, end)) {
+      if (!lookup_token(letter_end, ptr)->is_accent())
+	ptr = letter_end;
+    }
+    if (lower) {
+      if (!shrunk)
+	result += "\\s-2";
+      shrunk = 1;
+      info->upper_case(tok, letter_end, result);
+      result.append(letter_end, ptr - letter_end);
+    }
+    else {
+      if (shrunk)
+	result += "\\s+2";
+      shrunk = 0;
+      result.append(tok, ptr - tok);
+    }
+    tok = ptr;
+  }
+  if (shrunk)
+    result += "\\s+2";
+}
+
+// Replace the contents of str by the result of applying capitalize()
+// to them.
+void capitalize_field(string &str)
+{
+  const char *text = str.contents();
+  const char *text_end = text + str.length();
+  string capitalized;
+  capitalize(text, text_end, capitalized);
+  str.move(capitalized);
+}
+
+// Return non-zero if the last token of [ptr, end) is a single
+// character that is one of '.', '!' or '?'.
+int is_terminated(const char *ptr, const char *end)
+{
+  const char *final_tok = end;
+  const char *tok = ptr;
+  while (get_token(&ptr, end)) {
+    final_tok = tok;
+    tok = ptr;
+  }
+  if (end - final_tok != 1)
+    return 0;
+  switch (*final_tok) {
+  case '.':
+  case '!':
+  case '?':
+    return 1;
+  default:
+    return 0;
+  }
+}
+
+// Write this reference to fp.
+//
+// The output starts with a ".]-" line.  Each field other than the
+// annotation field is then defined as the string "[X" (with .ds), or as
+// the macro "[X" (with .de) if its value contains a newline, where X is
+// the field name.  Before that, the names in a field listed in
+// reverse_fields are reversed, multiple values are joined with
+// join_fields(), and fields listed in capitalize_fields are
+// capitalized; note that these transformations modify the stored
+// fields.  The registers "[P", "[E", "[T", "[A" and "[O" are set as
+// described below.  The output ends with a ".][" line giving the
+// reference type and, if an annotation macro and annotation field are
+// present, a call of that macro followed by the annotation.
+void reference::output(FILE *fp)
+{
+  fputs(".]-\n", fp);
+  for (int i = 0; i < 256; i++)
+    if (field_index[i] != NULL_FIELD_INDEX && i != annotation_field) {
+      string &f = field[field_index[i]];
+      if (!csdigit(i)) {
+	int j = reverse_fields.search(i);
+	if (j >= 0) {
+	  // The field name may be followed in reverse_fields by a
+	  // decimal count of names to reverse; without a count all
+	  // names are reversed.
+	  int n;
+	  int len = reverse_fields.length();
+	  if (++j < len && csdigit(reverse_fields[j])) {
+	    n = reverse_fields[j] - '0';
+	    for (++j; j < len && csdigit(reverse_fields[j]); j++) {
+	      int digit = reverse_fields[j] - '0';
+	      // Saturate instead of overflowing; INT_MAX already means
+	      // "every name".
+	      if (n > (INT_MAX - digit)/10)
+		n = INT_MAX;
+	      else
+		n = n*10 + digit;
+	    }
+	  }
+	  else
+	    n = INT_MAX;
+	  reverse_names(f, n);
+	}
+      }
+      int is_multiple = join_fields(f) > 0;
+      if (capitalize_fields.search(i) >= 0)
+	capitalize_field(f);
+      if (memchr(f.contents(), '\n', f.length()) == 0) {
+	fprintf(fp, ".ds [%c ", i);
+	// a value starting with a space, backslash or double quote is
+	// preceded by an extra double quote
+	if (f[0] == ' ' || f[0] == '\\' || f[0] == '"')
+	  putc('"', fp);
+	put_string(f, fp);
+	putc('\n', fp);
+      }
+      else {
+	fprintf(fp, ".de [%c\n", i);
+	put_string(f, fp);
+	fputs("..\n", fp);
+      }
+      if (i == 'P') {
+	// [P is 1 if the page field contains a hyphen or range
+	// separator token, 0 otherwise
+	int multiple_pages = 0;
+	const char *s = f.contents();
+	const char *end = f.contents() + f.length();
+	for (;;) {
+	  const char *token_start = s;
+	  if (!get_token(&s, end))
+	    break;
+	  const token_info *ti = lookup_token(token_start, s);
+	  if (ti->is_hyphen() || ti->is_range_sep()) {
+	    multiple_pages = 1;
+	    break;
+	  }
+	}
+	fprintf(fp, ".nr [P %d\n", multiple_pages);
+      }
+      else if (i == 'E')
+	// [E is 1 if there was more than one E field
+	fprintf(fp, ".nr [E %d\n", is_multiple);
+    }
+  // [T, [A and [O say whether the corresponding field ends with '.',
+  // '!' or '?' (see is_terminated())
+  for (const char *p = "TAO"; *p; p++) {
+    int fi = field_index[(unsigned char)*p];
+    if (fi != NULL_FIELD_INDEX) {
+      string &f = field[fi];
+      fprintf(fp, ".nr [%c %d\n", *p,
+	      is_terminated(f.contents(), f.contents() + f.length()));
+    }
+  }
+  int t = classify();
+  fprintf(fp, ".][ %d %s\n", t, reference_types[t]);
+  if (annotation_macro.length() > 0 && annotation_field >= 0
+      && field_index[annotation_field] != NULL_FIELD_INDEX) {
+    putc('.', fp);
+    put_string(annotation_macro, fp);
+    putc('\n', fp);
+    put_string(field[field_index[annotation_field]], fp);
+  }
+}
+
+// Write the sort key to fp on a line of its own, prefixed by the
+// characters `.\"'.
+void reference::print_sort_key_comment(FILE *fp)
+{
+  static const char comment_prefix[] = ".\\\"";
+  fputs(comment_prefix, fp);
+  put_string(sort_key, fp);
+  fputc('\n', fp);
+}
+
+// Find the first thing in [start, end) that looks like a year: a
+// maximal run of digits that is three or four digits long, or that is
+// two digits long with a value of at least 32.  Return a pointer to the
+// first digit and set *endp to point just past the last one.  Return 0,
+// leaving *endp alone, if there is no such run.
+const char *find_year(const char *start, const char *end, const char **endp)
+{
+  const char *p = start;
+  while (p < end) {
+    if (!csdigit(*p)) {
+      p++;
+      continue;
+    }
+    // p is at the first digit of a run; find the end of the run.
+    const char *run = p;
+    do
+      p++;
+    while (p < end && csdigit(*p));
+    int plausible;
+    switch (p - run) {
+    case 3:
+    case 4:
+      plausible = 1;
+      break;
+    case 2:
+      plausible = run[0] >= '4' || (run[0] == '3' && run[1] >= '2');
+      break;
+    default:
+      plausible = 0;
+      break;
+    }
+    if (plausible) {
+      *endp = p;
+      return run;
+    }
+  }
+  return 0;
+}
+
+// Find the first thing in [start, end) that looks like a day of the
+// month: a maximal run of one or two digits whose value is between 1
+// and 31 (a leading zero is allowed in the two digit form).  Return a
+// pointer to the first digit and set *endp to point just past the last
+// one.  Return 0, leaving *endp alone, if there is no such run.
+static const char *find_day(const char *start, const char *end,
+			    const char **endp)
+{
+  const char *p = start;
+  while (p < end) {
+    if (!csdigit(*p)) {
+      p++;
+      continue;
+    }
+    // p is at the first digit of a run; find the end of the run.
+    const char *run = p;
+    do
+      p++;
+    while (p < end && csdigit(*p));
+    int plausible = 0;
+    if (p - run == 1)
+      plausible = run[0] != '0';
+    else if (p - run == 2) {
+      switch (run[0]) {
+      case '0':
+	plausible = run[1] != '0';
+	break;
+      case '1':
+      case '2':
+	plausible = 1;
+	break;
+      case '3':
+	plausible = run[1] <= '1';
+	break;
+      }
+    }
+    if (plausible) {
+      *endp = p;
+      return run;
+    }
+  }
+  return 0;
+}
+
+// Search [start, end) for a month name.  Each maximal run of at least
+// three letters is mapped through cmlower() and compared with the
+// English month names; the run matches a month if it is a prefix of
+// that month's name.  Return the zero-based index of the month for the
+// first run that matches (0 for january), or -1 if no run matches.
+static int find_month(const char *start, const char *end)
+{
+  static const char *months[] = {
+    "january",
+    "february",
+    "march",
+    "april",
+    "may",
+    "june",
+    "july",
+    "august",
+    "september",
+    "october",
+    "november",
+    "december",
+  };
+  const unsigned int nmonths = sizeof(months)/sizeof(months[0]);
+  const char *p = start;
+  while (p < end) {
+    if (!csalpha(*p)) {
+      p++;
+      continue;
+    }
+    // p is at the first letter of a word; find the end of the word.
+    const char *word = p;
+    do
+      p++;
+    while (p < end && csalpha(*p));
+    if (p - word < 3)
+      continue;
+    for (unsigned int m = 0; m < nmonths; m++) {
+      const char *name = months[m];
+      const char *w = word;
+      while (w < p && cmlower(*w) == *name) {
+	w++;
+	name++;
+      }
+      if (w >= p)
+	return m;
+    }
+  }
+  return -1;
+}
+
+// Return non-zero if this reference has a field named c.
+int reference::contains_field(char c) const
+{
+  const unsigned char slot = field_index[(unsigned char)c];
+  return slot != NULL_FIELD_INDEX;
+}
+
+// Determine the type of this reference from the fields it contains.
+// The rules are tried in order and the first field that is present
+// decides; OTHER is returned if none of them is present.
+int reference::classify()
+{
+  static const struct {
+    char field_name;
+    int type;
+  } rules[] = {
+    { 'J', JOURNAL_ARTICLE },
+    { 'B', ARTICLE_IN_BOOK },
+    { 'G', TECH_REPORT },
+    { 'R', TECH_REPORT },
+    { 'I', BOOK },
+    { 'M', BELL_TM },
+  };
+  for (unsigned int k = 0; k < sizeof(rules)/sizeof(rules[0]); k++) {
+    if (contains_field(rules[k].field_name))
+      return rules[k].type;
+  }
+  return OTHER;
+}
+
+// Find the year in the D field using find_year().  Return a pointer to
+// the start of the year and set *endp to its end.  Return 0 if there is
+// no D field or if find_year() finds no year in it.
+const char *reference::get_year(const char **endp) const
+{
+  const unsigned char slot = field_index['D'];
+  if (slot == NULL_FIELD_INDEX)
+    return 0;
+  const string &date = field[slot];
+  const char *text = date.contents();
+  return find_year(text, text + date.length(), endp);
+}
+
+// Return a pointer to the start of the contents of field c and set
+// *endp to point just past their end.  Return 0, leaving *endp alone,
+// if the reference has no such field.
+const char *reference::get_field(unsigned char c, const char **endp) const
+{
+  const unsigned char slot = field_index[c];
+  if (slot == NULL_FIELD_INDEX)
+    return 0;
+  const string &value = field[slot];
+  const char *text = value.contents();
+  *endp = text + value.length();
+  return text;
+}
+
+// Return the contents of the D field; see get_field().
+const char *reference::get_date(const char **endp) const
+{
+  const char *date_start = get_field('D', endp);
+  return date_start;
+}
+
+// Select the value with index i (counting from 0) from the
+// FIELD_SEPARATOR separated list [start, *endp).  Return a pointer to
+// the start of the value and set *endp to its end.  Return 0, leaving
+// *endp alone, if the list contains fewer than i separators.
+const char *nth_field(int i, const char *start, const char **endp)
+{
+  const char *cur = start;
+  for (int skipped = 0; skipped < i; skipped++) {
+    cur = (char *)memchr(cur, FIELD_SEPARATOR, *endp - cur);
+    if (cur == 0)
+      return 0;
+    cur++;
+  }
+  const char *sep = (char *)memchr(cur, FIELD_SEPARATOR, *endp - cur);
+  if (sep != 0)
+    *endp = sep;
+  return cur;
+}
+
+// Return the author with index i (counting from 0), setting *endp to
+// the end of the name.  The authors are taken from the first field
+// named in AUTHOR_FIELDS that is present.  If that field is one of
+// MULTI_FIELD_NAMES the author is selected with nth_field(); otherwise
+// the whole field is author 0 and there are no other authors.  Return 0
+// if there is no such author.
+const char *reference::get_author(int i, const char **endp) const
+{
+  const char *name = AUTHOR_FIELDS;
+  const char *text = 0;
+  for (; *name != '\0'; name++) {
+    text = get_field(*name, endp);
+    if (text != 0)
+      break;
+  }
+  if (text == 0)
+    return 0;
+  if (strchr(MULTI_FIELD_NAMES, *name) != 0)
+    return nth_field(i, text, endp);
+  return i == 0 ? text : 0;
+}
+
+// Return the last name of the author with index i (counting from 0),
+// setting *endp to its end.  The authors are taken from the first field
+// named in AUTHOR_FIELDS that is present.  For an A field the last name
+// is located with find_last_name(); for any other field the whole value
+// is used.  Return 0 if no author field is present or nth_field() finds
+// no value with index i.
+// NOTE(review): unlike get_author(), a field that is not one of
+// MULTI_FIELD_NAMES is returned whatever the value of i -- confirm that
+// this is intended.
+const char *reference::get_author_last_name(int i, const char **endp) const
+{
+  const char *name = AUTHOR_FIELDS;
+  const char *text = 0;
+  for (; *name != '\0'; name++) {
+    text = get_field(*name, endp);
+    if (text != 0)
+      break;
+  }
+  if (text == 0)
+    return 0;
+  if (strchr(MULTI_FIELD_NAMES, *name) != 0) {
+    text = nth_field(i, text, endp);
+    if (text == 0)
+      return 0;
+  }
+  if (*name != 'A')
+    return text;
+  return find_last_name(text, *endp, endp);
+}
+
+// Make d the contents of the D field; an empty d deletes the field.
+void reference::set_date(string &d)
+{
+  if (d.length() != 0)
+    insert_field('D', d);
+  else
+    delete_field('D');
+}
+
+// Return non-zero if r1 and r2 have the same year (see
+// reference::get_year()).  If neither reference has a year, the result
+// is that of same_date(); if only one has a year, the result is 0.
+int same_year(const reference &r1, const reference &r2)
+{
+  const char *end1;
+  const char *year1 = r1.get_year(&end1);
+  const char *end2;
+  const char *year2 = r2.get_year(&end2);
+  if (year1 == 0 && year2 == 0)
+    return same_date(r1, r2);
+  if (year1 == 0 || year2 == 0)
+    return 0;
+  if (end1 - year1 != end2 - year2)
+    return 0;
+  return memcmp(year1, year2, end1 - year1) == 0;
+}
+
+// Return non-zero if the D fields of r1 and r2 are identical, or if
+// neither reference has a D field.
+int same_date(const reference &r1, const reference &r2)
+{
+  const char *end1;
+  const char *date1 = r1.get_date(&end1);
+  const char *end2;
+  const char *date2 = r2.get_date(&end2);
+  if (date1 == 0 || date2 == 0)
+    return date1 == 0 && date2 == 0;
+  if (end1 - date1 != end2 - date2)
+    return 0;
+  return memcmp(date1, date2, end1 - date1) == 0;
+}
+
+// Select part of the sort key.  The key is divided into fields by
+// SORT_SEP, each field into subfields by SORT_SUB_SEP, and each
+// subfield into sub-subfields by SORT_SUB_SUB_SEP.  i, si and ssi are
+// the indices (counting from 0) of the wanted field, subfield and
+// sub-subfield; the first index that is negative stops the selection,
+// so that everything selected up to that level is returned.  Return a
+// pointer to the start of the selected part and set *endp to its end.
+// Return 0, leaving *endp alone, if a requested part does not exist.
+const char *reference::get_sort_field(int i, int si, int ssi,
+				      const char **endp) const
+{
+  const char *start = sort_key.contents();
+  const char *end = start + sort_key.length();
+  const int index[3] = { i, si, ssi };
+  const int separator[3] = { SORT_SEP, SORT_SUB_SEP, SORT_SUB_SUB_SEP };
+  for (int level = 0; level < 3 && index[level] >= 0; level++) {
+    // skip the parts that come before the wanted one
+    for (int skip = index[level]; skip > 0; skip--) {
+      start = (char *)memchr(start, separator[level], end - start);
+      if (!start)
+	return 0;
+      start++;
+    }
+    // the wanted part extends to the next separator, if any
+    const char *e = (char *)memchr(start, separator[level], end - start);
+    if (e)
+      end = e;
+  }
+  *endp = end;
+  return start;
+}
+
diff --git a/src/preproc/refer/ref.h b/src/preproc/refer/ref.h
new file mode 100644
index 0000000..1205a28
--- /dev/null
+++ b/src/preproc/refer/ref.h
@@ -0,0 +1,127 @@
+// -*- C++ -*-
+/* Copyright (C) 1989-2020 Free Software Foundation, Inc.
+     Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+// declarations to avoid friend name injection problems
+int compare_reference(const reference &, const reference &);
+int same_reference(const reference &, const reference &);
+int same_year(const reference &, const reference &);
+int same_date(const reference &, const reference &);
+int same_author_last_name(const reference &, const reference &, int);
+int same_author_name(const reference &, const reference &, int);
+
+struct label_info;
+
+// The kinds of label a reference can have; see reference::get_label().
+enum label_type { NORMAL_LABEL, SHORT_LABEL };
+// number of enumerators in label_type
+const int N_LABEL_TYPES = 2;
+
+// The position of a substring within a string.  A default constructed
+// object has a start of -1 and a length of 0.
+struct substring_position {
+  int start;
+  int length;
+  // length is initialized as well, so that no member is ever read
+  // uninitialized
+  substring_position() : start(-1), length(0) { }
+};
+
+// A set of ints.  Only the interface is visible here; set() and get()
+// are defined elsewhere.
+class int_set {
+  string v;	// presumably holds the membership data -- see definitions
+public:
+  int_set() { }
+  void set(int i);
+  int get(int i) const;
+};
+
+// A bibliographic reference: a set of fields, each named by a single
+// character, together with the sort key, number and label information
+// derived from them.
+class reference {
+private:
+  unsigned h;			// hash code; see hash()
+  reference_id rid;
+  int merged;			// see is_merged()
+  string sort_key;		// compared by compare_reference()
+  int no;			// reference number; see get_number()
+  string *field;		// contents of the fields
+  int nfields;			// number of elements in field
+  // Maps a field name to the index of its contents in field, or to
+  // NULL_FIELD_INDEX if the reference has no such field.
+  unsigned char field_index[256];
+  enum { NULL_FIELD_INDEX = 255 };
+  string label;
+  substring_position separator_pos;
+  string short_label;
+  substring_position short_separator_pos;
+  label_info *label_ptr;
+  string authors;
+  int computed_authors;
+  int last_needed_author;
+  int nauthors;
+  int_set last_name_unambiguous;
+
+  // non-zero if the reference has a field with the given name
+  int contains_field(char) const;
+  void insert_field(unsigned char, string &s);
+  void delete_field(unsigned char);
+  // set the D field; an empty string deletes it
+  void set_date(string &);
+  // select field i, subfield si, sub-subfield ssi of the sort key
+  const char *get_sort_field(int i, int si, int ssi, const char **endp) const;
+  int merge_labels_by_parts(reference **, int, label_type, string &);
+  int merge_labels_by_number(reference **, int, label_type, string &);
+public:
+  reference(const char * = 0, int = -1, reference_id * = 0);
+  ~reference();
+  // write the definitions for this reference
+  void output(FILE *);
+  void print_sort_key_comment(FILE *);
+  void set_number(int);
+  int get_number() const { return no; }
+  unsigned hash() const { return h; }
+  const string &get_label(label_type type) const;
+  const substring_position &get_separator_pos(label_type) const;
+  int is_merged() const { return merged; }
+  void compute_sort_key();
+  void compute_hash_code();
+  void pre_compute_label();
+  void compute_label();
+  void immediate_compute_label();
+  // the type of the reference, deduced from the fields present
+  int classify();
+  void merge(reference &);
+  int merge_labels(reference **, int, label_type, string &);
+  int get_nauthors() const;
+  void need_author(int);
+  void set_last_name_unambiguous(int);
+  void sortify_authors(int, string &) const;
+  void canonicalize_authors(string &) const;
+  void sortify_field(unsigned char, int, string &) const;
+  // The following return a pointer to the start of the requested text
+  // and store its end through the second argument; they return 0 if the
+  // text is not available.
+  const char *get_author(int, const char **) const;
+  const char *get_author_last_name(int, const char **) const;
+  const char *get_date(const char **) const;
+  const char *get_year(const char **) const;
+  const char *get_field(unsigned char, const char **) const;
+  const label_info *get_label_ptr() const { return label_ptr; }
+  const char *get_authors(const char **) const;
+  // for sorting
+  friend int compare_reference(const reference &r1, const reference &r2);
+  // for merging
+  friend int same_reference(const reference &, const reference &);
+  friend int same_year(const reference &, const reference &);
+  friend int same_date(const reference &, const reference &);
+  friend int same_author_last_name(const reference &, const reference &, int);
+  friend int same_author_name(const reference &, const reference &, int);
+};
+
+// Each of these searches the text between the first two arguments,
+// returns the start of what it found and stores the end through the
+// third argument.
+const char *find_year(const char *, const char *, const char **);
+const char *find_last_name(const char *, const char *, const char **);
+
+// Select value i (counting from 0) of the FIELD_SEPARATOR separated
+// list [start, *endp); *endp is updated to the end of the value.
+const char *nth_field(int i, const char *start, const char **endp);
+
+// Each of these appends a transformed copy of [ptr, end) to result.
+void capitalize(const char *ptr, const char *end, string &result);
+void reverse_name(const char *ptr, const char *end, string &result);
+void uppercase(const char *ptr, const char *end, string &result);
+void lowercase(const char *ptr, const char *end, string &result);
+void abbreviate_name(const char *ptr, const char *end, string &result);
diff --git a/src/preproc/refer/refer.1.man b/src/preproc/refer/refer.1.man
new file mode 100644
index 0000000..210afe7
--- /dev/null
+++ b/src/preproc/refer/refer.1.man
@@ -0,0 +1,2020 @@
+.TH @g@refer @MAN1EXT@ "@MDATE@" "groff @VERSION@"
+.SH Name
+@g@refer \- process bibliographic references for
+.I groff
+.
+.
+.\" ====================================================================
+.\" Legal Terms
+.\" ====================================================================
+.\"
+.\" Copyright (C) 1989-2021 Free Software Foundation, Inc.
+.\"
+.\" Permission is granted to make and distribute verbatim copies of this
+.\" manual provided the copyright notice and this permission notice are
+.\" preserved on all copies.
+.\"
+.\" Permission is granted to copy and distribute modified versions of
+.\" this manual under the conditions for verbatim copying, provided that
+.\" the entire resulting derived work is distributed under the terms of
+.\" a permission notice identical to this one.
+.\"
+.\" Permission is granted to copy and distribute translations of this
+.\" manual into another language, under the above conditions for
+.\" modified versions, except that this permission notice may be
+.\" included in translations approved by the Free Software Foundation
+.\" instead of in the original English.
+.
+.
+.\" Save and disable compatibility mode (for, e.g., Solaris 10/11).
+.do nr *groff_refer_1_man_C \n[.cp]
+.cp 0
+.
+.\" Define fallback for groff 1.23's MR macro if the system lacks it.
+.nr do-fallback 0
+.if !\n(.f           .nr do-fallback 1 \" mandoc
+.if  \n(.g .if !d MR .nr do-fallback 1 \" older groff
+.if !\n(.g           .nr do-fallback 1 \" non-groff *roff
+.if \n[do-fallback]  \{\
+.  de MR
+.    ie \\n(.$=1 \
+.      I \%\\$1
+.    el \
+.      IR \%\\$1 (\\$2)\\$3
+.  .
+.\}
+.rr do-fallback
+.
+.
+.\" ====================================================================
+.SH Synopsis
+.\" ====================================================================
+.
+.SY @g@refer
+.RB [ \-bCenPRS ]
+.RB [ \-a\~\c
+.IR n ]
+.RB [ \-B
+.IB field . macro\c
+]
+.RB [ \-c\~\c
+.IR fields ]
+.RB [ \-f\~\c
+.IR n ]
+.RB [ \-i\~\c
+.IR fields ]
+.RB [ \-k\~\c
+.IR field ]
+.RB [ \-l\~\c
+.IR range-expression ]
+.RB [ \-p\~\c
+.IR database-file ]
+.RB [ \-s\~\c
+.IR fields ]
+.RB [ \-t\~\c
+.IR n ]
+.RI [ file\~ .\|.\|.]
+.YS
+.
+.
+.SY @g@refer
+.B \-\-help
+.YS
+.
+.
+.SY @g@refer
+.B \-v
+.
+.SY @g@refer
+.B \-\-version
+.YS
+.
+.
+.\" ====================================================================
+.SH Description
+.\" ====================================================================
+.
+The GNU implementation of
+.I \%refer \" generic
+is part of the
+.MR groff @MAN1EXT@
+document formatting system.
+.
+.I @g@refer
+is a
+.MR @g@troff @MAN1EXT@
+preprocessor that prepares bibliographic citations by looking up
+keywords specified in a
+.MR roff @MAN7EXT@
+input document,
+obviating the need to type such annotations,
+and permitting the citation style in formatted output to be altered
+independently and systematically.
+.
+It copies the contents of each
+.I file
+to the standard output stream,
+except that it interprets lines between
+.B .[
+and
+.B .]\&
+as citations to be translated into
+.I groff
+input,
+and lines between
+.B .R1
+and
+.B .R2
+as instructions regarding how citations are to be processed.
+.
+Normally,
+.I @g@refer
+is not executed directly by the user,
+but invoked by specifying the
+.B \-R
+option to
+.MR groff @MAN1EXT@ .
+.
+If no
+.I file
+operands are given on the command line,
+or if
+.I file
+is
+.RB \[lq] \- \[rq],
+the standard input stream is read.
+.
+.
+.LP
+Each citation specifies a reference.
+.
+The citation can specify a reference that is contained in a
+bibliographic database by giving a set of keywords that only that
+reference contains.
+.
+Alternatively it can specify a reference by supplying a database record
+in the citation.
+.
+A combination of these alternatives is also possible.
+.
+.
+.LP
+For each citation,
+.I @g@refer
+can produce a mark in the text.
+.
+This mark consists of some label which can be separated from the text
+and from other labels in various ways.
+.
+For each reference it also outputs
+.MR groff @MAN7EXT@
+language commands that can be used by a macro package to produce a
+formatted reference for each citation.
+.
+The output of
+.I @g@refer
+must therefore be processed using a suitable macro package,
+such as
+.\" .IR man ,
+.IR me ,
+.IR mm ,
+.IR mom ,
+or
+.IR ms .
+.
+The commands to format a citation's reference can be output immediately
+after the citation,
+or the references may be accumulated,
+and the commands output at some later point.
+.
+If the references are accumulated,
+then multiple citations of the same reference will produce a single
+formatted reference.
+.
+.
+.LP
+The interpretation of lines between
+.B .R1
+and
+.B .R2
+as preprocessor commands is a feature of GNU
+.IR \%refer . \" GNU
+.
+Documents making use of this feature can still be processed by AT&T
+.I \%refer \" AT&T
+just by adding the lines
+.
+.RS
+.EX
+\&.de R1
+\&.ig R2
+\&..
+.EE
+.RE
+.
+to the beginning of the document.
+.
+This will cause
+.MR @g@troff @MAN1EXT@
+to ignore everything between
+.B .R1
+and
+.BR .R2 .
+.
+The effect of some commands can also be achieved by options.
+.
+These options are supported mainly for compatibility with AT&T
+.IR \%refer . \" AT&T
+.
+It is usually more convenient to use commands.
+.
+.
+.LP
+.I @g@refer
+generates
+.B .lf
+requests so that file names and line numbers in messages produced by
+commands that read
+.I @g@refer
+output will be correct;
+it also interprets lines beginning with
+.B .lf
+so that file names and line numbers in the messages and
+.B .lf
+lines that it produces will be accurate even if the input has been
+preprocessed by a command such as
+.MR @g@soelim @MAN1EXT@ .
+.
+.
+.\" ====================================================================
+.SS "Bibliographic databases"
+.\" ====================================================================
+.
+The bibliographic database is a text file consisting of records
+separated by one or more blank lines.
+.
+Within each record fields start with a
+.B %
+at the beginning of a line.
+.
+Each field has a one character name that immediately follows the
+.BR % .
+It is best to use only upper and lower case letters for the names
+of fields.
+.
+The name of the field should be followed by exactly one space,
+and then by the contents of the field.
+.
+Empty fields are ignored.
+.
+The conventional meaning of each field is as follows:
+.
+.
+.TP
+.B %A
+The name of an author.
+.
+If the name contains a suffix such as \[lq]Jr.\&\[rq],
+it should be separated from the last name by a comma.
+.
+There can be multiple occurrences of the
+.B %A
+field.
+.
+The order is significant.
+.
+It is a good idea always to supply an
+.B %A
+field or a
+.B %Q
+field.
+.
+.
+.TP
+.B %B
+For an article that is part of a book,
+the title of the book.
+.
+.
+.TP
+.B %C
+The place (city) of publication.
+.
+.
+.TP
+.B %D
+The date of publication.
+.
+The year should be specified in full.
+.
+If the month is specified,
+the name rather than the number of the month should be used,
+but only the first three letters are required.
+.
+It is a good idea always to supply a
+.B %D
+field;
+if the date is unknown,
+a value such as
+.B in press
+or
+.B unknown
+can be used.
+.
+.
+.TP
+.B %E
+For an article that is part of a book,
+the name of an editor of the book.
+.
+Where the work has editors and no authors,
+the names of the editors should be given as
+.B %A
+fields and
+.RB \[lq] ,\~(ed.)\& \[rq]
+or
+.RB \[lq] ,\~(eds.)\& \[rq]
+should be appended to the last author.
+.
+.
+.TP
+.B %G
+U.S. government ordering number.
+.
+.
+.TP
+.B %I
+The publisher (issuer).
+.
+.
+.TP
+.B %J
+For an article in a journal,
+the name of the journal.
+.
+.
+.TP
+.B %K
+Keywords to be used for searching.
+.
+.
+.TP
+.B %L
+Label.
+.
+.
+.TP
+.B %N
+Journal issue number.
+.
+.
+.TP
+.B %O
+Other information.
+.
+This is usually printed at the end of the reference.
+.
+.
+.TP
+.B %P
+Page number.
+.
+A range of pages can be specified as
+.IB m \- \c
+.IR n .
+.
+.
+.TP
+.B %Q
+The name of the author,
+if the author is not a person.
+.
+This will only be used if there are no
+.B %A
+fields.
+.
+There can only be one
+.B %Q
+field.
+.
+.
+.TP
+.B %R
+Technical report number.
+.
+.
+.TP
+.B %S
+Series name.
+.
+.
+.TP
+.B %T
+Title.
+.
+For an article in a book or journal,
+this should be the title of the article.
+.
+.
+.TP
+.B %V
+Volume number of the journal or book.
+.
+.
+.TP
+.B %X
+Annotation.
+.
+.
+.LP
+For all fields except
+.B %A
+and
+.BR %E ,
+if there is more than one occurrence of a particular field in a record,
+only the last such field will be used.
+.
+.
+.P
+If accent strings are used,
+they should follow the character to be accented.
+.
+This means that an
+.I ms
+document must call the
+.B .AM
+macro when it initializes.
+.
+Accent strings should not be quoted:
+use one
+.B \e
+rather than two.
+.
+Accent strings are an obsolescent feature of the
+.I me
+and
+.I ms
+macro packages;
+modern documents should use
+.I groff
+special character escape sequences instead;
+see
+.MR groff_char @MAN7EXT@ .
+.
+.
+.\" ====================================================================
+.SS Citations
+.\" ====================================================================
+.
+Citations have a characteristic format.
+.
+.RS
+.EX
+.BI .[ opening-text
+.I flags keywords
+.I fields
+.BI .] closing-text
+.EE
+.RE
+.
+.
+.LP
+The
+.IR opening-text ,
+.IR closing-text ,
+and
+.I flags
+components are optional.
+.
+Only one of the
+.I keywords
+and
+.I fields
+components need be specified.
+.
+.
+.LP
+The
+.I keywords
+component says to search the bibliographic databases for a reference
+that contains all the words in
+.IR keywords .
+.
+It is an error if more than one reference is found.
+.
+.
+.LP
+The
+.I fields
+component specifies additional fields to replace or supplement those
+specified in the reference.
+.
+When references are being accumulated and the
+.I keywords
+component is non-empty,
+then additional fields should be specified only on the first occasion
+that a particular reference is cited,
+and will apply to all citations of that reference.
+.
+.
+.br
+.ne 2v
+.LP
+The
+.I opening-text
+and
+.I closing-text
+components specify strings to be used to bracket the label instead of
+those in the
+.B \%bracket\-label
+command.
+.
+If either of these components is non-empty,
+the strings specified in the
+.B \%bracket\-label
+command will not be used;
+this behavior can be altered using the
+.B [
+and
+.B ]
+flags.
+.
+Leading and trailing spaces are significant for these components.
+.
+.
+.LP
+The
+.I flags
+component is a list of non-alphanumeric characters each of which
+modifies the treatment of this particular citation.
+.
+AT&T
+.I \%refer \" AT&T
+will treat these flags as part of the keywords and so will ignore them
+since they are non-alphanumeric.
+.
+The following flags are currently recognized.
+.
+.
+.TP
+.B #
+Use the label specified by the
+.B \%short\-label
+command,
+instead of that specified by the
+.B \%label
+command.
+.
+If no short label has been specified,
+the normal label will be used.
+.
+Typically the short label is used with author-date labels and consists
+of only the date and possibly a disambiguating letter;
+the
+.RB \[lq] # \[rq]
+is supposed to be suggestive of a numeric type of label.
+.
+.
+.TP
+.B [
+Precede
+.I opening-text
+with the first string specified in the
+.B \%bracket\-label
+command.
+.
+.
+.TP
+.B ]
+Follow
+.I closing-text
+with the second string specified in the
+.B \%bracket\-label
+command.
+.
+.
+.LP
+An advantage of using the
+.B [
+and
+.B ]
+flags rather than including the brackets in
+.I opening-text
+and
+.I closing-text
+is that
+.
+you can change the style of bracket used in the document just by
+changing the
+.B \%bracket\-label
+command.
+.
+Another is that sorting and merging of citations will not necessarily be
+inhibited if the flags are used.
+.
+.
+.LP
+If a label is to be inserted into the text,
+it will be attached to the line preceding the
+.B .[
+line.
+.
+If there is no such line,
+then an extra line will be inserted before the
+.B .[
+line and a warning will be given.
+.
+.
+.LP
+There is no special notation for making a citation to multiple
+references.
+.
+Just use a sequence of citations,
+one for each reference.
+.
+Don't put anything between the citations.
+.
+The labels for all the citations will be attached to the line preceding
+the first citation.
+.
+The labels may also be sorted or merged.
+.
+See the description of the
+.B <>
+label expression,
+and of the
+.B \%sort\-adjacent\-labels
+and
+.B \%abbreviate\-label\-ranges
+commands.
+.
+A label will not be merged if its citation has a non-empty
+.I opening-text
+or
+.IR closing-text .
+.
+However,
+the labels for a citation using the
+.B ]
+flag and without any
+.I closing-text
+immediately followed by a citation using the
+.B [
+flag and without any
+.I opening-text
+may be sorted and merged
+even though the first citation's
+.I opening-text
+or the second citation's
+.I closing-text
+is non-empty.
+.
+(If you wish to prevent this,
+use the dummy character escape sequence
+.B \[rs]&
+as the first citation's
+.IR closing-text .)
+.
+.
+.\" ====================================================================
+.SS Commands
+.\" ====================================================================
+.
+Commands are contained between lines starting with
+.B .R1
+and
+.BR .R2 .
+.
+Recognition of these lines can be prevented by the
+.B \-R
+option.
+.
+When a
+.B .R1
+line is recognized any accumulated references are flushed out.
+.
+Neither
+.B .R1
+nor
+.B .R2
+lines,
+nor anything between them,
+is output.
+.
+.
+.P
+Commands are separated by newlines or semicolons.
+.
+A number sign
+.RB ( # )
+introduces a comment that extends to the end of the line,
+but does not conceal the newline.
+.
+Each command is broken up into words.
+.
+Words are separated by spaces or tabs.
+.
+A word that begins with a (neutral) double quote
+.RB ( \[dq] )
+extends to the next double quote that is not followed by another double
+quote.
+.
+If there is no such double quote,
+the word extends to the end of the line.
+.
+Pairs of double quotes in a word beginning with a double quote collapse
+to one double quote.
+.
+Neither a number sign nor a semicolon is recognized inside double
+quotes.
+.
+A line can be continued by ending it with a backslash
+.RB \[lq] \[rs] \[rq];
+this works everywhere except after a number sign.
+.
+.
+.LP
+.ds n \fR*\fP\"
+Each command
+.I name
+that is marked with \*n has an associated negative command
+.BI no\- name
+that undoes the effect of
+.IR name .
+.
+For example,
+the
+.B no\-sort
+command specifies that references should not be sorted.
+.
+The negative commands take no arguments.
+.
+.
+.LP
+In the following description each argument must be a single word;
+.I field
+is used for a single upper or lower case letter naming a field;
+.I fields
+is used for a sequence of such letters;
+.I m
+and
+.I n
+are used for non-negative numbers;
+.I string
+is used for an arbitrary string;
+.I file
+is used for the name of a file.
+.
+.
+.TP
+.BI abbreviate\*n\~ fields\~string1\~string2\~string3\~string4
+Abbreviate the first names of
+.IR fields .
+.
+An initial letter will be separated from another initial letter by
+.IR string1 ,
+from the last name by
+.IR string2 ,
+and from anything else
+(such as \[lq]von\[rq] or \[lq]de\[rq])
+by
+.IR string3 .
+.
+These default to a period followed by a space.
+.
+In a hyphenated first name,
+the initial of the first part of the name will be separated from the
+hyphen by
+.IR string4 ;
+this defaults to a period.
+.
+No attempt is made to handle any ambiguities that might
+result from abbreviation.
+.
+Names are abbreviated before sorting and before label construction.
+.
+.
+.TP
+.BI abbreviate\-label\-ranges\*n\~ string
+.
+Three or more adjacent labels that refer to consecutive references
+will be abbreviated to a label consisting of the first label,
+followed by
+.IR string ,
+followed by the last label.
+.
+This is mainly useful with numeric labels.
+.
+If
+.I string
+is omitted,
+it defaults to
+.RB \[lq] \- \[rq].
+.
+.
+.TP
+.B accumulate\*n
+Accumulate references instead of writing out each reference
+as it is encountered.
+.
+Accumulated references will be written out whenever a reference
+of the form
+.
+.RS
+.RS
+.EX
+.B .[
+.B $LIST$
+.B .]
+.EE
+.RE
+.
+is encountered,
+after all input files have been processed,
+and whenever a
+.B .R1
+line is recognized.
+.RE
+.
+.
+.TP
+.BI annotate\*n\~ "field string"
+.I field
+is an annotation;
+print it at the end of the reference as a paragraph preceded by the line
+.
+.RS
+.IP
+.BI . string
+.
+.
+.LP
+If
+.I string
+is omitted,
+it will default to
+.BR AP ;
+if
+.I field
+is also omitted it will default to
+.BR X .
+.
+Only one field can be an annotation.
+.RE
+.
+.
+.TP
+.BI articles\~ string\~\c
+\&.\|.\|.
+Each
+.I string
+is a definite or indefinite article,
+and should be ignored at the beginning of
+.B T
+fields when sorting.
+.
+Initially,
+\[lq]a\[rq],
+\[lq]an\[rq],
+and
+\[lq]the\[rq] are recognized as articles.
+.
+.
+.TP
+.BI bibliography\~ file\~\c
+\&.\|.\|.
+.
+Write out all the references contained in each bibliographic database
+.IR file .
+.
+This command should come last in an
+.BR .R1 / .R2
+block.
+.
+.
+.TP
+.BI bracket\-label\~ "string1 string2 string3"
+In the text,
+bracket each label with
+.I string1
+and
+.IR string2 .
+.
+An occurrence of
+.I string2
+immediately followed by
+.I string1
+will be turned into
+.IR string3 .
+.
+The default behavior is as follows.
+.
+.RS \" RS twice to get inboard of the tagged paragraph indentation.
+.RS
+.EX
+.B bracket\-label \e*([. \e*(.] \[dq], \[dq]
+.EE
+.RE
+.RE
+.
+.
+.TP
+.BI capitalize\~ fields
+Convert
+.I fields
+to caps and small caps.
+.
+.
+.TP
+.B compatible\*n
+Recognize
+.B .R1
+and
+.B .R2
+even when followed by a character other than space or newline.
+.
+.
+.TP
+.BI database\~ file\~\c
+\&.\|.\|.
+Search each bibliographic database
+.IR file .
+.
+For each
+.IR file ,
+if an index
+.RI file @INDEX_SUFFIX@
+created by
+.MR @g@indxbib @MAN1EXT@
+exists,
+then it will be searched instead;
+each index can cover multiple databases.
+.
+.
+.TP
+.BI date\-as\-label\*n\~ string
+.I string
+is a label expression that specifies a string with which to replace the
+.B D
+field after constructing the label.
+.
+See subsection \[lq]Label expressions\[rq] below for a description of
+label expressions.
+.
+This command is useful if you do not want explicit labels in the
+reference list,
+but instead want to handle any necessary disambiguation by qualifying
+the date in some way.
+.
+The label used in the text would typically be some combination of the
+author and date.
+.
+In most cases you should also use the
+.B \%no\-label\-in\-reference
+command.
+.
+For example,
+.
+.RS \" RS twice to get inboard of the tagged paragraph indentation.
+.RS
+.EX
+.B date\-as\-label D.+yD.y%a*D.\-y
+.EE
+.RE
+.
+would attach a disambiguating letter to the year part of the
+.B D
+field in the reference.
+.RE
+.
+.
+.TP
+.B default\-database\*n
+The default database should be searched.
+.
+This is the default behavior,
+so the negative version of this command is more useful.
+.
+.I @g@refer
+determines whether the default database should be searched
+on the first occasion that it needs to do a search.
+.
+Thus a
+.B \%no\-default\-database
+command must be given before then,
+in order to be effective.
+.
+.
+.TP
+.BI discard\*n\~ fields
+When the reference is read,
+.I fields
+should be discarded;
+no string definitions for
+.I fields
+will be output.
+.
+Initially,
+.I fields
+are
+.BR XYZ .
+.
+.
+.TP
+.BI et\-al\*n\~ "string m n"
+Control use of
+.B et al.\&
+in the evaluation of
+.B @
+expressions in label expressions.
+.
+If the number of authors needed to make the author sequence unambiguous
+is
+.I u
+and the total number of authors is
+.I t
+then the last
+.IR t \|\-\| u
+authors will be replaced by
+.I string
+provided that
+.IR t \|\-\| u
+is not less than
+.I m
+and
+.I t
+is not less than
+.IR n .
+.
+The default behavior is as follows.
+.
+.RS \" RS twice to get inboard of the tagged paragraph indentation.
+.RS
+.EX
+.B et\-al \[dq] et al\[dq] 2 3
+.EE
+.RE
+.
+Note the absence of a dot from the end of the abbreviation,
+which is arguably not correct.
+.
+.RI ( "Et al" [.]
+is short for
+.IR "et alii" ,
+as
+.I etc.\&
+is short for
+.IR "et cetera" .)
+.RE
+.
+.
+.TP
+.BI include\~ file
+Include
+.I file
+and interpret the contents as commands.
+.
+.
+.TP
+.BI join\-authors\~ "string1 string2 string3"
+Join multiple authors together with
+.IR string s.
+.
+When there are exactly two authors,
+they will be joined with
+.IR string1 .
+.
+When there are more than two authors,
+all but the last two will be joined with
+.IR string2 ,
+and the last two authors will be joined with
+.IR string3 .
+.
+If
+.I string3
+is omitted,
+it will default to
+.IR string1 ;
+if
+.I string2
+is also omitted it will also default to
+.IR string1 .
+.
+For example,
+.
+.RS
+.RS
+.EX
+join\-authors \[dq] and \[dq] \[dq], \[dq] \[dq], and \[dq]
+.EE
+.RE
+.
+will restore the default method for joining authors.
+.RE
+.
+.
+.TP
+.B label\-in\-reference\*n
+When outputting the reference,
+define the string
+.B [F
+to be the reference's label.
+.
+This is the default behavior,
+so the negative version of this command is more useful.
+.
+.
+.TP
+.B label\-in\-text\*n
+For each reference output a label in the text.
+.
+The label will be separated from the surrounding text as described in
+the
+.B \%bracket\-label
+command.
+.
+This is the default behavior,
+so the negative version of this command is more useful.
+.
+.
+.TP
+.BI label\~ string
+.I string
+is a label expression describing how to label each reference.
+.
+.
+.TP
+.BI separate\-label\-second\-parts\~ string
+When merging two-part labels,
+separate the second part of the second label from the first label with
+.IR string .
+.
+See the description of the
+.B <>
+label expression.
+.
+.
+.TP
+.B move\-punctuation\*n
+In the text,
+move any punctuation at the end of the line past the label.
+.
+It is usually a good idea to give this command unless you are using
+superscripted numbers as labels.
+.
+.
+.TP
+.BI reverse\*n\~ string
+Reverse the fields whose names
+are in
+.IR string .
+.
+Each field name can be followed by a number which says how many such
+fields should be reversed.
+.
+If no number is given for a field,
+all such fields will be reversed.
+.
+.
+.TP
+.BI search\-ignore\*n\~ fields
+While searching for keys in databases for which no index exists,
+ignore the contents of
+.IR fields .
+.
+Initially,
+fields
+.B XYZ
+are ignored.
+.
+.
+.TP
+.BI search\-truncate\*n\~ n
+Only require the first
+.I n
+characters of keys to be given.
+.
+In effect, when searching for a given key, words in the database are
+truncated to the maximum of
+.I n
+and the length of the key.
+.
+Initially,
+.I n
+is\~6.
+.
+.
+.TP
+.BI short\-label\*n\~ string
+.I string
+is a label expression that specifies an alternative
+(usually shorter)
+style of label.
+.
+This is used when the
+.B #
+flag is given in the citation.
+.
+When using author-date style labels,
+the identity of the author or authors is sometimes clear from the
+context,
+and so it may be desirable to omit the author or authors from the label.
+.
+The
+.B \%short\-label
+command will typically be used to specify a label containing just
+a date and possibly a disambiguating letter.
+.
+.
+.TP
+.BI sort\*n\~ string
+Sort references according to
+.IR string .
+.
+References will automatically be accumulated.
+.
+.I string
+should be a list of field names,
+each followed by a number,
+indicating how many fields with the name should be used for sorting.
+.
+.RB \[lq] + \[rq]
+can be used to indicate that all the fields with the name should be
+used.
+.
+Also
+.B .\&
+can be used to indicate that the references should be sorted using
+the (tentative) label.
+.
+(Subsection \[lq]Label expressions\[rq] below describes the concept of a
+tentative label.)
+.
+.
+.TP
+.B sort\-adjacent\-labels\*n
+Sort labels that are adjacent in the text according to their position
+in the reference list.
+.
+This command should usually be given if the
+.B \%abbreviate\-label\-ranges
+command has been given,
+or if the label expression contains a
+.B <>
+expression.
+.
+This will have no effect unless references are being accumulated.
+.
+.
+.\" ====================================================================
+.SS "Label expressions"
+.\" ====================================================================
+.
+Label expressions can be evaluated both normally and tentatively.
+.
+The result of normal evaluation is used for output.
+.
+The result of tentative evaluation,
+called the
+.IR "tentative label" ,
+is used to gather the information that normal evaluation needs to
+disambiguate the label.
+.
+Label expressions specified by the
+.B \%date\-as\-label
+and
+.B \%short\-label
+commands are not evaluated tentatively.
+.
+Normal and tentative evaluation are the same for all types of expression
+other than
+.BR @ ,
+.BR * ,
+and
+.B %
+expressions.
+.
+The description below applies to normal evaluation,
+except where otherwise specified.
+.
+.
+.TP
+.I field
+.TQ
+.I field\~n
+The
+.IR n -th
+part of
+.IR field .
+.
+If
+.I n
+is omitted,
+it defaults to\~1.
+.
+.
+.TP
+.BI \[aq] string \[aq]
+The characters in
+.I string
+literally.
+.
+.
+.TP
+.B @
+All the authors joined as specified by the
+.B \%join\-authors
+command.
+.
+The whole of each author's name will be used.
+.
+However,
+if the references are sorted by author
+(that is,
+the sort specification starts with
+.RB \[lq] A+ \[rq]),
+then authors' last names will be used instead,
+provided that this does not introduce ambiguity,
+and also an initial subsequence of the authors may be used instead of
+all the authors,
+again provided that this does not introduce ambiguity.
+.
+The use of only the last name for the
+.IR i -th
+author of some reference
+is considered to be ambiguous if
+there is some other reference,
+such that the first
+.IR i \|\-\|1
+authors of the references are the same,
+the
+.IR i -th
+authors are not the same,
+but the
+.IR i -th
+authors' last names are the same.
+.
+A proper initial subsequence of the sequence of authors for some
+reference is considered to be ambiguous if there is a reference with
+some other sequence of authors which also has that subsequence as a
+proper initial subsequence.
+.
+When an initial subsequence of authors is used,
+the remaining authors are replaced by the string specified by the
+.B \%et\-al
+command;
+this command may also specify additional requirements that must be
+met before an initial subsequence can be used.
+.
+.B @
+tentatively evaluates to a canonical representation of the authors,
+such that authors that compare equally for sorting purposes will have
+the same representation.
+.
+.
+.TP
+.BI % n
+.TQ
+.B %a
+.TQ
+.B %A
+.TQ
+.B %i
+.TQ
+.B %I
+The serial number of the reference formatted according to the
+character following the
+.BR % .
+The serial number of a reference is\~1 plus the number of earlier
+references with the same tentative label as this reference.
+.
+These expressions tentatively evaluate to an empty string.
+.
+.TP
+.IB expr *
+If there is another reference with the same tentative label as this
+reference,
+then
+.IR expr ,
+otherwise an empty string.
+.
+It tentatively evaluates to an empty string.
+.
+.
+.TP
+.IB expr + n
+.TQ
+.IB expr \- n
+The first
+.RB ( + )
+or last
+.RB ( \- )
+.I n
+upper or lower case letters or digits of
+.IR expr .
+.
+.I roff
+special characters
+(such as
+.BR \e(\[aq]a )
+each count as a single letter.
+.
+Accent strings are retained but do not count towards the total.
+.
+.
+.TP
+.IB expr .l
+.I expr
+converted to lowercase.
+.
+.
+.TP
+.IB expr .u
+.I expr
+converted to uppercase.
+.
+.
+.TP
+.IB expr .c
+.I expr
+converted to caps and small caps.
+.
+.
+.TP
+.IB expr .r
+.I expr
+reversed so that the last name is first.
+.
+.
+.TP
+.IB expr .a
+.I expr
+with first names abbreviated.
+.
+Fields specified in the
+.B \%abbreviate
+command are abbreviated before any labels are evaluated.
+.
+Thus
+.B .a
+is useful only when you want a field to be abbreviated in a label
+but not in a reference.
+.
+.
+.TP
+.IB expr .y
+The year part of
+.IR expr .
+.
+.
+.TP
+.IB expr .+y
+The part of
+.I expr
+before the year,
+or the whole of
+.I expr
+if it does not contain a year.
+.
+.
+.TP
+.IB expr .\-y
+The part of
+.I expr
+after the year,
+or an empty string if
+.I expr
+does not contain a year.
+.
+.
+.TP
+.IB expr .n
+The last name part of
+.IR expr .
+.
+.
+.TP
+.IB expr1 \[ti] expr2
+.I expr1
+except that if the last character of
+.I expr1
+is
+.B \-
+then it will be replaced by
+.IR expr2 .
+.
+.
+.TP
+.I expr1 expr2
+The concatenation of
+.I expr1
+and
+.IR expr2 .
+.
+.
+.TP
+.IB expr1 | expr2
+If
+.I expr1
+is non-empty then
+.I expr1
+otherwise
+.IR expr2 .
+.
+.
+.TP
+.IB expr1 & expr2
+If
+.I expr1
+is non-empty
+then
+.I expr2
+otherwise an empty string.
+.
+.
+.TP
+.IB expr1 ? expr2 : expr3
+If
+.I expr1
+is non-empty
+then
+.I expr2
+otherwise
+.IR expr3 .
+.
+.
+.TP
+.BI < expr >
+The label is in two parts,
+which are separated by
+.IR expr .
+.
+Two adjacent two-part labels which have the same first part will be
+merged by appending the second part of the second label onto the first
+label separated by the string specified in the
+.B \%separate\-label\-second\-parts
+command
+(initially,
+a comma followed by a space);
+the resulting label will also be a two-part label with the same first
+part as before merging,
+and so additional labels can be merged into it.
+.
+It is permissible for the first part to be empty;
+this may be desirable for expressions used in the
+.B \%short\-label
+command.
+.
+.
+.TP
+.BI ( expr )
+The same as
+.IR expr .
+.
+Used for grouping.
+.
+.
+.LP
+The above expressions are listed in order of precedence
+(highest first);
+.B &
+and
+.B |
+have the same precedence.
+.
+.
+.\" ====================================================================
+.SS "Macro interface"
+.\" ====================================================================
+.
+Each reference starts with a call to the macro
+.BR ]\- .
+.
+The string
+.B [F
+will be defined to be the label for this reference,
+unless the
+.B \%no\-label\-in\-reference
+command has been given.
+.
+There then follows a series of string definitions,
+one for each field:
+string
+.BI [ X
+corresponds to field
+.IR X .
+.
+The register
+.B [P
+is set to\~1 if the
+.B P
+field contains a range of pages.
+.
+The
+.BR [T ,
+.B [A
+and
+.B [O
+registers are set to\~1 according as the
+.BR T ,
+.B A
+and
+.B O
+fields end with any of
+.B .?!\&
+(an end-of-sentence character).
+.
+The
+.B [E
+register will be set to\~1 if the
+.B [E
+string contains more than one name.
+.
+The reference is followed by a call to the
+.B ][
+macro.
+.
+The first argument to this macro gives a number representing
+the type of the reference.
+.
+If a reference contains a
+.B J
+field,
+it will be classified as type\~1,
+otherwise if it contains a
+.B B
+field,
+it will be type\~3,
+otherwise if it contains a
+.B G
+or
+.B R
+field it will be type\~4,
+otherwise if it contains an
+.B I
+field it will be type\~2,
+otherwise it will be type\~0.
+.
+The second argument is a symbolic name for the type:
+.BR other ,
+.BR \%journal\-article ,
+.BR book ,
+.BR \%article\-in\-book ,
+or
+.BR \%tech\-report .
+.
+Groups of references that have been accumulated or are produced by the
+.B \%bibliography
+command are preceded by a call to the
+.B ]<
+macro and followed by a call to the
+.B ]>
+macro.
+.
+.
+.br
+.ne 4v
+.\" ====================================================================
+.SH Options
+.\" ====================================================================
+.
+.B \-\-help
+displays a usage message,
+while
+.B \-v
+and
+.B \-\-version
+show version information;
+all exit afterward.
+.
+.
+.TP
+.B \-R
+Don't recognize lines beginning with
+.BR .R1 / .R2 .
+.
+.
+.P
+Other options are equivalent to
+.I @g@refer
+commands.
+.
+.
+.TP 16n
+.BI \-a\~ n
+.B reverse
+.BI A n
+.
+.
+.TP
+.B \-b
+.B "\%no\-label\-in\-text; \%no\-label\-in\-reference"
+.
+.
+.TP
+.B \-B
+See below.
+.
+.
+.TP
+.BI \-c\~ fields
+.B capitalize
+.I fields
+.
+.
+.TP
+.B \-C
+.B compatible
+.
+.
+.TP
+.B \-e
+.B accumulate
+.
+.
+.TP
+.BI \-f\~ n
+.B \%label
+.BI % n
+.
+.
+.TP
+.BI \-i\~ fields
+.B search\-ignore
+.I fields
+.
+.
+.TP
+.B \-k
+.B \%label
+.B L\[ti]%a
+.
+.
+.TP
+.BI \-k\~ field
+.B \%label
+.IB field \[ti]%a
+.
+.
+.TP
+.B \-l
+.B \%label
+.B A.nD.y%a
+.
+.
+.TP
+.BI \-l\~ m
+.B \%label
+.BI A.n+ m D.y%a
+.
+.
+.TP
+.BI \-l\~, n
+.B \%label
+.BI A.nD.y\- n %a
+.
+.
+.TP
+.BI \-l\~ m , n
+.B \%label
+.BI A.n+ m D.y\- n %a
+.
+.
+.TP
+.B \-n
+.B \%no\-default\-database
+.
+.
+.TP
+.BI \-p\~ db-file
+.B database
+.I db-file
+.
+.
+.TP
+.B \-P
+.B move\-punctuation
+.
+.
+.TP
+.BI \-s\~ spec
+.B sort
+.I spec
+.
+.
+.TP
+.B \-S
+.B \%label \[dq](A.n|Q) \[aq], \[aq] (D.y|D)\[dq]; \
+\%bracket\-label \[dq]\~(\[dq]\~)\~\[dq];\~\[dq]
+.
+.
+.TP
+.BI \-t\~ n
+.B search\-truncate
+.I n
+.
+.
+.P
+The
+.B \-B
+option has command equivalents with the addition that the file names
+specified on the command line are processed as if they were arguments to
+the
+.B \%bibliography
+command instead of in the normal way.
+.
+.
+.TP 16n
+.B \-B
+.B "annotate X AP; \%no\-label\-in\-reference"
+.
+.
+.TP
+.BI \-B\~ field . macro
+.B annotate
+.I field
+.IB macro ;
+.B \%no\-label\-in\-reference
+.
+.
+.\" ====================================================================
+.SH Environment
+.\" ====================================================================
+.
+.TP
+.I REFER
+If set,
+overrides the default database.
+.
+.
+.\" ====================================================================
+.SH Files
+.\" ====================================================================
+.
+.TP
+.I @DEFAULT_INDEX@
+Default database.
+.
+.
+.TP
+.RI file @INDEX_SUFFIX@
+Index files.
+.
+.
+.TP
+.I @MACRODIR@/\:refer\:.tmac
+defines macros and strings facilitating integration with macro packages
+that wish to support
+.IR @g@refer .
+.
+.
+.LP
+.I @g@refer
+uses temporary files.
+.
+See the
+.MR groff @MAN1EXT@
+man page for details of where such files are created.
+.
+.
+.\" ====================================================================
+.SH Bugs
+.\" ====================================================================
+.
+In label expressions,
+.B <>
+expressions are ignored inside
+.BI . char
+expressions.
+.
+.
+.\" ====================================================================
+.SH Examples
+.\" ====================================================================
+.
+We can illustrate the operation of
+.I @g@refer
+with a sample bibliographic database containing one entry and a simple
+.I roff
+document to cite that entry.
+.
+.
+.P
+.RS
+.EX
+$ \c
+.B cat > my\-db\-file
+.B %A Daniel P.\[rs]& Friedman
+.B %A Matthias Felleisen
+.B %C Cambridge, Massachusetts
+.B %D 1996
+.B %I The MIT Press
+.B %T The Little Schemer, Fourth Edition
+$ \c
+.B refer \-p my\-db\-file
+.B Read the book
+.B .[
+.B friedman
+.B .]
+.B on your summer vacation.
+.I <Control+D>
+\&.lf 1 \-
+Read the book\[rs]*([.1\[rs]*(.]
+\&.ds [F 1
+\&.]\-
+\&.ds [A Daniel P. Friedman and Matthias Felleisen
+\&.ds [C Cambridge, Massachusetts
+\&.ds [D 1996
+\&.ds [I The MIT Press
+\&.ds [T The Little Schemer, Fourth Edition
+\&.nr [T 0
+\&.nr [A 0
+\&.][ 2 book
+\&.lf 5 \-
+on your summer vacation.
+.EE
+.RE
+.
+.
+.P
+The foregoing shows us that
+.I @g@refer
+(a) produces a label \[lq]1\[rq];
+(b) brackets that label with interpolations of the
+.RB \[lq] [. \[rq]
+and
+.RB \[lq] .] \[rq]
+strings;
+(c) calls a macro
+.RB \[lq] ]\- \[rq];
+(d) defines strings and registers containing the label and bibliographic
+data for the reference;
+(e) calls a macro
+.RB \[lq] ][ \[rq];
+and (f) uses the
+.B lf
+request to restore the line numbers of the original input.
+.
+As discussed in subsection \[lq]Macro interface\[rq] above,
+it is up to the document or a macro package to employ and format this
+information usefully.
+.
+Let us see how we might turn
+.MR groff_ms @MAN7EXT@
+to this task.
+.
+.
+.P
+.RS
+.EX
+$ \c
+.B REFER=my\-db\-file groff \-R \-ms
+.B .LP
+.B Read the book
+.B .[
+.B friedman
+.B .]
+.B on your summer vacation.
+.B Commentary is available.\[rs]*{*\[rs]*}
+.B .FS \[rs]*{*\[rs]*}
+.B Space reserved for penetrating insight.
+.B .FE
+.EE
+.RE
+.
+.
+.LP
+.IR ms 's
+automatic footnote numbering mechanism is not aware of
+.IR @g@refer 's
+label numbering,
+so we have manually specified a (superscripted) symbolic footnote for
+our non-bibliographic aside.
+.
+.
+.\" ====================================================================
+.SH "See also"
+.\" ====================================================================
+.
+\[lq]Some Applications of Inverted Indexes on the Unix System\[rq],
+by M.\& E.\& Lesk,
+1978,
+AT&T Bell Laboratories Computing Science Technical Report No.\& 69.
+.
+.
+.LP
+.MR @g@indxbib @MAN1EXT@ ,
+.MR @g@lookbib @MAN1EXT@ ,
+.MR lkbib @MAN1EXT@
+.
+.
+.\" Restore compatibility mode (for, e.g., Solaris 10/11).
+.cp \n[*groff_refer_1_man_C]
+.do rr *groff_refer_1_man_C
+.
+.
+.\" Local Variables:
+.\" fill-column: 72
+.\" mode: nroff
+.\" End:
+.\" vim: set filetype=groff textwidth=72:
diff --git a/src/preproc/refer/refer.am b/src/preproc/refer/refer.am
new file mode 100644
index 0000000..273f334
--- /dev/null
+++ b/src/preproc/refer/refer.am
@@ -0,0 +1,61 @@
+# Copyright (C) 2014-2020 Free Software Foundation, Inc.
+#
+# This file is part of groff.
+#
+# groff is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# groff is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+prefixexecbin_PROGRAMS += refer
+refer_CPPFLAGS = $(AM_CPPFLAGS) -I $(top_srcdir)/src/preproc/refer
+refer_LDADD = libbib.a libgroff.a $(LIBM) lib/libgnu.a
+refer_SOURCES =  \
+  src/preproc/refer/command.cpp \
+  src/preproc/refer/ref.cpp \
+  src/preproc/refer/refer.cpp \
+  src/preproc/refer/token.cpp \
+  src/preproc/refer/label.ypp \
+  src/preproc/refer/refer.h \
+  src/preproc/refer/ref.h \
+  src/preproc/refer/token.h \
+  src/preproc/refer/command.h
+
+PREFIXMAN1 += src/preproc/refer/refer.1
+EXTRA_DIST += \
+  src/preproc/refer/TODO \
+  src/preproc/refer/refer.1.man
+
+# Since refer_CPPFLAGS was set, all .o files have a 'refer-' prefix.
+src/preproc/refer/refer-command.$(OBJEXT): defs.h
+src/preproc/refer/refer-ref.$(OBJEXT): defs.h
+src/preproc/refer/refer-refer.$(OBJEXT): defs.h
+src/preproc/refer/refer-token.$(OBJEXT): defs.h
+src/preproc/refer/refer-label.$(OBJEXT): defs.h
+
+MAINTAINERCLEANFILES += \
+  src/preproc/refer/label.cpp \
+  src/preproc/refer/label.hpp \
+  src/preproc/refer/label.output
+
+refer_TESTS = \
+  src/preproc/refer/tests/report-correct-line-numbers.sh
+TESTS += $(refer_TESTS)
+EXTRA_DIST += \
+  $(refer_TESTS) \
+  src/preproc/refer/tests/artifacts/62124.bib
+
+
+# Local Variables:
+# fill-column: 72
+# mode: makefile-automake
+# End:
+# vim: set autoindent filetype=automake textwidth=72:
diff --git a/src/preproc/refer/refer.cpp b/src/preproc/refer/refer.cpp
new file mode 100644
index 0000000..a5c291e
--- /dev/null
+++ b/src/preproc/refer/refer.cpp
@@ -0,0 +1,1267 @@
+/* Copyright (C) 1989-2020 Free Software Foundation, Inc.
+     Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include "refer.h"
+#include "refid.h"
+#include "ref.h"
+#include "token.h"
+#include "search.h"
+#include "command.h"
+
+extern "C" const char *Version_string;
+
+const char PRE_LABEL_MARKER = '\013';	// expands to pre_label
+const char POST_LABEL_MARKER = '\014';	// expands to post_label/sep_label
+const char LABEL_MARKER = '\015'; // label_type is added on
+
+#define FORCE_LEFT_BRACKET 04	// make_reference() flag for '['
+#define FORCE_RIGHT_BRACKET 010	// make_reference() flag for ']'
+
+static FILE *outfp = stdout;	// stdout, or a temporary file if diverted
+
+string capitalize_fields;	// set by -c
+string reverse_fields;		// set by -a
+string abbreviate_fields;
+string period_before_last_name = ". ";
+string period_before_initial = ".";
+string period_before_hyphen = "";
+string period_before_other = ". ";
+string sort_fields;		// set by -s
+int annotation_field = -1;	// set by -B
+string annotation_macro;	// set by -B
+string discard_fields = "XYZ";
+string pre_label = "\\*([.";	// written for PRE_LABEL_MARKER
+string post_label = "\\*(.]";	// written for POST_LABEL_MARKER
+string sep_label = ", ";	// written between adjacent labels
+int have_bibliography = 0;
+int accumulate = 0;		// set by -e and -s
+int move_punctuation = 0;	// set by -P
+int abbreviate_label_ranges = 0;
+string label_range_indicator;
+int label_in_text = 1;		// cleared by -b and -B
+int label_in_reference = 1;	// cleared by -b and -B
+int date_as_label = 0;
+int sort_adjacent_labels = 0;
+// Join exactly two authors with this.
+string join_authors_exactly_two = " and ";
+// When there are more than two authors join the last two with this.
+string join_authors_last_two = ", and ";
+// Otherwise join authors with this.
+string join_authors_default = ", ";
+string separate_label_second_parts = ", ";
+// Use this string to represent that there are other authors.
+string et_al = " et al";
+// Use et al only if it can replace at least this many authors.
+int et_al_min_elide = 2;
+// Use et al only if the total number of authors is at least this.
+int et_al_min_total = 3;
+
+
+int compatible_flag = 0;	// set by -C
+
+int short_label_flag = 0;
+
+static bool recognize_R1_R2 = true;	// cleared by -R
+
+search_list database_list;	// files added by -p, plus the default
+int search_default = 1;		// cleared by -n
+static int default_database_loaded = 0;
+
+static reference **citation = 0;	// array grown by store_citation()
+static int ncitations = 0;		// entries of 'citation' in use
+static int citation_max = 0;		// entries of 'citation' allocated
+
+static reference **reference_hash_table = 0;	// see store_reference()
+static int hash_table_size;	// 0 until store_reference() allocates
+static int nreferences = 0;
+
+static int need_syncing = 0;	// output_pending_line() must emit .lf
+string pending_line;		// text line held back by do_file()
+string pending_lf_lines;	// .lf lines written after pending_line
+
+static void output_pending_line();
+static unsigned immediately_handle_reference(const string &);
+static void immediately_output_references();
+static unsigned store_reference(const string &);
+static void divert_to_temporary_file();
+static reference *make_reference(const string &, unsigned *);
+static void usage(FILE *stream);
+static void do_file(const char *);
+static void split_punct(string &line, string &punct);
+static void output_citation_group(reference **v, int n, label_type,
+				  FILE *fp);
+static void possibly_load_default_database();
+// Entry point: parse clustered single-letter options, then run each
+// operand (or stdin) through do_bib() if -B was given, else do_file().
+int main(int argc, char **argv)
+{
+  program_name = argv[0];
+  static char stderr_buf[BUFSIZ];
+  setbuf(stderr, stderr_buf);
+  outfp = stdout;
+  int finished_options = 0;
+  int bib_flag = 0;
+  int done_spec = 0;
+
+  // TODO: Migrate to getopt_long; see, e.g., src/preproc/eqn/main.cpp.
+  for (--argc, ++argv;
+       !finished_options && argc > 0 && argv[0][0] == '-'
+       && argv[0][1] != '\0';
+       argv++, argc--) {
+    // opt is zeroed once the rest of the word is used as an argument.
+    const char *opt = argv[0] + 1;
+    while (opt != 0 && *opt != '\0') {
+      switch (*opt) {
+      case 'C':
+	compatible_flag = 1;
+	opt++;
+	break;
+      case 'B':
+	bib_flag = 1;
+	label_in_reference = 0;
+	label_in_text = 0;
+	++opt;
+	if (*opt == '\0') {
+	  annotation_field = 'X';
+	  annotation_macro = "AP";
+	}
+	else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') {
+	  annotation_field = opt[0];
+	  annotation_macro = opt + 2;
+	}
+	opt = 0;
+	break;
+      case 'P':
+	move_punctuation = 1;
+	opt++;
+	break;
+      case 'R':
+	recognize_R1_R2 = false;
+	opt++;
+	break;
+      case 'S':
+	// Not a very useful spec.
+	set_label_spec("(A.n|Q)', '(D.y|D)");
+	done_spec = 1;
+	pre_label = " (";
+	post_label = ")";
+	sep_label = "; ";
+	opt++;
+	break;
+      case 'V':
+	do_verify = true;
+	opt++;
+	break;
+      case 'f':
+	{
+	  const char *num = 0;
+	  if (*++opt == '\0') {
+	    if (argc > 1) {
+	      num = *++argv;
+	      --argc;
+	    }
+	    else {
+	      error("'f' option requires an argument");
+	      usage(stderr);
+	      exit(1);
+	    }
+	  }
+	  else {
+	    num = opt;
+	    opt = 0;
+	  }
+	  const char *ptr;
+	  for (ptr = num; *ptr; ptr++)
+	    if (!csdigit(*ptr)) {
+	      error("invalid character '%1' in argument to 'f' option",
+		    *ptr);
+	      break;
+	    }
+	  if (*ptr == '\0') {
+	    string spec;
+	    spec = '%';
+	    spec += num;
+	    spec += '\0';
+	    set_label_spec(spec.contents());
+	    done_spec = 1;
+	  }
+	  break;
+	}
+      case 'b':
+	label_in_text = 0;
+	label_in_reference = 0;
+	opt++;
+	break;
+      case 'e':
+	accumulate = 1;
+	opt++;
+	break;
+      case 'c':
+	capitalize_fields = ++opt;
+	opt = 0;
+	break;
+      case 'k':
+	{
+	  char buf[5];
+	  if (csalpha(*++opt))
+	    buf[0] = *opt++;
+	  else {
+	    if (*opt != '\0')
+	      error("invalid field name '%1' in argument to 'k' option",
+		    *opt++);
+	    buf[0] = 'L';
+	  }
+	  strcpy(buf + 1, "~%a");
+	  set_label_spec(buf);
+	  done_spec = 1;
+	}
+	break;
+      case 'a':
+	{
+	  const char *ptr;
+	  for (ptr = ++opt; *ptr; ptr++)
+	    if (!csdigit(*ptr)) {
+	      error("'a' option argument must be an integer");
+	      break;
+	    }
+	  if (*ptr == '\0') {
+	    reverse_fields = 'A';
+	    reverse_fields += opt;
+	  }
+	  opt = 0;
+	}
+	break;
+      case 'i':
+	linear_ignore_fields = ++opt;
+	opt = 0;
+	break;
+      case 'l':
+	{
+	  char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a
+	  strcpy(buf, "A.n");
+	  if (*++opt != '\0' && *opt != ',') {
+	    char *ptr;
+	    long n = strtol(opt, &ptr, 10);
+	    if (n == 0 && ptr == opt) {
+	      error("invalid integer '%1' in 'l' option argument", opt);
+	      opt = 0;
+	      break;
+	    }
+	    if (n < 0)
+	      n = 0;
+	    opt = ptr;
+	    sprintf(strchr(buf, '\0'), "+%ld", n);
+	  }
+	  strcat(buf, "D.y");
+	  if (*opt == ',')
+	    opt++;
+	  if (*opt != '\0') {
+	    char *ptr;
+	    long n = strtol(opt, &ptr, 10);
+	    if (n == 0 && ptr == opt) {
+	      error("invalid integer '%1' in 'l' option argument", opt);
+	      opt = 0;
+	      break;
+	    }
+	    if (n < 0)
+	      n = 0;
+	    sprintf(strchr(buf, '\0'), "-%ld", n);
+	    opt = ptr;
+	    if (*opt != '\0')
+	      error("argument to 'l' option not of form 'm,n'");
+	  }
+	  strcat(buf, "%a");
+	  if (!set_label_spec(buf))
+	    assert(0 == "set_label_spec() failed");
+	  done_spec = 1;
+	}
+	break;
+      case 'n':
+	search_default = 0;
+	opt++;
+	break;
+      case 'p':
+	{
+	  const char *filename = 0;
+	  if (*++opt == '\0') {
+	    if (argc > 1) {
+	      filename = *++argv;
+	      argc--;
+	    }
+	    else {
+	      error("option 'p' requires an argument");
+	      usage(stderr);
+	      exit(1);
+	    }
+	  }
+	  else {
+	    filename = opt;
+	    opt = 0;
+	  }
+	  database_list.add_file(filename);
+	}
+	break;
+      case 's':
+	if (*++opt == '\0')
+	  sort_fields = "AD";
+	else {
+	  sort_fields = opt;
+	  opt = 0;
+	}
+	accumulate = 1;
+	break;
+      case 't':
+	{
+	  // Step past the 't' itself, as the other cases do; parsing from
+	  // the option letter made strtol() reject every -t argument.
+	  char *ptr;
+	  long n = strtol(++opt, &ptr, 10);
+	  if (n == 0 && ptr == opt) {
+	    error("invalid integer '%1' in 't' option argument", opt);
+	    opt = 0;
+	    break;
+	  }
+	  if (n < 1)
+	    n = 1;
+	  linear_truncate_len = int(n);
+	  opt = ptr;
+	  break;
+	}
+      case '-':
+	if (opt[1] == '\0') {
+	  finished_options = 1;
+	  opt++;
+	  break;
+	}
+	if (strcmp(opt, "-version") == 0) {
+	  // The 'v' label lives in this block so -v shares its code.
+      case 'v':
+	  printf("GNU refer (groff) version %s\n", Version_string);
+	  exit(0);
+	  break;
+	}
+	if (strcmp(opt, "-help") == 0) {
+	  usage(stdout);
+	  exit(0);
+	}
+	// fall through
+      default:
+	error("unrecognized option '%1'", opt);
+	usage(stderr);
+	exit(1);
+      }
+    }
+  }
+  if (!done_spec)
+    set_label_spec("%1");
+  if (argc <= 0) {
+    if (bib_flag)
+      do_bib("-");
+    else
+      do_file("-");
+  }
+  else {
+    for (int i = 0; i < argc; i++) {
+      if (bib_flag)
+	do_bib(argv[i]);
+      else
+	do_file(argv[i]);
+    }
+  }
+  if (accumulate)
+    output_references();
+  if (fflush(stdout) < 0)
+    fatal("output error: %1", strerror(errno));
+  return 0;
+}
+// Write the command-line synopsis to 'stream'.
+static void usage(FILE *stream)
+{
+  const char *me = program_name;
+  fprintf(stream, "usage: %s [-bCenPRS] [-aN] [-cXYZ] [-fN] [-iXYZ]"
+	  " [-kX] [-lM,N] [-p db-file] [-sXYZ] [-tN] [-Bl.m] [file ...]\n",
+	  me);
+  fprintf(stream, "usage: %s {-v | --version}\n", me);
+  fprintf(stream, "usage: %s --help\n", me);
+}
+// Add $REFER (or else DEFAULT_INDEX) to the database list, just once.
+static void possibly_load_default_database()
+{
+  if (!search_default || default_database_loaded)
+    return;
+  char *env_db = getenv("REFER");
+  if (env_db != 0)
+    database_list.add_file(env_db);
+  else
+    database_list.add_file(DEFAULT_INDEX, 1);
+  default_database_loaded = 1;
+}
+// Return true if 'str', ignoring surrounding white space, is "$LIST$".
+static bool is_list(const string &str)
+{
+  const char *first = str.contents();
+  const char *last = first + str.length();
+  for (; last > first && csspace(last[-1]); --last)
+    ;
+  for (; first < last && csspace(*first); ++first)
+    ;
+  return (last - first == 6) && (memcmp(first, "$LIST$", 6) == 0);
+}
+// Copy 'filename' ("-" means stdin) to outfp, handling .[/.] citations,
+// .lf requests, and (unless -R was given) .R1/.R2 command blocks.
+static void do_file(const char *filename)
+{
+  FILE *fp;
+  if (strcmp(filename, "-") == 0)
+    fp = stdin;
+  else {
+    errno = 0;
+    fp = fopen(filename, "r");
+    if (fp == 0) {
+      error("can't open '%1': %2", filename, strerror(errno));
+      return;
+    }
+  }
+  // Static: current_filename keeps pointing into fn after we return.
+  static string fn;
+  fn = filename;
+  fn += '\0';
+  normalize_for_lf(fn);
+  current_filename = fn.contents();
+  fprintf(outfp, ".lf 1 %s\n", current_filename);
+  current_lineno = 1;
+  string line;
+  for (;;) {
+    line.clear();
+    for (;;) {
+      int c = getc(fp);
+      if (EOF == c) {
+	if (line.length() > 0)
+	  line += '\n';
+	break;
+      }
+      if (is_invalid_input_char(c))
+	error("invalid input character code %1", c);
+      else {
+	line += c;
+	if ('\n' == c)
+	  break;
+      }
+    }
+    int len = line.length();
+    if (len == 0)
+      break;
+    current_lineno++;
+    // A citation: collect the reference text up to the closing ".]".
+    if (len >= 2 && line[0] == '.' && line[1] == '[') {
+      int start_lineno = current_lineno;
+      bool at_start_of_line = true;
+      string str;
+      string post;
+      string pre(line.contents() + 2, line.length() - 3);
+      for (;;) {
+	int c = getc(fp);
+	if (EOF == c) {
+	  error_with_file_and_line(current_filename, start_lineno,
+				   "missing '.]' line");
+	  break;
+	}
+	if (at_start_of_line)
+	  current_lineno++;
+	if (at_start_of_line && '.' == c) {
+	  int d = getc(fp);
+	  if (d == ']') {
+	    while ((d = getc(fp)) != '\n' && d != EOF) {
+	      if (is_invalid_input_char(d))
+		error("invalid input character code %1", d);
+	      else
+		post += d;
+	    }
+	    break;
+	  }
+	  if (d != EOF)
+	    ungetc(d, fp);
+	}
+	if (is_invalid_input_char(c))
+	  error("invalid input character code %1", c);
+	else
+	  str += c;
+	at_start_of_line = ('\n' == c);
+      }
+      if (is_list(str)) {
+	output_pending_line();
+	if (accumulate)
+	  output_references();
+	else
+	  error("found '$LIST$' but not accumulating references");
+      }
+      else {
+	unsigned flags = (accumulate
+			  ? store_reference(str)
+			  : immediately_handle_reference(str));
+	if (label_in_text) {
+	  if (accumulate && outfp == stdout)
+	    divert_to_temporary_file();
+	  if (pending_line.length() == 0)
+	    warning("can't attach citation to previous line");
+	  else
+	    pending_line.set_length(pending_line.length() - 1);
+	  string punct;
+	  if (move_punctuation)
+	    split_punct(pending_line, punct);
+	  int have_text = pre.length() > 0 || post.length() > 0;
+	  label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET
+					       |FORCE_RIGHT_BRACKET));
+	  // Append marker characters to the held line; they are replaced
+	  // by the label text when the line is finally written out.
+	  if ((flags & FORCE_LEFT_BRACKET) || !have_text)
+	    pending_line += PRE_LABEL_MARKER;
+	  pending_line += pre;
+	  char lm = LABEL_MARKER + (int)lt;
+	  pending_line += lm;
+	  pending_line += post;
+	  if ((flags & FORCE_RIGHT_BRACKET) || !have_text)
+	    pending_line += POST_LABEL_MARKER;
+	  pending_line += punct;
+	  pending_line += '\n';
+	}
+      }
+      need_syncing = 1;
+    }
+    else if (len >= 4
+	     && '.' == line[0] && 'l' == line[1] && 'f' == line[2]
+	     && (compatible_flag || '\n' == line[3] || ' ' == line[3]))
+    {
+      pending_lf_lines += line;
+      line += '\0';
+      if (interpret_lf_args(line.contents() + 3))
+	current_lineno--;
+    }
+    else if (recognize_R1_R2
+	     && len >= 4
+	     && '.' == line[0] && 'R' == line[1] && '1' == line[2]
+	     && (compatible_flag || '\n' == line[3] || ' ' == line[3]))
+    {
+      line.clear();
+      int start_lineno = current_lineno;
+      bool at_start_of_line = true;
+      for (;;) {
+	int c = getc(fp);
+	if (c != EOF && at_start_of_line)
+	  current_lineno++;
+	if (at_start_of_line && '.' == c) {
+	  c = getc(fp);
+	  if ('R' == c) {
+	    c = getc(fp);
+	    if ('2' == c) {
+	      c = getc(fp);
+	      if (compatible_flag || ' ' == c || '\n' == c || EOF == c)
+	      {
+		while (c != EOF && c != '\n')
+		  c = getc(fp);
+		break;
+	      }
+	      else
+		line += ".R2";
+	    }
+	    else
+	      line += ".R";
+	  }
+	  else
+	    line += '.';
+	}
+	if (EOF == c) {
+	  error_with_file_and_line(current_filename, start_lineno,
+				   "missing '.R2' line");
+	  break;
+	}
+	if (is_invalid_input_char(c))
+	  error_with_file_and_line(current_filename, start_lineno,
+				   "invalid input character code %1",
+				   c);
+	else
+	  line += c;
+	// Track line starts even when c was discarded as invalid.
+	at_start_of_line = ('\n' == c);
+      }
+      output_pending_line();
+      if (accumulate)
+	output_references();
+      else
+	nreferences = 0;
+      process_commands(line, current_filename, start_lineno + 1);
+      need_syncing = 1;
+    }
+    else {
+      // Ordinary text: hold it back so a following citation can attach.
+      output_pending_line();
+      pending_line = line;
+    }
+  }
+  need_syncing = 0;
+  output_pending_line();
+  if (fp != stdin)
+    fclose(fp);
+}
+// State machine that replaces label markers in a text stream by labels.
+class label_processing_state {
+  enum {
+    NORMAL,			// nothing pending
+    PENDING_LABEL,		// seen one or more label markers
+    PENDING_LABEL_POST,		// ...followed by POST_LABEL_MARKER
+    PENDING_LABEL_POST_PRE,	// ...followed by PRE_LABEL_MARKER
+    PENDING_POST		// seen POST_LABEL_MARKER with no label
+    } state;
+  label_type type;		// type of pending labels
+  int count;			// number of pending labels
+  reference **rptr;		// pointer to next reference
+  int rcount;			// number of references left
+  FILE *fp;			// where the expanded text is written
+  int handle_pending(int c);	// returns 1 if c was consumed
+public:
+  label_processing_state(reference **, int, FILE *);
+  ~label_processing_state();	// asserts that no references are left
+  void process(int c);		// feed one character (or marker)
+};
+// Emit the held-back text line, then any queued .lf lines, pending
+// references (when not accumulating), and a .lf resync if one is due.
+static void output_pending_line()
+{
+  if (!label_in_text || accumulate || ncitations <= 0)
+    put_string(pending_line, outfp);
+  else {
+    label_processing_state expander(citation, ncitations, outfp);
+    for (int pos = 0, n = pending_line.length(); pos < n; pos++)
+      expander.process((unsigned char)(pending_line[pos]));
+  }
+  pending_line.clear();
+  if (pending_lf_lines.length() != 0) {
+    put_string(pending_lf_lines, outfp);
+    pending_lf_lines.clear();
+  }
+  if (0 == accumulate)
+    immediately_output_references();
+  if (need_syncing != 0) {
+    fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename);
+    need_syncing = 0;
+  }
+}
+// If the last token of 'line' is punctuation, move that token (and
+// everything after its start) from 'line' to the end of 'punct'.
+static void split_punct(string &line, string &punct)
+{
+  const char *const start = line.contents();
+  const char *const end = start + line.length();
+  const char *cursor = start;
+  const char *tail = 0;		// start of the most recent token
+  while (cursor < end) {
+    tail = cursor;
+    const char ch = *cursor;
+    // Label markers are stepped over one character at a time.
+    if (PRE_LABEL_MARKER == ch || POST_LABEL_MARKER == ch
+	|| (ch >= LABEL_MARKER && ch < LABEL_MARKER + N_LABEL_TYPES))
+      cursor++;
+    else if (!get_token(&cursor, end))
+      break;
+  }
+  if (tail == 0)
+    return;
+  const token_info *info = lookup_token(tail, end);
+  if (info->is_punct()) {
+    punct.append(tail, end - tail);
+    line.set_length(tail - start);
+  }
+}
+// Send subsequent output to a temporary file obtained from xtmpfile().
+static void divert_to_temporary_file()
+{
+  outfp = xtmpfile();
+}
+// Append 'ref' to the citation array, growing the array when it is full.
+static void store_citation(reference *ref)
+{
+  if (ncitations >= citation_max) {
+    // Start with room for 100 entries; double the capacity after that.
+    const int new_max = (citation == 0) ? 100 : citation_max * 2;
+    reference **grown = new reference *[new_max];
+    if (citation != 0) {
+      memcpy(grown, citation, ncitations*sizeof(reference *));
+      delete[] citation;
+    }
+    citation = grown;
+    citation_max = new_max;
+  }
+  citation[ncitations++] = ref;
+}
+// Add the reference in 'str' to the hash table if new; return flags.
+static unsigned store_reference(const string &str)
+{
+  if (reference_hash_table == 0) {
+    reference_hash_table = new reference *[17];
+    hash_table_size = 17;
+    for (int i = 0; i < hash_table_size; i++)
+      reference_hash_table[i] = 0;
+  }
+  unsigned flags;
+  reference *ref = make_reference(str, &flags);
+  ref->compute_hash_code();
+  unsigned h = ref->hash();
+  reference **ptr;	// probes downward from h % size, wrapping around
+  for (ptr = reference_hash_table + (h % hash_table_size);
+       *ptr != 0;
+       ((ptr == reference_hash_table)
+	? (ptr = reference_hash_table + hash_table_size - 1)
+	: --ptr))
+    if (same_reference(**ptr, *ref))
+      break;
+  if (*ptr != 0) {	// an equal reference is already stored
+    if (ref->is_merged())
+      warning("fields ignored because reference already used");
+    delete ref;
+    ref = *ptr;		// use the stored one instead
+  }
+  else {
+    *ptr = ref;		// take the empty slot the probe ended on
+    ref->set_number(nreferences);
+    nreferences++;
+    ref->pre_compute_label();
+    ref->compute_sort_key();
+    if (nreferences*2 >= hash_table_size) {	// at least half full
+      // Rehash it.
+      reference **old_table = reference_hash_table;
+      int old_size = hash_table_size;
+      hash_table_size = next_size(hash_table_size);
+      reference_hash_table = new reference*[hash_table_size];
+      int i;
+      for (i = 0; i < hash_table_size; i++)
+	reference_hash_table[i] = 0;
+      for (i = 0; i < old_size; i++)
+	if (old_table[i]) {
+	  reference **p;	// same probe sequence as above
+	  for (p = (reference_hash_table
+		    + (old_table[i]->hash() % hash_table_size));
+	       *p;
+	       ((p == reference_hash_table)
+		? (p = reference_hash_table + hash_table_size - 1)
+		: --p))
+	    ;
+	  *p = old_table[i];
+	}
+      delete[] old_table;
+    }
+  }
+  if (label_in_text)
+    store_citation(ref);	// one entry per citation, even if repeated
+  return flags;
+}
+// Build the reference described by 'str', number it, compute its label
+// if labels are in use, and queue it as a citation; returns its flags.
+unsigned immediately_handle_reference(const string &str)
+{
+  unsigned flags;
+  reference *const ref = make_reference(str, &flags);
+  ref->set_number(nreferences++);
+  if (label_in_text || label_in_reference) {
+    ref->pre_compute_label();
+    ref->immediate_compute_label();
+  }
+  store_citation(ref);
+  return flags;
+}
+// Output and delete each queued citation, then empty the queue.
+static void immediately_output_references()
+{
+  for (int idx = 0; idx < ncitations; ++idx) {
+    reference *const cur = citation[idx];
+    if (label_in_reference) {
+      fputs(".ds [F ", outfp);
+      const string &text = cur->get_label(NORMAL_LABEL);
+      const char lead = text.length() > 0 ? text[0] : '\0';
+      if (' ' == lead || '\\' == lead || '"' == lead)
+	putc('"', outfp);
+      put_string(text, outfp);
+      putc('\n', outfp);
+    }
+    cur->output(outfp);
+    delete cur;
+  }
+  ncitations = 0;
+}
+// Write the labels of the n references at v to fp, joined by sep_label.
+// The array may be reordered and shortened in place beforehand.
+static void output_citation_group(reference **v, int n, label_type type,
+				  FILE *fp)
+{
+  if (sort_adjacent_labels) {
+    // Insertion sort by reference number; n is usually very small.
+    for (int next = 1; next < n; next++) {
+      reference *const moving = v[next];
+      const int key = moving->get_number();
+      int slot = next;
+      for (; slot > 0 && v[slot - 1]->get_number() > key; slot--)
+	v[slot] = v[slot - 1];
+      v[slot] = moving;
+    }
+  }
+  // This messes up if !accumulate.
+  if (accumulate && n > 1) {
+    // Keep only entries whose label differs from their predecessor's.
+    int kept = 1;
+    for (int i = 1; i < n; i++)
+      if (v[i]->get_label(type) != v[i - 1]->get_label(type))
+	v[kept++] = v[i];
+    n = kept;
+  }
+  string merged_label;
+  for (int i = 0; i < n; i++) {
+    // A positive result means merged_label stands for this label and
+    // the next nmerged ones, which are then skipped.
+    const int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type,
+					   merged_label);
+    if (nmerged > 0)
+      i += nmerged;
+    put_string(nmerged > 0 ? merged_label : v[i]->get_label(type), fp);
+    if (i < n - 1)
+      put_string(sep_label, fp);
+  }
+}
+
+// Start in the NORMAL state over the n references at p, writing to f.
+label_processing_state::label_processing_state(reference **p, int n,
+					       FILE *f)
+: state(NORMAL), count(0), rptr(p), rcount(n), fp(f)
+{
+}
+// Flush whatever is still pending; every reference must be used up.
+label_processing_state::~label_processing_state()
+{
+  const int consumed = handle_pending(EOF);
+  assert(!consumed);
+  assert(rcount == 0);
+}
+// Advance the pending-marker state machine with the next character c
+// (EOF at end of text).  Emits whatever the pending state was waiting
+// to decide on.  Returns 1 if c was consumed as part of a marker
+// sequence, 0 if the caller must still process c itself.
+int label_processing_state::handle_pending(int c)
+{
+  if (NORMAL == state)
+    return 0;
+  if (PENDING_POST == state) {
+    // A post marker directly followed by a pre marker becomes a
+    // separator; otherwise the post-label text is written.
+    state = NORMAL;
+    if (PRE_LABEL_MARKER == c) {
+      put_string(sep_label, fp);
+      return 1;
+    }
+    put_string(post_label, fp);
+    return 0;
+  }
+  // The remaining states all hold 'count' labels that are written out
+  // as one group unless c extends the marker sequence.
+  const string *trailer = 0;	// text to write after the group, if any
+  switch (state) {
+  case PENDING_LABEL:
+    if (POST_LABEL_MARKER == c) {
+      state = PENDING_LABEL_POST;
+      return 1;
+    }
+    break;
+  case PENDING_LABEL_POST:
+    if (PRE_LABEL_MARKER == c) {
+      state = PENDING_LABEL_POST_PRE;
+      return 1;
+    }
+    trailer = &post_label;
+    break;
+  case PENDING_LABEL_POST_PRE:
+    // Another label of the same type joins the pending group.
+    if (c >= LABEL_MARKER
+	&& c < LABEL_MARKER + N_LABEL_TYPES
+	&& c - LABEL_MARKER == type) {
+      count += 1;
+      state = PENDING_LABEL;
+      return 1;
+    }
+    trailer = &sep_label;
+    break;
+  default:
+    break;
+  }
+  // The sequence ended here: write the group and step past the
+  // references it used, then whatever text the ended sequence calls
+  // for.  c itself is left for the caller.
+  output_citation_group(rptr, count, type, fp);
+  rptr += count;
+  rcount -= count;
+  if (trailer != 0)
+    put_string(*trailer, fp);
+  state = NORMAL;
+  return 0;
+}
+// Feed one character of marked-up text through the state machine:
+// markers are turned into label text, anything else is copied to fp.
+void label_processing_state::process(int c)
+{
+  // First let a pending marker sequence react to c; it may consume it.
+  if (handle_pending(c))
+    return;
+  assert(state == NORMAL);
+  if (PRE_LABEL_MARKER == c) {
+    put_string(pre_label, fp);
+    state = NORMAL;
+  }
+  else if (POST_LABEL_MARKER == c)
+    state = PENDING_POST;
+  else if (LABEL_MARKER == c || LABEL_MARKER + 1 == c) {
+    // Begin a group of labels; the marker's offset gives the type.
+    type = label_type(c - LABEL_MARKER);
+    count = 1;
+    state = PENDING_LABEL;
+  }
+  else {
+    // Ordinary character.
+    state = NORMAL;
+    putc(c, fp);
+  }
+}
+// Comparison callback handed to qsort() by output_references().
+extern "C" {
+
+int rcompare(const void *p1, const void *p2)
+{
+  return compare_reference(**(reference **)p1, **(reference **)p2);
+}
+
+}
+// Label, then output, diverted text and the accumulated references.
+void output_references()
+{
+  assert(accumulate);
+  if (!hash_table_size) {	// store_reference() never ran
+    if (have_bibliography)
+      error("nothing to reference (probably 'bibliography' before"
+	    " 'sort')");
+    accumulate = 0;
+    nreferences = 0;
+    return;
+  }
+  if (nreferences > 0) {
+    int j = 0;
+    int i;
+    for (i = 0; i < hash_table_size; i++)	// compact used slots
+      if (reference_hash_table[i] != 0)
+	reference_hash_table[j++] = reference_hash_table[i];
+    assert(j == nreferences);
+    for (; j < hash_table_size; j++)		// clear the remainder
+      reference_hash_table[j] = 0;
+    qsort(reference_hash_table, nreferences, sizeof(reference*),
+	  rcompare);
+    for (i = 0; i < nreferences; i++)		// renumber in sorted order
+      reference_hash_table[i]->set_number(i);
+    compute_labels(reference_hash_table, nreferences);
+  }
+  if (outfp != stdout) {	// text was diverted to a temporary file
+    rewind(outfp);
+    {	// scope ends 'state' (and runs its checks) before the fclose()
+      label_processing_state state(citation, ncitations, stdout);
+      int c;
+      while ((c = getc(outfp)) != EOF)
+	state.process(c);
+    }
+    ncitations = 0;
+    fclose(outfp);
+    outfp = stdout;
+  }
+  if (nreferences > 0) {
+    fputs(".]<\n", outfp);
+    for (int i = 0; i < nreferences; i++) {
+      if (sort_fields.length() > 0)
+	reference_hash_table[i]->print_sort_key_comment(outfp);
+      if (label_in_reference) {
+	fputs(".ds [F ", outfp);
+	const string &label
+	  = reference_hash_table[i]->get_label(NORMAL_LABEL);
+	if (label.length() > 0
+	    && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
+	  putc('"', outfp);
+	put_string(label, outfp);
+	putc('\n', outfp);
+      }
+      reference_hash_table[i]->output(outfp);
+      delete reference_hash_table[i];
+      reference_hash_table[i] = 0;	// table is empty again afterwards
+    }
+    fputs(".]>\n", outfp);
+    nreferences = 0;
+  }
+  clear_labels();
+}
+// Search the databases for 'query' (query_len bytes).  Returns a newly
+// allocated reference built from the first match, or 0 after reporting
+// an error; a second match only draws a warning.
+static reference *find_reference(const char *query, int query_len)
+{
+  // This is so that error messages look better.
+  while (query_len > 0 && csspace(query[query_len - 1]))
+    query_len--;
+  string key;
+  for (const char *p = query; p < query + query_len; p++)
+    key += ('\n' == *p) ? ' ' : *p;
+  key += '\0';
+  possibly_load_default_database();
+  search_list_iterator iter(&database_list, key.contents());
+  reference_id rid;
+  const char *start;
+  int len;
+  if (!iter.next(&start, &len, &rid)) {
+    error("no matches for '%1'", key.contents());
+    return 0;
+  }
+  const char *const end = start + len;
+  // Skip any leading lines that do not begin with '%'.
+  while (start < end && *start != '%')
+    while (start < end && *start++ != '\n')
+      ;
+  if (start >= end) {
+    error("found a reference for '%1' but it didn't contain any fields",
+	  key.contents());
+    return 0;
+  }
+  reference *found = new reference(start, end - start, &rid);
+  if (iter.next(&start, &len, &rid))
+    warning("multiple matches for '%1'", key.contents());
+  return found;
+}
+// Build a reference from the text between ".[" and ".]".  Lines before
+// the first one starting with '%' hold optional flag characters ('#',
+// '[', ']') followed by a database query; the rest, if any, is an
+// inline reference.  The flags are stored in *flagsp.  Never returns
+// a null pointer: failures yield an empty reference.
+static reference *make_reference(const string &str, unsigned *flagsp)
+{
+  const char *start = str.contents();
+  const char *const end = start + str.length();
+  // Find the first line that begins with '%'.
+  const char *fields = start;
+  while (fields < end && *fields != '%')
+    while (fields < end && *fields++ != '\n')
+      ;
+  // Consume flag characters and white space ahead of the query.
+  unsigned flags = 0;
+  for (; start < fields; start++) {
+    const char ch = *start;
+    if ('#' == ch)
+      flags = SHORT_LABEL | (flags & (FORCE_RIGHT_BRACKET
+				      | FORCE_LEFT_BRACKET));
+    else if ('[' == ch)
+      flags |= FORCE_LEFT_BRACKET;
+    else if (']' == ch)
+      flags |= FORCE_RIGHT_BRACKET;
+    else if (!csspace(ch))
+      break;
+  }
+  *flagsp = flags;
+  if (start >= end) {
+    error("empty reference");
+    return new reference;
+  }
+  reference *database_ref = 0;
+  if (start < fields)
+    database_ref = find_reference(start, fields - start);
+  reference *inline_ref = 0;
+  if (fields < end)
+    inline_ref = new reference(fields, end - fields);
+  if (0 == database_ref)
+    return (inline_ref != 0) ? inline_ref : new reference;
+  if (inline_ref != 0) {
+    // Fold the inline fields into the database entry.
+    database_ref->merge(*inline_ref);
+    delete inline_ref;
+  }
+  return database_ref;
+}
+// Handle one bibliography record: store it, or output it right away.
+static void do_ref(const string &str)
+{
+  if (accumulate) {
+    (void)store_reference(str);
+    return;
+  }
+  (void)immediately_handle_reference(str);
+  immediately_output_references();
+}
+// Drop trailing white space from 'str', stopping at any newline.
+static void trim_blanks(string &str)
+{
+  int keep = str.length();
+  const char *text = str.contents();
+  while (keep > 0 && text[keep - 1] != '\n' && csspace(text[keep - 1]))
+    keep--;
+  str.set_length(keep);
+}
+// Read bibliography records (runs of lines starting with a '%' line)
+// from 'filename' ("-" means stdin) and pass each one to do_ref().
+void do_bib(const char *filename)
+{
+  FILE *fp;
+  if (strcmp(filename, "-") == 0)
+    fp = stdin;
+  else {
+    errno = 0;
+    fp = fopen(filename, "r");
+    if (fp == 0) {
+      error("can't open '%1': %2", filename, strerror(errno));
+      return;
+    }
+    current_filename = filename;
+  }
+  current_lineno = 1;
+  enum { START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT }
+    state = START;
+  string body;
+  for (;;) {
+    int c = getc(fp);
+    if (EOF == c)
+      break;
+    if (is_invalid_input_char(c)) {
+      error("invalid input character code %1", c);
+      continue;
+    }
+    switch (state) {
+    case START:		// at the start of a line, outside any record
+      if ('%' == c) {
+	body = c;
+	state = BODY;
+      }
+      else if (c != '\n')
+	state = MIDDLE;
+      break;
+    case MIDDLE:	// skipping the rest of a line outside any record
+      if ('\n' == c)
+	state = START;
+      break;
+    case BODY:		// inside a line of the current record
+      body += c;
+      if ('\n' == c)
+	state = BODY_START;
+      break;
+    case BODY_START:	// at the start of a line after record text
+      if ('\n' == c) {
+	do_ref(body);
+	state = START;
+      }
+      else if ('.' == c)
+	state = BODY_DOT;
+      else if (csspace(c)) {
+	state = BODY_BLANK;
+	body += c;
+      }
+      else {
+	body += c;
+	state = BODY;
+      }
+      break;
+    case BODY_BLANK:	// only white space so far on this record line
+      if ('\n' == c) {
+	trim_blanks(body);
+	do_ref(body);
+	state = START;
+      }
+      else {
+	body += c;
+	if (!csspace(c))
+	  state = BODY;
+      }
+      break;
+    case BODY_DOT:	// saw '.' at the start of a record line
+      if (']' == c) {
+	do_ref(body);
+	state = MIDDLE;
+      }
+      else {
+	body += '.';
+	body += c;
+	state = ('\n' == c) ? BODY_START : BODY;
+      }
+      break;
+    default:
+      assert(0 == "unhandled case while parsing bibliography file");
+    }
+    if ('\n' == c)
+      current_lineno++;
+  }
+  switch (state) {
+  case START:
+  case MIDDLE:
+    break;
+  case BODY:
+    body += '\n';
+    do_ref(body);
+    break;
+  case BODY_DOT:
+  case BODY_START:
+    do_ref(body);
+    break;
+  case BODY_BLANK:
+    trim_blanks(body);
+    do_ref(body);
+    break;
+  }
+  if (fp != stdin)	// as in do_file(): leave stdin open for reuse
+    fclose(fp);
+}
+
+// String hash from the Dragon Book.  Hashes the 'len' bytes starting
+// at 's': each byte is added after shifting the running value left by
+// four bits, and any bits that reach the top four bit positions are
+// folded back into lower bits.
+unsigned hash_string(const char *s, int len)
+{
+  unsigned hash = 0;
+  for (int i = 0; i < len; i++) {
+    hash = (hash << 4) + s[i];
+    const unsigned top = hash & 0xf0000000;
+    // Fold the overflowing nibble in lower down, then clear it.
+    if (top != 0)
+      hash ^= (top >> 24) ^ top;
+  }
+  return hash;
+}
+// Return the first entry of a fixed table of sizes that exceeds 'n'.
+int next_size(int n)
+{
+  static const int table_sizes[] = {
+    101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009,
+    80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009,
+    16000057, 32000011, 64000031, 128000003, 0
+  };
+  // The trailing 0 is a sentinel: reaching it means the table ran out.
+  int idx = 0;
+  while (table_sizes[idx] != 0 && table_sizes[idx] <= n)
+    idx++;
+  assert(table_sizes[idx] != 0);
+  return table_sizes[idx];
+}
+
+// Local Variables:
+// fill-column: 72
+// mode: C++
+// End:
+// vim: set cindent noexpandtab shiftwidth=2 textwidth=72:
diff --git a/src/preproc/refer/refer.h b/src/preproc/refer/refer.h
new file mode 100644
index 0000000..3ebff27
--- /dev/null
+++ b/src/preproc/refer/refer.h
@@ -0,0 +1,81 @@
+/* Copyright (C) 1989-2020 Free Software Foundation, Inc.
+     Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include "lib.h"
+
+#include <stdlib.h>
+#include <errno.h>
+
+#include "errarg.h"
+#include "error.h"
+#include "stringclass.h"
+#include "cset.h"
+#include "cmap.h"
+#include "lf.h"
+
+#include "defs.h"
+
+unsigned hash_string(const char *, int);	// defined in refer.cpp
+int next_size(int);	// next hash table size above the argument
+
+extern string capitalize_fields;
+extern string reverse_fields;
+extern string abbreviate_fields;
+extern string period_before_last_name;
+extern string period_before_initial;
+extern string period_before_hyphen;
+extern string period_before_other;
+extern string sort_fields;
+extern int annotation_field;
+extern string annotation_macro;
+extern string discard_fields;
+extern string articles;		// defined outside refer.cpp
+extern int abbreviate_label_ranges;
+extern string label_range_indicator;
+extern int date_as_label;
+extern string join_authors_exactly_two;
+extern string join_authors_last_two;
+extern string join_authors_default;
+extern string separate_label_second_parts;
+extern string et_al;
+extern int et_al_min_elide;
+extern int et_al_min_total;
+
+extern int compatible_flag;
+
+extern int set_label_spec(const char *);	// returns 0 on failure
+extern int set_date_label_spec(const char *);
+extern int set_short_label_spec(const char *);
+
+extern int short_label_flag;
+
+void clear_labels();	// called at the end of output_references()
+void command_error(const char *,
+		   const errarg &arg1 = empty_errarg,
+		   const errarg &arg2 = empty_errarg,
+		   const errarg &arg3 = empty_errarg);
+
+class reference;
+
+void compute_labels(reference **, int);	// array and its element count
+
+// Local Variables:
+// fill-column: 72
+// mode: C++
+// End:
+// vim: set cindent noexpandtab shiftwidth=2 textwidth=72:
diff --git a/src/preproc/refer/tests/artifacts/62124.bib b/src/preproc/refer/tests/artifacts/62124.bib
new file mode 100644
index 0000000..1093837
--- /dev/null
+++ b/src/preproc/refer/tests/artifacts/62124.bib
@@ -0,0 +1,4 @@
+%A Irritablé, X.
+%T Universit\*'e de Grenoble. Cours donn\*'es aux Houches.
+%Z Mon dieu, Consiel !
+%Z NOTE: This file is deliberately not valid UTF-8.  Try Latin-1.
diff --git a/src/preproc/refer/tests/report-correct-line-numbers.sh b/src/preproc/refer/tests/report-correct-line-numbers.sh
new file mode 100755
index 0000000..19cae53
--- /dev/null
+++ b/src/preproc/refer/tests/report-correct-line-numbers.sh
@@ -0,0 +1,136 @@
+#!/bin/sh
+#
+# Copyright (C) 2022 Free Software Foundation, Inc.
+#
+# This file is part of groff.
+#
+# groff is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+#
+# groff is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+refer="${abs_top_builddir:-.}/refer" # program under test
+
+fail= # set to YES by wail as soon as one check fails
+
+wail () { # report a failed check on stderr and remember it
+    echo FAILED >&2
+    fail=YES
+}
+
+# Regression-test Savannah #62124.  Ensure correct line numbers in
+# diagnostics on bibliography files.
+
+# Locate directory containing our test artifacts.
+artifact_dir=
+
+for buildroot in . .. ../.. # candidate paths to the top of the tree
+do
+    d=$buildroot/src/preproc/refer/tests/artifacts
+    if [ -d "$d" ]
+    then
+        artifact_dir=$d # first existing directory wins
+        break
+    fi
+done
+
+# If we can't find it, we can't test.
+test -z "$artifact_dir" && exit 77 # skip (Automake's "skipped" status)
+
+input=".
+.R1
+bibliography $artifact_dir/62124.bib
+cattywumpus
+.R2
+.
+.R1
+bibliography $artifact_dir/62124.bib
+cattywumpus
+.R2"
+
+# We want standard error _only_: 2>&1 has to come before >/dev/null.
+output=$(echo "$input" | "$refer" -e -p "$artifact_dir"/62124.bib \
+    2>&1 >/dev/null)
+
+# We should get every complaint about the bibliography twice because it
+# is dumped twice; the line numbers should not change because they're
+# problems with the bibliography file, not the input file.
+
+# We're pattern-matching diagnostic output here, which is a delicate
+# thing to do.  If a test failure occurs, ensure the diagnostic message
+# text hasn't changed before assuming a deeper logic problem.
+
+echo "checking line number of invalid character on bibliography line 1"
+count=$(echo "$output" | grep -c "refer:.*/62124\.bib:1:.*code 129")
+test "$count" -eq 2 || wail # expect one hit per bibliography dump
+
+echo "checking line number of first invalid character on bibliography" \
+  "line 2"
+count=$(echo "$output" | grep -c "refer:.*/62124\.bib:2:.*code 136")
+test "$count" -eq 2 || wail
+
+echo "checking line number of second invalid character on" \
+  "bibliography line 2"
+count=$(echo "$output" | grep -c "refer:.*/62124\.bib:2:.*code 137")
+test "$count" -eq 2 || wail
+
+echo "checking line number of first invalid character on" \
+  "bibliography line 3"
+count=$(echo "$output" | grep -c "refer:.*/62124\.bib:3:.*code 136")
+test "$count" -eq 2 || wail
+
+echo "checking line number of second invalid character on" \
+  "bibliography line 3"
+count=$(echo "$output" | grep -c "refer:.*/62124\.bib:3:.*code 137")
+test "$count" -eq 2 || wail
+
+# Problems with the input file should also be accurately located.
+
+echo "checking line number of invalid refer(1) command on input line 4"
+echo "$output" # print the captured diagnostics before matching them
+echo "$output" | grep -q "refer:.*:4:.*unknown command" || wail
+
+echo "checking line number of invalid refer(1) command on input line 9"
+echo "$output" # printed again ahead of the second match
+echo "$output" | grep -q "refer:.*:9:.*unknown command" || wail
+
+# Regression-test Savannah #62391.
+
+output=$(printf '\0201\n' | "$refer" 2>&1 >/dev/null) # octal 201 = 129
+
+echo "checking line number of invalid input character on input line 1"
+echo "$output" | grep -q "refer:.*:1:.*invalid input character" \
+  || wail
+
+output=$(printf '.R1\nbogus \0200\n.R2\n' | "$refer" 2>&1 >/dev/null)
+
+echo "checking line number of invalid input character after refer(1)" \
+  "command on input line 2"
+echo "$output" | grep -q "refer:.*:2:.*invalid input character" \
+  || wail
+
+output=$(printf '.R1\ndatabase nonexistent.bib\n.R2\n' | "$refer" 2>&1 \
+  >/dev/null)
+
+echo "checking line number of attempt to load nonexistent database"
+echo "$output" | grep -q "refer:.*:2:.*can't open 'nonexistent\.bib':" \
+  || wail
+
+output=$(printf '.R1\ninclude nonexistent.bib\n.R2\n' | "$refer" 2>&1 \
+  >/dev/null)
+
+echo "checking line number of attempt to load nonexistent inclusion"
+echo "$output" | grep -q "refer:.*:2:.*can't open 'nonexistent\.bib':" \
+  || wail
+test -z "$fail" || exit 1 # nonzero exit status if any check wailed
+
+# vim:set ai et sw=4 ts=4 tw=72:
diff --git a/src/preproc/refer/token.cpp b/src/preproc/refer/token.cpp
new file mode 100644
index 0000000..e643cbd
--- /dev/null
+++ b/src/preproc/refer/token.cpp
@@ -0,0 +1,377 @@
+// -*- C++ -*-
+/* Copyright (C) 1989-2020 Free Software Foundation, Inc.
+     Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include "refer.h"
+#include "token.h"
+
+#define TOKEN_TABLE_SIZE 1009
+// I believe in Icelandic thorn sorts after z.
+#define THORN_SORT_KEY "{"
+
+struct token_table_entry {  // one slot of the token hash table
+  const char *tok;  // token text; null while the slot is free
+  token_info ti;  // classification stored for tok
+  token_table_entry();  // leaves the slot free (tok = 0)
+};
+
+token_table_entry token_table[TOKEN_TABLE_SIZE];  // indexed by hash
+int ntokens = 0;  // number of occupied token_table slots
+
+// Advance *ptr past a troff name: '(' plus two characters, '[' up to
+// and including the closing ']', or any other single character.
+// Never moves *ptr beyond end.
+static void skip_name(const char **ptr, const char *end)
+{
+  const char *p = *ptr;
+  if (p >= end)
+    return;
+  const char opener = *p++;
+  if (opener == '(') {
+    for (int k = 0; k < 2 && p < end; k++)
+      p++;
+  }
+  else if (opener == '[') {
+    while (p < end && *p++ != ']')
+      ;
+  }
+  *ptr = p;
+}
+
+// Consume one token from [*ptr, end) and advance *ptr past it.  A token
+// is one character, or a backslash together with the escape after it.
+// Returns 0 if no input remains, 1 otherwise.
+int get_token(const char **ptr, const char *end)
+{
+  if (*ptr >= end)
+    return 0;
+  const char first = **ptr;
+  ++*ptr;
+  if (first != '\\' || *ptr >= end)
+    return 1;
+  const char selector = **ptr;
+  if (selector == '*' || selector == 'f') {
+    // skip the selector itself, then the name that follows it
+    ++*ptr;
+    skip_name(ptr, end);
+  }
+  else if (selector == '(' || selector == '[')
+    skip_name(ptr, end);  // skip_name consumes the bracket as well
+  else
+    ++*ptr;  // any other escape is a single character
+  return 1;
+}
+
+// Start as TOKEN_OTHER with no sort key and no other-case form.
+token_info::token_info()
+  : type(TOKEN_OTHER), sort_key(0), other_case(0)
+{}
+
+void token_info::set(token_type t, const char *sk, const char *oc)
+{
+  assert(oc == 0 || t == TOKEN_UPPER || t == TOKEN_LOWER); // oc: letters
+  other_case = oc;
+  sort_key = sk;
+  type = t;
+}
+
+// Append the sort key, or a letter token's alphabetic text lowercased.
+void token_info::sortify(const char *start, const char *end,
+                         string &result) const
+{
+  if (sort_key)
+    result += sort_key;
+  else if (is_upper() || is_lower())
+    for (const char *p = start; p < end; p++)
+      if (csalpha(*p))
+        result += cmlower(*p);
+}
+
+// Return 1 if sortify() would append at least one character, else 0.
+int token_info::sortify_non_empty(const char *start, const char *end) const
+{
+  if (sort_key)
+    return sort_key[0] != '\0';
+  int found = 0;
+  if (is_upper() || is_lower())
+    for (const char *p = start; p < end && !found; p++)
+      found = csalpha(*p) ? 1 : 0;
+  return found;
+}
+
+
+// Append the lower-case form of the token [start, end) to result; a
+// token that is not upper case is copied through unchanged.
+void token_info::lower_case(const char *start, const char *end,
+                            string &result) const
+{
+  if (is_upper() && other_case)
+    result += other_case;
+  else if (is_upper())
+    for (const char *p = start; p < end; p++)
+      result += cmlower(*p);
+  else
+    for (const char *p = start; p < end; p++)
+      result += *p;
+}
+
+// Append the upper-case form of the token [start, end) to result; a
+// token that is not lower case is copied through unchanged.
+void token_info::upper_case(const char *start, const char *end,
+                            string &result) const
+{
+  if (is_lower() && other_case)
+    result += other_case;
+  else if (is_lower())
+    for (const char *p = start; p < end; p++)
+      result += cmupper(*p);
+  else
+    for (const char *p = start; p < end; p++)
+      result += *p;
+}
+
+// Slots start out free: a null tok marks an unused table entry.
+token_table_entry::token_table_entry()
+  : tok(0)
+{}
+
+// Register tok with the given type, sort key and other-case form,
+// overwriting the data of an identical token stored earlier.  The
+// table keeps the tok pointer itself, so it must stay valid.
+// Collisions are resolved by probing downward with wraparound.
+static void store_token(const char *tok, token_type typ,
+                        const char *sk = 0, const char *oc = 0)
+{
+  unsigned slot = hash_string(tok, strlen(tok)) % TOKEN_TABLE_SIZE;
+  while (token_table[slot].tok != 0
+         && strcmp(tok, token_table[slot].tok) != 0)
+    slot = (slot == 0 ? TOKEN_TABLE_SIZE : slot) - 1;
+  token_table_entry &entry = token_table[slot];
+  if (entry.tok == 0) {
+    // claim a free slot; at least one slot must always stay free
+    if (++ntokens == TOKEN_TABLE_SIZE)
+      assert(0);
+    entry.tok = tok;
+  }
+  entry.ti.set(typ, sk, oc);
+}
+
+
+// Returned by lookup_token() for text that was never registered.
+token_info default_token_info;
+
+// Find the token_info registered for the text [start, end); unknown
+// text yields &default_token_info.  Probes downward with wraparound.
+const token_info *lookup_token(const char *start, const char *end)
+{
+  const size_t len = size_t(end - start);
+  unsigned slot = hash_string(start, end - start) % TOKEN_TABLE_SIZE;
+  for (;;) {
+    const char *cand = token_table[slot].tok;
+    if (cand == 0)
+      return &default_token_info;  // a free slot ends the probe
+    if (strlen(cand) == len && memcmp(cand, start, len) == 0)
+      return &token_table[slot].ti;
+    slot = (slot == 0 ? TOKEN_TABLE_SIZE : slot) - 1;
+  }
+}
+
+// Register the plain ASCII tokens: the 26 letters in both cases, the
+// digits, the punctuation characters ".,:;?!" and the hyphen.  The
+// per-character strings are strsave() copies of a scratch buffer.
+static void init_ascii()
+{
+  char text[2] = { '\0', '\0' };
+  // each lower-case letter, followed by its upper-case counterpart
+  for (const char *lc = "abcdefghijklmnopqrstuvwxyz"; *lc; lc++) {
+    text[0] = *lc;
+    store_token(strsave(text), TOKEN_LOWER);
+    text[0] = cmupper(text[0]);
+    store_token(strsave(text), TOKEN_UPPER);
+  }
+  // digits are TOKEN_OTHER, but each one is its own sort key
+  for (const char *digit = "0123456789"; *digit; digit++) {
+    text[0] = *digit;
+    const char *saved = strsave(text);
+    store_token(saved, TOKEN_OTHER, saved);
+  }
+  // punctuation
+  for (const char *mark = ".,:;?!"; *mark; mark++) {
+    text[0] = *mark;
+    store_token(strsave(text), TOKEN_PUNCT);
+  }
+  store_token("-", TOKEN_HYPHEN);
+}
+
+static void store_letter(const char *lower, const char *upper,
+		  const char *sort_key = 0)
+{
+  store_token(lower, TOKEN_LOWER, sort_key, upper);  // lower -> upper
+  store_token(upper, TOKEN_UPPER, sort_key, lower);  // upper -> lower
+}
+
+// Register a letter whose upper- and lower-case forms are the single
+// characters with codes uc_code and lc_code.  Both forms share
+// sort_key and name each other as their other-case form.
+static void init_letter(unsigned char uc_code, unsigned char lc_code,
+                        const char *sort_key)
+{
+  const char lower_text[2] = { char(lc_code), '\0' };
+  const char upper_text[2] = { char(uc_code), '\0' };
+  // the table keeps the pointers, so hand it strsave() results
+  store_letter(strsave(lower_text), strsave(upper_text), sort_key);
+}
+
+static void init_latin1()
+{
+  init_letter(0xc0, 0xe0, "a");  // A grave
+  init_letter(0xc1, 0xe1, "a");  // A acute
+  init_letter(0xc2, 0xe2, "a");  // A circumflex
+  init_letter(0xc3, 0xe3, "a");  // A tilde
+  init_letter(0xc4, 0xe4, "a");  // A diaeresis
+  init_letter(0xc5, 0xe5, "a");  // A ring
+  init_letter(0xc6, 0xe6, "ae");  // AE ligature
+  init_letter(0xc7, 0xe7, "c");  // C cedilla
+  init_letter(0xc8, 0xe8, "e");  // E grave
+  init_letter(0xc9, 0xe9, "e");  // E acute
+  init_letter(0xca, 0xea, "e");  // E circumflex
+  init_letter(0xcb, 0xeb, "e");  // E diaeresis
+  init_letter(0xcc, 0xec, "i");  // I grave
+  init_letter(0xcd, 0xed, "i");  // I acute
+  init_letter(0xce, 0xee, "i");  // I circumflex
+  init_letter(0xcf, 0xef, "i");  // I diaeresis
+
+  init_letter(0xd0, 0xf0, "d");  // Eth
+  init_letter(0xd1, 0xf1, "n");  // N tilde
+  init_letter(0xd2, 0xf2, "o");  // O grave
+  init_letter(0xd3, 0xf3, "o");  // O acute
+  init_letter(0xd4, 0xf4, "o");  // O circumflex
+  init_letter(0xd5, 0xf5, "o");  // O tilde
+  init_letter(0xd6, 0xf6, "o");  // O diaeresis
+  init_letter(0xd8, 0xf8, "o");  // O slash (0xd7/0xf7 are not letters)
+  init_letter(0xd9, 0xf9, "u");  // U grave
+  init_letter(0xda, 0xfa, "u");  // U acute
+  init_letter(0xdb, 0xfb, "u");  // U circumflex
+  init_letter(0xdc, 0xfc, "u");  // U diaeresis
+  init_letter(0xdd, 0xfd, "y");  // Y acute
+  init_letter(0xde, 0xfe, THORN_SORT_KEY);  // Thorn
+
+  store_token("\337", TOKEN_LOWER, "ss", "SS");  // 0xdf, sharp s
+  store_token("\377", TOKEN_LOWER, "y", "Y");  // 0xff, y diaeresis
+}
+
+// Register a letter that is written as a two-character special
+// character.  l1 l2 name the lower-case form and u1 u2 the upper-case
+// form; both are stored in the \(xx and the \[xx] spelling, each case
+// naming the other as its other-case form.  sk, if non-null, is the
+// sort key shared by all four tokens.
+static void init_two_char_letter(char l1, char l2, char u1, char u2,
+                                 const char *sk = 0)
+{
+  // \(xx spelling
+  const char paren_lower[] = { '\\', '(', l1, l2, '\0' };
+  const char paren_upper[] = { '\\', '(', u1, u2, '\0' };
+  const char *lower = strsave(paren_lower);
+  const char *upper = strsave(paren_upper);
+  store_letter(lower, upper, sk);
+
+  // \[xx] spelling
+  const char bracket_lower[] = { '\\', '[', l1, l2, ']', '\0' };
+  const char bracket_upper[] = { '\\', '[', u1, u2, ']', '\0' };
+  upper = strsave(bracket_upper);
+  lower = strsave(bracket_lower);
+  store_letter(lower, upper, sk);
+}
+
+// Register the special-character escapes for letters, hyphens and
+// range separators, each in its \(xx and \[xx] spelling.
+static void init_special_chars()
+{
+  // every accent mark in "':^`~" combined with every vowel
+  for (const char *accent = "':^`~"; *accent; accent++)
+    for (const char *vowel = "aeiouy"; *vowel; vowel++) {
+      // Use a variable to work around bug in gcc 2.0
+      char upper_vowel = cmupper(*vowel);
+      init_two_char_letter(*accent, *vowel, *accent, upper_vowel);
+    }
+  for (const char *pair = "/l/o~n,coeaeij"; *pair; pair += 2) {
+    // Use variables to work around bug in gcc 2.0
+    char upper0 = cmupper(pair[0]);
+    char upper1 = cmupper(pair[1]);
+    init_two_char_letter(pair[0], pair[1], upper0, upper1);
+  }
+  init_two_char_letter('v', 's', 'v', 'S', "s");
+  init_two_char_letter('v', 'z', 'v', 'Z', "z");
+  init_two_char_letter('o', 'a', 'o', 'A', "a");
+  init_two_char_letter('T', 'p', 'T', 'P', THORN_SORT_KEY);
+  init_two_char_letter('-', 'd', '-', 'D');
+  store_token("\\(ss", TOKEN_LOWER, 0, "SS");
+  store_token("\\[ss]", TOKEN_LOWER, 0, "SS");
+  store_token("\\(Sd", TOKEN_LOWER, "d", "\\(-D");
+  store_token("\\[Sd]", TOKEN_LOWER, "d", "\\[-D]");
+  store_token("\\(hy", TOKEN_HYPHEN);
+  store_token("\\[hy]", TOKEN_HYPHEN);
+  store_token("\\(en", TOKEN_RANGE_SEP);
+  store_token("\\[en]", TOKEN_RANGE_SEP);
+}
+
+// Register the \* string escapes: the accent strings as TOKEN_ACCENT
+// (in both the \*x and \*[x] spellings) and the special letters of
+// the -ms macro package together with their sort keys and other-case
+// forms.
+static void init_strings()
+{
+  // Each accent character is listed once; listing one twice would only
+  // re-register the same two tokens and waste their strsave() copies.
+  for (const char *accent = "'`^,:~v_o./;"; *accent; accent++) {
+    const char plain[] = { '\\', '*', *accent, '\0' };
+    store_token(strsave(plain), TOKEN_ACCENT);
+    const char bracketed[] = { '\\', '*', '[', *accent, ']', '\0' };
+    store_token(strsave(bracketed), TOKEN_ACCENT);
+  }
+
+  // -ms special letters
+  store_letter("\\*(th", "\\*(Th", THORN_SORT_KEY);
+  store_letter("\\*[th]", "\\*[Th]", THORN_SORT_KEY);
+  store_letter("\\*(d-", "\\*(D-");
+  store_letter("\\*[d-]", "\\*[D-]");
+  store_letter("\\*(ae", "\\*(Ae", "ae");
+  store_letter("\\*[ae]", "\\*[Ae]", "ae");
+  store_letter("\\*(oe", "\\*(Oe", "oe");
+  store_letter("\\*[oe]", "\\*[Oe]", "oe");
+
+  // lower-case letters whose upper-case text is spelled out directly
+  store_token("\\*3", TOKEN_LOWER, "y", "Y");
+  store_token("\\*8", TOKEN_LOWER, "ss", "SS");
+  store_token("\\*q", TOKEN_LOWER, "o", "O");
+}
+
+// Fills the token table once, during static initialization.  The object
+// is defined after token_table and default_token_info in this file, so
+// both are already constructed when its constructor runs.
+struct token_initer {
+  token_initer()
+  {
+    init_ascii();
+    init_latin1();
+    init_special_chars();
+    init_strings();
+    default_token_info.set(TOKEN_OTHER);
+  }
+};
+static token_initer the_token_initer;
diff --git a/src/preproc/refer/token.h b/src/preproc/refer/token.h
new file mode 100644
index 0000000..9cd688c
--- /dev/null
+++ b/src/preproc/refer/token.h
@@ -0,0 +1,87 @@
+// -*- C++ -*-
+/* Copyright (C) 1989-2020 Free Software Foundation, Inc.
+     Written by James Clark (jjc@jclark.com)
+
+This file is part of groff.
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+enum token_type {  // classification of a token
+  TOKEN_OTHER,  // default for anything not classified otherwise
+  TOKEN_UPPER,  // upper-case letter
+  TOKEN_LOWER,  // lower-case letter
+  TOKEN_ACCENT,  // accent
+  TOKEN_PUNCT,  // punctuation
+  TOKEN_HYPHEN,  // hyphen
+  TOKEN_RANGE_SEP  // range separator
+};
+
+class token_info {  // type, sort key and other-case text of a token
+private:
+  token_type type;
+  const char *sort_key;  // may be null (0 is the default)
+  const char *other_case;  // may be null (0 is the default)
+public:
+  token_info();
+  void set(token_type, const char *sk = 0, const char *oc = 0);
+  void lower_case(const char *start, const char *end, string &result) const;
+  void upper_case(const char *start, const char *end, string &result) const;
+  void sortify(const char *start, const char *end, string &result) const;
+  int sortify_non_empty(const char *start, const char *end) const;
+  int is_upper() const;  // each is_*() tests type against one enumerator
+  int is_lower() const;
+  int is_accent() const;
+  int is_other() const;
+  int is_punct() const;
+  int is_hyphen() const;
+  int is_range_sep() const;
+};
+
+// Nonzero if the token is classified as an upper-case letter
+// (TOKEN_UPPER).
+inline int token_info::is_upper() const
+{ return TOKEN_UPPER == type; }
+
+// Nonzero if the token is classified as a lower-case letter
+// (TOKEN_LOWER).
+inline int token_info::is_lower() const
+{ return TOKEN_LOWER == type; }
+
+// Nonzero if the token is classified as an accent
+// (TOKEN_ACCENT).
+inline int token_info::is_accent() const
+{ return TOKEN_ACCENT == type; }
+
+// Nonzero if the token has the classification TOKEN_OTHER, which is
+// what a default-constructed token_info carries.
+inline int token_info::is_other() const
+{ return TOKEN_OTHER == type; }
+
+// Nonzero if the token is classified as punctuation
+// (TOKEN_PUNCT).
+inline int token_info::is_punct() const
+{ return TOKEN_PUNCT == type; }
+
+// Nonzero if the token is classified as a hyphen
+// (TOKEN_HYPHEN).
+inline int token_info::is_hyphen() const
+{ return TOKEN_HYPHEN == type; }
+
+// Nonzero if the token is classified as a range separator
+// (TOKEN_RANGE_SEP).
+inline int token_info::is_range_sep() const
+{ return TOKEN_RANGE_SEP == type; }
+
+int get_token(const char **ptr, const char *end);
+const token_info *lookup_token(const char *start, const char *end);