1 files changed, 91 insertions, 0 deletions
diff --git a/tmac/hyphenex.pl b/tmac/hyphenex.pl
new file mode 100644
index 0000000..aee5845
--- /dev/null
+++ b/tmac/hyphenex.pl
@@ -0,0 +1,91 @@
+#! /usr/bin/env perl
+#
+#
+# hyphenex.pl
+#
+# This small filter converts a hyphenation exception log article for
+# TUGBoat to a real \hyphenation block.
+#
+# Written by Werner Lemberg <wl@gnu.org>.
+#
+# Version 1.2 (2007/11/16)
+#
+# Public domain.
+#
+#
+# Usage:
+#
+#   [perl] hyphenex.pl < tugboat-article > hyphenation-exceptions
+
+# print header
+print "% Hyphenation exceptions for US English,\n";
+print "% based on hyphenation exception log articles in TUGboat.\n";
+print "%\n";
+print "% Copyright 2007 TeX Users Group.\n";
+print "% You may freely use, modify and/or distribute this file.\n";
+print "%\n";
+print "% This is an automatically generated file.  Do not edit!\n";
+print "%\n";
+print "% Please contact the TUGboat editorial staff <tugboat\@tug.org>\n";
+print "% for corrections and omissions.\n";
+print "\n";
+print "\\hyphenation{\n";
+
+unshift @ARGV, '-' unless @ARGV;
+foreach my $filename (@ARGV) {
+  my $input;
+  if ($filename eq '-') {
+    $input = \*STDIN;
+  } elsif (not open $input, '<', $filename) {
+    warn $!;
+    next;
+  }
+  while (<$input>) {
+    # retain only lines starting with \1 ... \6 or \tabalign
+    next if not (m/^\\[123456]/ || m/^\\tabalign/);
+    # remove final newline
+    chop;
+    # remove all TeX commands except \1 ... \6
+    s/\\[^123456\s{]+//g;
+    # remove all paired { ... }
+    1 while s/{(.*?)}/\1/g;
+    # skip lines which now have only whitespace before '&'
+    next if m/^\s*&/;
+    # remove comments
+    s/%.*//;
+    # remove trailing whitespace
+    s/\s*$//;
+    # remove trailing '*' (used as a marker in the document)
+    s/\*$//;
+    # split at whitespace
+    @field = split(' ');
+    if ($field[0] eq "\\1" || $field[0] eq "\\4") {
+      print "  $field[2]\n";
+    }
+    elsif ($field[0] eq "\\2" || $field[0] eq "\\5") {
+      print "  $field[2]\n";
+      # handle multiple suffixes separated by commata
+      @suffix_list = split(/,/, "$field[3]");
+      foreach $suffix (@suffix_list) {
+        print "  $field[2]$suffix\n";
+      }
+    }
+    elsif ($field[0] eq "\\3" || $field[0] eq "\\6") {
+      # handle multiple suffixes separated by commata
+      @suffix_list = split(/,/, "$field[3],$field[4]");
+      foreach $suffix (@suffix_list) {
+        print "  $field[2]$suffix\n";
+      }
+    }
+    else {
+      # for '&', split at '&' with trailing whitespace
+      @field = split(/&\s*/);
+      print "  $field[1]\n";
+    }
+  }
+}
+
+# print trailer
+print "}\n";
+print "\n";
+print "% EOF\n";