summaryrefslogtreecommitdiffstats
path: root/tmac/hyphenex.pl
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- tmac/hyphenex.pl 91
1 files changed, 91 insertions, 0 deletions
diff --git a/tmac/hyphenex.pl b/tmac/hyphenex.pl
new file mode 100644
index 0000000..aee5845
--- /dev/null
+++ b/tmac/hyphenex.pl
@@ -0,0 +1,91 @@
+#! /usr/bin/env perl
+#
+#
+# hyphenex.pl
+#
+# This small filter converts a hyphenation exception log article for
+# TUGboat to a real \hyphenation block.
+#
+# Written by Werner Lemberg <wl@gnu.org>.
+#
+# Version 1.2 (2007/11/16)
+#
+# Public domain.
+#
+#
+# Usage:
+#
+# [perl] hyphenex.pl < tugboat-article > hyphenation-exceptions
+
+# emit the fixed preamble: explanatory TeX comments, then open the block
+print "% Hyphenation exceptions for US English,\n",
+      "% based on hyphenation exception log articles in TUGboat.\n",
+      "%\n",
+      "% Copyright 2007 TeX Users Group.\n",
+      "% You may freely use, modify and/or distribute this file.\n",
+      "%\n",
+      "% This is an automatically generated file. Do not edit!\n",
+      "%\n",
+      "% Please contact the TUGboat editorial staff <tugboat\@tug.org>\n",
+      "% for corrections and omissions.\n",
+      "\n",
+      "\\hyphenation{\n";
+
+use strict;
+use warnings;
+# read the files named on the command line, or stdin if there are none
+unshift @ARGV, '-' unless @ARGV;
+foreach my $filename (@ARGV) {
+  my $input;
+  if ($filename eq '-') {
+    $input = \*STDIN;
+  }
+  elsif (not open $input, '<', $filename) {
+    # name the file; a bare $! does not say which argument failed
+    warn "hyphenex.pl: cannot open '$filename': $!\n";
+    next;
+  }
+  while (my $line = <$input>) {
+    # retain only lines starting with \1 ... \6 or \tabalign
+    next unless $line =~ m/^\\(?:[123456]|tabalign)/;
+    # remove final newline (chop could eat a real last character)
+    chomp $line;
+    # remove all TeX commands except \1 ... \6
+    $line =~ s/\\[^123456\s{]+//g;
+    # remove all paired { ... }; repeat until no pair is left
+    1 while $line =~ s/\{(.*?)\}/$1/g;
+    # skip lines which now have only whitespace before '&'
+    next if $line =~ m/^\s*&/;
+    # remove comments, trailing whitespace, and then a trailing '*'
+    # (used as a marker in the document)
+    $line =~ s/%.*//;
+    $line =~ s/\s*$//;
+    $line =~ s/\*$//;
+    # split at whitespace; pad so that missing fields are empty strings
+    my @field = split ' ', $line;
+    push @field, '' while @field < 5;
+    my @entries;
+    if ($field[0] eq "\\1" || $field[0] eq "\\4") {
+      @entries = ($field[2]);
+    }
+    elsif ($field[0] eq "\\2" || $field[0] eq "\\5") {
+      # the word itself, then one entry per comma-separated suffix
+      @entries = ($field[2], map { $field[2] . $_ } split(/,/, $field[3]));
+    }
+    elsif ($field[0] eq "\\3" || $field[0] eq "\\6") {
+      # suffixed forms only, from two comma-separated suffix lists
+      @entries = map { $field[2] . $_ } split(/,/, "$field[3],$field[4]");
+    }
+    else {
+      # for '&', split at '&' with trailing whitespace
+      @entries = (split /&\s*/, $line)[1];
+    }
+    # do not emit empty entries for malformed or incomplete lines
+    print " $_\n" for grep { defined($_) && length($_) } @entries;
+  }
+}
+
+# close the \hyphenation block and mark the end of the generated file
+print "}\n",
+      "\n",
+      "% EOF\n";