summaryrefslogtreecommitdiffstats
path: root/scripts/tbl2html.pl
diff options
context:
space:
mode:
Diffstat (limited to '')
-rwxr-xr-xscripts/tbl2html.pl364
1 files changed, 364 insertions, 0 deletions
diff --git a/scripts/tbl2html.pl b/scripts/tbl2html.pl
new file mode 100755
index 0000000..ba8ae09
--- /dev/null
+++ b/scripts/tbl2html.pl
@@ -0,0 +1,364 @@
+#!/usr/bin/perl -w
+# $LynxId: tbl2html.pl,v 1.5 2011/05/21 15:18:16 tom Exp $
+#
+# Translate one or more ".tbl" files into ".html" files which can be used to
+# test the charset support in lynx. Each of the ".html" files will use the
+# charset that corresponds to the input ".tbl" file.
+
+use strict;
+
+use Getopt::Std;
+use File::Basename;
+use POSIX qw(strtod);
+
+sub field($$) {
+ my $value = $_[0];
+ my $count = $_[1];
+
+ while ( $count > 0 ) {
+ $count -= 1;
+ $value =~ s/^\S*\s*//;
+ }
+ $value =~ s/\s.*//;
+ return $value;
+}
+
+sub notes($) {
+ my $value = $_[0];
+
+ $value =~ s/^[^#]*//;
+ $value =~ s/^#//;
+ $value =~ s/^\s+//;
+
+ return $value;
+}
+
+sub make_header($$$) {
+ my $source = $_[0];
+ my $charset = $_[1];
+ my $official = $_[2];
+
+ printf FP "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n";
+ printf FP "<HTML>\n";
+ printf FP "<HEAD>\n";
+ printf FP "<!-- $source -->\n";
+ printf FP "<TITLE>%s table</TITLE>\n", &escaped($official);
+ printf FP "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=%s\">\n", &escaped($charset);
+ printf FP "</HEAD>\n";
+ printf FP "\n";
+ printf FP "<BODY> \n";
+ printf FP "\n";
+ printf FP "<H1 ALIGN=center>%s table</H1> \n", &escaped($charset);
+ printf FP "\n";
+ printf FP "<PRE>\n";
+ printf FP "Code Char Entity Render Description\n";
+}
+
+sub make_mark() {
+ printf FP "---- ---- ------ ------ -----------------------------------\n";
+}
+
+sub escaped($) {
+ my $result = $_[0];
+ $result =~ s/&/&amp;/g;
+ $result =~ s/</&lt;/g;
+ $result =~ s/>/&gt;/g;
+ return $result;
+}
+
+sub make_row($$$) {
+ my $old_code = $_[0];
+ my $new_code = $_[1];
+ my $comments = $_[2];
+
+ # printf "# make_row %d %d %s\n", $old_code, $new_code, $comments;
+ my $visible = sprintf("&amp;#%d; ", $new_code);
+ if ($old_code < 256) {
+ printf FP "%4x %c %.13s &#%d; %s\n",
+ $old_code, $old_code,
+ $visible, $new_code,
+ &escaped($comments);
+ } else {
+ printf FP "%4x . %.13s &#%d; %s\n",
+ $old_code,
+ $visible, $new_code,
+ &escaped($comments);
+ }
+}
+
+sub null_row($$) {
+ my $old_code = $_[0];
+ my $comments = $_[1];
+
+ if ($old_code < 256) {
+ printf FP "%4x %c %s\n",
+ $old_code, $old_code,
+ &escaped($comments);
+ } else {
+ printf FP "%4x . %s\n",
+ $old_code,
+ &escaped($comments);
+ }
+}
+
+sub make_footer() {
+ printf FP "</PRE>\n";
+ printf FP "</BODY>\n";
+ printf FP "</HTML>\n";
+}
+
+# return true if the string describes a range
+sub is_range($) {
+ return ($_[0] =~ /.*-.*/);
+}
+
+# convert the U+'s to 0x's so strtod() can convert them.
+sub zeroxes($) {
+ my $result = $_[0];
+ $result =~ s/^U\+/0x/;
+ $result =~ s/-U\+/-0x/;
+ return $result;
+}
+
+# convert a string to a number (-1's are outside the range of Unicode).
+sub value_of($) {
+ my ($result, $oops) = strtod($_[0]);
+ $result = -1 if ($oops ne 0);
+ return $result;
+}
+
+# return the first number in a range
+sub first_of($) {
+ my $range = &zeroxes($_[0]);
+ $range =~ s/-.*//;
+ return &value_of($range);
+}
+
+# return the last number in a range
+sub last_of($) {
+ my $range = &zeroxes($_[0]);
+ $range =~ s/^.*-//;
+ return &value_of($range);
+}
+
+sub one_many($$$) {
+ my $oldcode = $_[0];
+ my $newcode = &zeroxes($_[1]);
+ my $comment = $_[2];
+
+ my $old_code = &value_of($oldcode);
+ if ( $old_code lt 0 ) {
+ printf "? Problem with number \"%s\"\n", $oldcode;
+ } else {
+ &make_mark if (( $old_code % 8 ) == 0 );
+
+ if ( $newcode =~ /^#.*/ ) {
+ &null_row($old_code, $comment);
+ } elsif ( &is_range($newcode) ) {
+ my $first_item = &first_of($newcode);
+ my $last_item = &last_of($newcode);
+ my $item;
+
+ if ( $first_item lt 0 or $last_item lt 0 ) {
+ printf "? Problem with one:many numbers \"%s\"\n", $newcode;
+ } else {
+ if ( $comment =~ /^$/ ) {
+ $comment = sprintf("mapped: %#x to %#x..%#x", $old_code, $first_item, $last_item);
+ } else {
+ $comment = $comment . " (range)";
+ }
+ for $item ( $first_item..$last_item) {
+ &make_row($old_code, $item, $comment);
+ }
+ }
+ } else {
+ my $new_code = &value_of($newcode);
+ if ( $new_code lt 0 ) {
+ printf "? Problem with number \"%s\"\n", $newcode;
+ } else {
+ if ( $comment =~ /^$/ ) {
+ $comment = sprintf("mapped: %#x to %#x", $old_code, $new_code);
+ }
+ &make_row($old_code, $new_code, $comment);
+ }
+ }
+ }
+}
+
+sub many_many($$$) {
+ my $oldcode = $_[0];
+ my $newcode = $_[1];
+ my $comment = $_[2];
+
+ my $first_old = &first_of($oldcode);
+ my $last_old = &last_of($oldcode);
+ my $item;
+
+ if (&is_range($newcode)) {
+ my $first_new = &first_of($newcode);
+ my $last_new = &last_of($newcode);
+ for $item ( $first_old..$last_old) {
+ &one_many($item, $first_new, $comment);
+ $first_new += 1;
+ }
+ } else {
+ for $item ( $first_old..$last_old) {
+ &one_many($item, $newcode, $comment);
+ }
+ }
+}
+
+sub approximate($$$) {
+ my $values = $_[0];
+ my $expect = sprintf("%-8s", $_[1]);
+ my $comment = $_[2];
+ my $escaped = &escaped($expect);
+ my $left;
+ my $this;
+ my $next;
+
+ $escaped =~ s/\\134/\\/g;
+ $escaped =~ s/\\015/\&#13\;/g;
+ $escaped =~ s/\\012/\&#10\;/g;
+
+ while ( $escaped =~ /^.*\\[0-7]{3}.*$/ ) {
+ $left = $escaped;
+ $left =~ s/\\[0-7]{3}.*//;
+ $this = substr $escaped,length($left)+1,3;
+ $next = substr $escaped,length($left)+4;
+ $escaped = sprintf("%s&#%d;%s", $left, oct $this, $next);
+ }
+
+ my $visible = sprintf("&amp;#%d; ", $values);
+ if ($values < 256) {
+ printf FP "%4x %c %.13s &#%d; approx: %s\n",
+ $values, $values,
+ $visible,
+ $values,
+ $escaped;
+ } else {
+ printf FP "%4x . %.13s &#%d; approx: %s\n",
+ $values,
+ $visible,
+ $values,
+ $escaped;
+ }
+}
+
+sub doit($) {
+ my $source = $_[0];
+
+ printf "** %s\n", $source;
+
+ my $target = basename($source, ".tbl");
+
+ # Read the file into an array in memory.
+ open(FP,$source) || do {
+ print STDERR "Can't open input $source: $!\n";
+ return;
+ };
+ my (@input) = <FP>;
+ chomp @input;
+ close(FP);
+
+ my $n;
+ my $charset = "";
+ my $official = "";
+ my $empty = 1;
+
+ for $n (0..$#input) {
+ $input[$n] =~ s/\s*$//; # trim trailing blanks
+ $input[$n] =~ s/^\s*//; # trim leading blanks
+ $input[$n] =~ s/^#0x/0x/; # uncomment redundant stuff
+
+ next if $input[$n] =~ /^$/;
+ next if $input[$n] =~ /^#.*$/;
+
+ if ( $empty
+ and ( $input[$n] =~ /^\d/
+ or $input[$n] =~ /^U\+/ ) ) {
+ $target = $charset . ".html";
+ printf "=> %s\n", $target;
+ open(FP,">$target") || do {
+ print STDERR "Can't open output $target: $!\n";
+ return;
+ };
+ &make_header($source, $charset, $official);
+ $empty = 0;
+ }
+
+ if ( $input[$n] =~ /^M.*/ ) {
+ $charset = $input[$n];
+ $charset =~ s/^.//;
+ } elsif ( $input[$n] =~ /^O.*/ ) {
+ $official = $input[$n];
+ $official =~ s/^.//;
+ } elsif ( $input[$n] =~ /^\d/ ) {
+
+ my $newcode = &field($input[$n], 1);
+
+ next if ( $newcode eq "idem" );
+ next if ( $newcode eq "" );
+
+ my $oldcode = &field($input[$n], 0);
+ if ( &is_range($oldcode) ) {
+ &many_many($oldcode, $newcode, &notes($input[$n]));
+ } else {
+ &one_many($oldcode, $newcode, &notes($input[$n]));
+ }
+ } elsif ( $input[$n] =~ /^U\+/ ) {
+ if ( $input[$n] =~ /^U\+\w+:/ ) {
+ my $values = $input[$n];
+ my $expect = $input[$n];
+
+ $values =~ s/:.*//;
+ $values = &zeroxes($values);
+ $expect =~ s/^[^:]+://;
+
+ if ( &is_range($values) ) {
+ printf "fixme:%s(%s)(%s)\n", $input[$n], $values, $expect;
+ } else {
+ &approximate(&value_of($values), $expect, &notes($input[$n]));
+ }
+ } else {
+ my $value = $input[$n];
+ $value =~ s/\s*".*//;
+ $value = &value_of(&zeroxes($value));
+ if ($value gt 0) {
+ my $quote = $input[$n];
+ my $comment = &notes($input[$n]);
+ $quote =~ s/^[^"]*"//;
+ $quote =~ s/".*//;
+ &approximate($value, $quote, $comment);
+ } else {
+ printf "fixme:%d(%s)\n", $n, $input[$n];
+ }
+ }
+ } else {
+ # printf "skipping line %d:%s\n", $n + 1, $input[$n];
+ }
+ }
+ if ( ! $empty ) {
+ &make_footer();
+ }
+ close FP;
+}
+
+sub usage() {
+ print <<USAGE;
+Usage: $0 [tbl-files]
+
+The script writes a new ".html" file for each input, using
+the same name as the input, stripping the ".tbl" suffix.
+USAGE
+ exit(1);
+}
+
+if ( $#ARGV < 0 ) {
+ usage();
+} else {
+ while ( $#ARGV >= 0 ) {
+ &doit ( shift @ARGV );
+ }
+}
+exit (0);