diff options
Diffstat (limited to 'scripts/tbl2html.pl')
-rwxr-xr-x | scripts/tbl2html.pl | 364 |
1 files changed, 364 insertions, 0 deletions
diff --git a/scripts/tbl2html.pl b/scripts/tbl2html.pl new file mode 100755 index 0000000..ba8ae09 --- /dev/null +++ b/scripts/tbl2html.pl @@ -0,0 +1,364 @@ +#!/usr/bin/perl -w +# $LynxId: tbl2html.pl,v 1.5 2011/05/21 15:18:16 tom Exp $ +# +# Translate one or more ".tbl" files into ".html" files which can be used to +# test the charset support in lynx. Each of the ".html" files will use the +# charset that corresponds to the input ".tbl" file. + +use strict; + +use Getopt::Std; +use File::Basename; +use POSIX qw(strtod); + +sub field($$) { + my $value = $_[0]; + my $count = $_[1]; + + while ( $count > 0 ) { + $count -= 1; + $value =~ s/^\S*\s*//; + } + $value =~ s/\s.*//; + return $value; +} + +sub notes($) { + my $value = $_[0]; + + $value =~ s/^[^#]*//; + $value =~ s/^#//; + $value =~ s/^\s+//; + + return $value; +} + +sub make_header($$$) { + my $source = $_[0]; + my $charset = $_[1]; + my $official = $_[2]; + + printf FP "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"; + printf FP "<HTML>\n"; + printf FP "<HEAD>\n"; + printf FP "<!-- $source -->\n"; + printf FP "<TITLE>%s table</TITLE>\n", &escaped($official); + printf FP "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=%s\">\n", &escaped($charset); + printf FP "</HEAD>\n"; + printf FP "\n"; + printf FP "<BODY> \n"; + printf FP "\n"; + printf FP "<H1 ALIGN=center>%s table</H1> \n", &escaped($charset); + printf FP "\n"; + printf FP "<PRE>\n"; + printf FP "Code Char Entity Render Description\n"; +} + +sub make_mark() { + printf FP "---- ---- ------ ------ -----------------------------------\n"; +} + +sub escaped($) { + my $result = $_[0]; + $result =~ s/&/&/g; + $result =~ s/</</g; + $result =~ s/>/>/g; + return $result; +} + +sub make_row($$$) { + my $old_code = $_[0]; + my $new_code = $_[1]; + my $comments = $_[2]; + + # printf "# make_row %d %d %s\n", $old_code, $new_code, $comments; + my $visible = sprintf("&#%d; ", $new_code); + if ($old_code < 256) { + printf FP "%4x %c %.13s &#%d; %s\n", + $old_code, $old_code, + $visible, $new_code, + &escaped($comments); + } else { + printf FP "%4x . %.13s &#%d; %s\n", + $old_code, + $visible, $new_code, + &escaped($comments); + } +} + +sub null_row($$) { + my $old_code = $_[0]; + my $comments = $_[1]; + + if ($old_code < 256) { + printf FP "%4x %c %s\n", + $old_code, $old_code, + &escaped($comments); + } else { + printf FP "%4x . %s\n", + $old_code, + &escaped($comments); + } +} + +sub make_footer() { + printf FP "</PRE>\n"; + printf FP "</BODY>\n"; + printf FP "</HTML>\n"; +} + +# return true if the string describes a range +sub is_range($) { + return ($_[0] =~ /.*-.*/); +} + +# convert the U+'s to 0x's so strtod() can convert them. +sub zeroxes($) { + my $result = $_[0]; + $result =~ s/^U\+/0x/; + $result =~ s/-U\+/-0x/; + return $result; +} + +# convert a string to a number (-1's are outside the range of Unicode). +sub value_of($) { + my ($result, $oops) = strtod($_[0]); + $result = -1 if ($oops ne 0); + return $result; +} + +# return the first number in a range +sub first_of($) { + my $range = &zeroxes($_[0]); + $range =~ s/-.*//; + return &value_of($range); +} + +# return the last number in a range +sub last_of($) { + my $range = &zeroxes($_[0]); + $range =~ s/^.*-//; + return &value_of($range); +} + +sub one_many($$$) { + my $oldcode = $_[0]; + my $newcode = &zeroxes($_[1]); + my $comment = $_[2]; + + my $old_code = &value_of($oldcode); + if ( $old_code lt 0 ) { + printf "? Problem with number \"%s\"\n", $oldcode; + } else { + &make_mark if (( $old_code % 8 ) == 0 ); + + if ( $newcode =~ /^#.*/ ) { + &null_row($old_code, $comment); + } elsif ( &is_range($newcode) ) { + my $first_item = &first_of($newcode); + my $last_item = &last_of($newcode); + my $item; + + if ( $first_item lt 0 or $last_item lt 0 ) { + printf "? Problem with one:many numbers \"%s\"\n", $newcode; + } else { + if ( $comment =~ /^$/ ) { + $comment = sprintf("mapped: %#x to %#x..%#x", $old_code, $first_item, $last_item); + } else { + $comment = $comment . " (range)"; + } + for $item ( $first_item..$last_item) { + &make_row($old_code, $item, $comment); + } + } + } else { + my $new_code = &value_of($newcode); + if ( $new_code lt 0 ) { + printf "? Problem with number \"%s\"\n", $newcode; + } else { + if ( $comment =~ /^$/ ) { + $comment = sprintf("mapped: %#x to %#x", $old_code, $new_code); + } + &make_row($old_code, $new_code, $comment); + } + } + } +} + +sub many_many($$$) { + my $oldcode = $_[0]; + my $newcode = $_[1]; + my $comment = $_[2]; + + my $first_old = &first_of($oldcode); + my $last_old = &last_of($oldcode); + my $item; + + if (&is_range($newcode)) { + my $first_new = &first_of($newcode); + my $last_new = &last_of($newcode); + for $item ( $first_old..$last_old) { + &one_many($item, $first_new, $comment); + $first_new += 1; + } + } else { + for $item ( $first_old..$last_old) { + &one_many($item, $newcode, $comment); + } + } +} + +sub approximate($$$) { + my $values = $_[0]; + my $expect = sprintf("%-8s", $_[1]); + my $comment = $_[2]; + my $escaped = &escaped($expect); + my $left; + my $this; + my $next; + + $escaped =~ s/\\134/\\/g; + $escaped =~ s/\\015/\
\;/g; + $escaped =~ s/\\012/\
\;/g; + + while ( $escaped =~ /^.*\\[0-7]{3}.*$/ ) { + $left = $escaped; + $left =~ s/\\[0-7]{3}.*//; + $this = substr $escaped,length($left)+1,3; + $next = substr $escaped,length($left)+4; + $escaped = sprintf("%s&#%d;%s", $left, oct $this, $next); + } + + my $visible = sprintf("&#%d; ", $values); + if ($values < 256) { + printf FP "%4x %c %.13s &#%d; approx: %s\n", + $values, $values, + $visible, + $values, + $escaped; + } else { + printf FP "%4x . %.13s &#%d; approx: %s\n", + $values, + $visible, + $values, + $escaped; + } +} + +sub doit($) { + my $source = $_[0]; + + printf "** %s\n", $source; + + my $target = basename($source, ".tbl"); + + # Read the file into an array in memory. + open(FP,$source) || do { + print STDERR "Can't open input $source: $!\n"; + return; + }; + my (@input) = <FP>; + chomp @input; + close(FP); + + my $n; + my $charset = ""; + my $official = ""; + my $empty = 1; + + for $n (0..$#input) { + $input[$n] =~ s/\s*$//; # trim trailing blanks + $input[$n] =~ s/^\s*//; # trim leading blanks + $input[$n] =~ s/^#0x/0x/; # uncomment redundant stuff + + next if $input[$n] =~ /^$/; + next if $input[$n] =~ /^#.*$/; + + if ( $empty + and ( $input[$n] =~ /^\d/ + or $input[$n] =~ /^U\+/ ) ) { + $target = $charset . ".html"; + printf "=> %s\n", $target; + open(FP,">$target") || do { + print STDERR "Can't open output $target: $!\n"; + return; + }; + &make_header($source, $charset, $official); + $empty = 0; + } + + if ( $input[$n] =~ /^M.*/ ) { + $charset = $input[$n]; + $charset =~ s/^.//; + } elsif ( $input[$n] =~ /^O.*/ ) { + $official = $input[$n]; + $official =~ s/^.//; + } elsif ( $input[$n] =~ /^\d/ ) { + + my $newcode = &field($input[$n], 1); + + next if ( $newcode eq "idem" ); + next if ( $newcode eq "" ); + + my $oldcode = &field($input[$n], 0); + if ( &is_range($oldcode) ) { + &many_many($oldcode, $newcode, ¬es($input[$n])); + } else { + &one_many($oldcode, $newcode, ¬es($input[$n])); + } + } elsif ( $input[$n] =~ /^U\+/ ) { + if ( $input[$n] =~ /^U\+\w+:/ ) { + my $values = $input[$n]; + my $expect = $input[$n]; + + $values =~ s/:.*//; + $values = &zeroxes($values); + $expect =~ s/^[^:]+://; + + if ( &is_range($values) ) { + printf "fixme:%s(%s)(%s)\n", $input[$n], $values, $expect; + } else { + &approximate(&value_of($values), $expect, ¬es($input[$n])); + } + } else { + my $value = $input[$n]; + $value =~ s/\s*".*//; + $value = &value_of(&zeroxes($value)); + if ($value gt 0) { + my $quote = $input[$n]; + my $comment = ¬es($input[$n]); + $quote =~ s/^[^"]*"//; + $quote =~ s/".*//; + &approximate($value, $quote, $comment); + } else { + printf "fixme:%d(%s)\n", $n, $input[$n]; + } + } + } else { + # printf "skipping line %d:%s\n", $n + 1, $input[$n]; + } + } + if ( ! $empty ) { + &make_footer(); + } + close FP; +} + +sub usage() { + print <<USAGE; +Usage: $0 [tbl-files] + +The script writes a new ".html" file for each input, using +the same name as the input, stripping the ".tbl" suffix. +USAGE + exit(1); +} + +if ( $#ARGV < 0 ) { + usage(); +} else { + while ( $#ARGV >= 0 ) { + &doit ( shift @ARGV ); + } +} +exit (0); |