#!/usr/bin/perl -w # $LynxId: tbl2html.pl,v 1.5 2011/05/21 15:18:16 tom Exp $ # # Translate one or more ".tbl" files into ".html" files which can be used to # test the charset support in lynx. Each of the ".html" files will use the # charset that corresponds to the input ".tbl" file. use strict; use Getopt::Std; use File::Basename; use POSIX qw(strtod); sub field($$) { my $value = $_[0]; my $count = $_[1]; while ( $count > 0 ) { $count -= 1; $value =~ s/^\S*\s*//; } $value =~ s/\s.*//; return $value; } sub notes($) { my $value = $_[0]; $value =~ s/^[^#]*//; $value =~ s/^#//; $value =~ s/^\s+//; return $value; } sub make_header($$$) { my $source = $_[0]; my $charset = $_[1]; my $official = $_[2]; printf FP "\n"; printf FP "\n"; printf FP "
\n"; printf FP "\n"; printf FP "\n"; printf FP "Code Char Entity Render Description\n"; } sub make_mark() { printf FP "---- ---- ------ ------ -----------------------------------\n"; } sub escaped($) { my $result = $_[0]; $result =~ s/&/&/g; $result =~ s/</g; $result =~ s/>/>/g; return $result; } sub make_row($$$) { my $old_code = $_[0]; my $new_code = $_[1]; my $comments = $_[2]; # printf "# make_row %d %d %s\n", $old_code, $new_code, $comments; my $visible = sprintf("&#%d; ", $new_code); if ($old_code < 256) { printf FP "%4x %c %.13s %d; %s\n", $old_code, $old_code, $visible, $new_code, &escaped($comments); } else { printf FP "%4x . %.13s %d; %s\n", $old_code, $visible, $new_code, &escaped($comments); } } sub null_row($$) { my $old_code = $_[0]; my $comments = $_[1]; if ($old_code < 256) { printf FP "%4x %c %s\n", $old_code, $old_code, &escaped($comments); } else { printf FP "%4x . %s\n", $old_code, &escaped($comments); } } sub make_footer() { printf FP "\n"; printf FP "\n"; printf FP "\n"; } # return true if the string describes a range sub is_range($) { return ($_[0] =~ /.*-.*/); } # convert the U+'s to 0x's so strtod() can convert them. sub zeroxes($) { my $result = $_[0]; $result =~ s/^U\+/0x/; $result =~ s/-U\+/-0x/; return $result; } # convert a string to a number (-1's are outside the range of Unicode). sub value_of($) { my ($result, $oops) = strtod($_[0]); $result = -1 if ($oops ne 0); return $result; } # return the first number in a range sub first_of($) { my $range = &zeroxes($_[0]); $range =~ s/-.*//; return &value_of($range); } # return the last number in a range sub last_of($) { my $range = &zeroxes($_[0]); $range =~ s/^.*-//; return &value_of($range); } sub one_many($$$) { my $oldcode = $_[0]; my $newcode = &zeroxes($_[1]); my $comment = $_[2]; my $old_code = &value_of($oldcode); if ( $old_code lt 0 ) { printf "? Problem with number \"%s\"\n", $oldcode; } else { &make_mark if (( $old_code % 8 ) == 0 ); if ( $newcode =~ /^#.*/ ) { &null_row($old_code, $comment); } elsif ( &is_range($newcode) ) { my $first_item = &first_of($newcode); my $last_item = &last_of($newcode); my $item; if ( $first_item lt 0 or $last_item lt 0 ) { printf "? Problem with one:many numbers \"%s\"\n", $newcode; } else { if ( $comment =~ /^$/ ) { $comment = sprintf("mapped: %#x to %#x..%#x", $old_code, $first_item, $last_item); } else { $comment = $comment . " (range)"; } for $item ( $first_item..$last_item) { &make_row($old_code, $item, $comment); } } } else { my $new_code = &value_of($newcode); if ( $new_code lt 0 ) { printf "? Problem with number \"%s\"\n", $newcode; } else { if ( $comment =~ /^$/ ) { $comment = sprintf("mapped: %#x to %#x", $old_code, $new_code); } &make_row($old_code, $new_code, $comment); } } } } sub many_many($$$) { my $oldcode = $_[0]; my $newcode = $_[1]; my $comment = $_[2]; my $first_old = &first_of($oldcode); my $last_old = &last_of($oldcode); my $item; if (&is_range($newcode)) { my $first_new = &first_of($newcode); my $last_new = &last_of($newcode); for $item ( $first_old..$last_old) { &one_many($item, $first_new, $comment); $first_new += 1; } } else { for $item ( $first_old..$last_old) { &one_many($item, $newcode, $comment); } } } sub approximate($$$) { my $values = $_[0]; my $expect = sprintf("%-8s", $_[1]); my $comment = $_[2]; my $escaped = &escaped($expect); my $left; my $this; my $next; $escaped =~ s/\\134/\\/g; $escaped =~ s/\\015/\ \;/g; $escaped =~ s/\\012/\ \;/g; while ( $escaped =~ /^.*\\[0-7]{3}.*$/ ) { $left = $escaped; $left =~ s/\\[0-7]{3}.*//; $this = substr $escaped,length($left)+1,3; $next = substr $escaped,length($left)+4; $escaped = sprintf("%s%d;%s", $left, oct $this, $next); } my $visible = sprintf("&#%d; ", $values); if ($values < 256) { printf FP "%4x %c %.13s %d; approx: %s\n", $values, $values, $visible, $values, $escaped; } else { printf FP "%4x . %.13s %d; approx: %s\n", $values, $visible, $values, $escaped; } } sub doit($) { my $source = $_[0]; printf "** %s\n", $source; my $target = basename($source, ".tbl"); # Read the file into an array in memory. open(FP,$source) || do { print STDERR "Can't open input $source: $!\n"; return; }; my (@input) =