#!/usr/bin/perl -w # $LynxId: tbl2html.pl,v 1.5 2011/05/21 15:18:16 tom Exp $ # # Translate one or more ".tbl" files into ".html" files which can be used to # test the charset support in lynx. Each of the ".html" files will use the # charset that corresponds to the input ".tbl" file. use strict; use Getopt::Std; use File::Basename; use POSIX qw(strtod); sub field($$) { my $value = $_[0]; my $count = $_[1]; while ( $count > 0 ) { $count -= 1; $value =~ s/^\S*\s*//; } $value =~ s/\s.*//; return $value; } sub notes($) { my $value = $_[0]; $value =~ s/^[^#]*//; $value =~ s/^#//; $value =~ s/^\s+//; return $value; } sub make_header($$$) { my $source = $_[0]; my $charset = $_[1]; my $official = $_[2]; printf FP "\n"; printf FP "\n"; printf FP "\n"; printf FP "\n"; printf FP "%s table\n", &escaped($official); printf FP "\n", &escaped($charset); printf FP "\n"; printf FP "\n"; printf FP " \n"; printf FP "\n"; printf FP "

%s table

\n", &escaped($charset); printf FP "\n"; printf FP "

\n";
	printf FP "Code  Char  Entity   Render          Description\n";
}

sub make_mark() {
	printf FP "----  ----  ------   ------          -----------------------------------\n";
}

sub escaped($) {
	my $result = $_[0];
	$result =~ s/&/&/g;
	$result =~ s//>/g;
	return $result;
}

sub make_row($$$) {
	my $old_code = $_[0];
	my $new_code = $_[1];
	my $comments = $_[2];

	# printf "# make_row %d %d %s\n", $old_code, $new_code, $comments;
	my $visible = sprintf("&#%d;      ", $new_code);
	if ($old_code < 256) {
		printf FP "%4x    %c   %.13s  &#%d;             %s\n",
			$old_code, $old_code,
			$visible, $new_code,
			&escaped($comments);
	} else {
		printf FP "%4x    .   %.13s  &#%d;             %s\n",
			$old_code,
			$visible, $new_code,
			&escaped($comments);
	}
}

sub null_row($$) {
	my $old_code = $_[0];
	my $comments = $_[1];

	if ($old_code < 256) {
		printf FP "%4x    %c                     %s\n",
			$old_code, $old_code,
			&escaped($comments);
	} else {
		printf FP "%4x    .                     %s\n",
			$old_code,
			&escaped($comments);
	}
}

sub make_footer() {
	printf FP "

\n"; printf FP "\n"; printf FP "\n"; } # return true if the string describes a range sub is_range($) { return ($_[0] =~ /.*-.*/); } # convert the U+'s to 0x's so strtod() can convert them. sub zeroxes($) { my $result = $_[0]; $result =~ s/^U\+/0x/; $result =~ s/-U\+/-0x/; return $result; } # convert a string to a number (-1's are outside the range of Unicode). sub value_of($) { my ($result, $oops) = strtod($_[0]); $result = -1 if ($oops ne 0); return $result; } # return the first number in a range sub first_of($) { my $range = &zeroxes($_[0]); $range =~ s/-.*//; return &value_of($range); } # return the last number in a range sub last_of($) { my $range = &zeroxes($_[0]); $range =~ s/^.*-//; return &value_of($range); } sub one_many($$$) { my $oldcode = $_[0]; my $newcode = &zeroxes($_[1]); my $comment = $_[2]; my $old_code = &value_of($oldcode); if ( $old_code lt 0 ) { printf "? Problem with number \"%s\"\n", $oldcode; } else { &make_mark if (( $old_code % 8 ) == 0 ); if ( $newcode =~ /^#.*/ ) { &null_row($old_code, $comment); } elsif ( &is_range($newcode) ) { my $first_item = &first_of($newcode); my $last_item = &last_of($newcode); my $item; if ( $first_item lt 0 or $last_item lt 0 ) { printf "? Problem with one:many numbers \"%s\"\n", $newcode; } else { if ( $comment =~ /^$/ ) { $comment = sprintf("mapped: %#x to %#x..%#x", $old_code, $first_item, $last_item); } else { $comment = $comment . " (range)"; } for $item ( $first_item..$last_item) { &make_row($old_code, $item, $comment); } } } else { my $new_code = &value_of($newcode); if ( $new_code lt 0 ) { printf "? Problem with number \"%s\"\n", $newcode; } else { if ( $comment =~ /^$/ ) { $comment = sprintf("mapped: %#x to %#x", $old_code, $new_code); } &make_row($old_code, $new_code, $comment); } } } } sub many_many($$$) { my $oldcode = $_[0]; my $newcode = $_[1]; my $comment = $_[2]; my $first_old = &first_of($oldcode); my $last_old = &last_of($oldcode); my $item; if (&is_range($newcode)) { my $first_new = &first_of($newcode); my $last_new = &last_of($newcode); for $item ( $first_old..$last_old) { &one_many($item, $first_new, $comment); $first_new += 1; } } else { for $item ( $first_old..$last_old) { &one_many($item, $newcode, $comment); } } } sub approximate($$$) { my $values = $_[0]; my $expect = sprintf("%-8s", $_[1]); my $comment = $_[2]; my $escaped = &escaped($expect); my $left; my $this; my $next; $escaped =~ s/\\134/\\/g; $escaped =~ s/\\015/\ \;/g; $escaped =~ s/\\012/\ \;/g; while ( $escaped =~ /^.*\\[0-7]{3}.*$/ ) { $left = $escaped; $left =~ s/\\[0-7]{3}.*//; $this = substr $escaped,length($left)+1,3; $next = substr $escaped,length($left)+4; $escaped = sprintf("%s&#%d;%s", $left, oct $this, $next); } my $visible = sprintf("&#%d; ", $values); if ($values < 256) { printf FP "%4x %c %.13s &#%d; approx: %s\n", $values, $values, $visible, $values, $escaped; } else { printf FP "%4x . %.13s &#%d; approx: %s\n", $values, $visible, $values, $escaped; } } sub doit($) { my $source = $_[0]; printf "** %s\n", $source; my $target = basename($source, ".tbl"); # Read the file into an array in memory. open(FP,$source) || do { print STDERR "Can't open input $source: $!\n"; return; }; my (@input) = ; chomp @input; close(FP); my $n; my $charset = ""; my $official = ""; my $empty = 1; for $n (0..$#input) { $input[$n] =~ s/\s*$//; # trim trailing blanks $input[$n] =~ s/^\s*//; # trim leading blanks $input[$n] =~ s/^#0x/0x/; # uncomment redundant stuff next if $input[$n] =~ /^$/; next if $input[$n] =~ /^#.*$/; if ( $empty and ( $input[$n] =~ /^\d/ or $input[$n] =~ /^U\+/ ) ) { $target = $charset . ".html"; printf "=> %s\n", $target; open(FP,">$target") || do { print STDERR "Can't open output $target: $!\n"; return; }; &make_header($source, $charset, $official); $empty = 0; } if ( $input[$n] =~ /^M.*/ ) { $charset = $input[$n]; $charset =~ s/^.//; } elsif ( $input[$n] =~ /^O.*/ ) { $official = $input[$n]; $official =~ s/^.//; } elsif ( $input[$n] =~ /^\d/ ) { my $newcode = &field($input[$n], 1); next if ( $newcode eq "idem" ); next if ( $newcode eq "" ); my $oldcode = &field($input[$n], 0); if ( &is_range($oldcode) ) { &many_many($oldcode, $newcode, ¬es($input[$n])); } else { &one_many($oldcode, $newcode, ¬es($input[$n])); } } elsif ( $input[$n] =~ /^U\+/ ) { if ( $input[$n] =~ /^U\+\w+:/ ) { my $values = $input[$n]; my $expect = $input[$n]; $values =~ s/:.*//; $values = &zeroxes($values); $expect =~ s/^[^:]+://; if ( &is_range($values) ) { printf "fixme:%s(%s)(%s)\n", $input[$n], $values, $expect; } else { &approximate(&value_of($values), $expect, ¬es($input[$n])); } } else { my $value = $input[$n]; $value =~ s/\s*".*//; $value = &value_of(&zeroxes($value)); if ($value gt 0) { my $quote = $input[$n]; my $comment = ¬es($input[$n]); $quote =~ s/^[^"]*"//; $quote =~ s/".*//; &approximate($value, $quote, $comment); } else { printf "fixme:%d(%s)\n", $n, $input[$n]; } } } else { # printf "skipping line %d:%s\n", $n + 1, $input[$n]; } } if ( ! $empty ) { &make_footer(); } close FP; } sub usage() { print <= 0 ) { &doit ( shift @ARGV ); } } exit (0);