#!/usr/bin/perl
#
# Format XHTML generated by groff -Thtml (via tidy) for websites
#
# Usage:
#   groff -Thtml -P-l something.man | tidy -asxml ... | perl fix-groff-xhtml.pl OUTPUT-FILE
#
# Reads XHTML on standard input, rewrites it line by line, and writes the
# result to OUTPUT-FILE.
#
# (C) Copyright 2003-2014 Dave Beckett
#
# NOTE(review): the copy of this script that was reviewed had every <...>
# span stripped out of its regexes and heredoc (including the <IN> read
# operator of the main loop).  The markup below is reconstructed from the
# surrounding text; every place that had to be guessed is marked
# NOTE(review) or TODO and must be confirmed against the upstream script.
#

use strict;
use warnings;
use File::Basename;

my $progname = basename $0;

my $raptor_title  = "Raptor RDF Parser Toolkit";
my $redland_title = "Redland RDF Application Framework";
my $rasqal_title  = "Rasqal RDF Query Library";

# Manual page name => replacement used for both the page title and the main
# heading.  Kept as an ordered list so substitutions run in the original order.
# NOTE(review): the roqet entry says "parser utility"; that text is preserved
# as found, but it may be a copy-paste from the rapper entry - confirm.
my @page_titles = (
    [ 'libraptor', "$raptor_title - Raptor API" ],
    [ 'rapper',    "$raptor_title - Raptor RDF parser utility" ],
    [ 'rdfproc',   "$redland_title - Redland RDF processor utility" ],
    [ 'librasqal', "$rasqal_title - Rasqal API" ],
    [ 'roqet',     "$rasqal_title - Rasqal RDF parser utility" ],
);

die "USAGE: $progname OUTPUT-FILE\n" if @ARGV < 1;

my ($file) = @ARGV;

# Three-argument open with a lexical handle: the file name can never be
# interpreted as an open mode.
open(my $out, '>', $file)
    or die "$progname: Cannot create $file - $!\n";

# Footer year is the same for every line, so compute it once.
my $year = 1900 + (localtime)[5];

while (my $line = <STDIN>) {
    for my $page (@page_titles) {
        my ($name, $title) = @{$page};

        # NOTE(review): tag names reconstructed.  The first pattern was an
        # inline element (assumed <title>), the second a block element
        # (assumed <h1>, attributes passed through unchanged) - confirm.
        $line =~ s{<title>\Q$name\E</title>}{<title>$title</title>};
        $line =~ s{<h1([^>]*)>\Q$name\E</h1>}{<h1$1>$title</h1>};
    }

    # TODO(review): the original had two statements here that cannot be
    # recovered from the stripped copy: a `next if /^<.../` that skipped some
    # input lines, and a substitution of the form s%<...[^>]*>%<...>% that
    # replaced one opening tag.  Restore both from the upstream script.

    # This is not xhtml
    $line =~ s{ cols="\d+" }{ };

    # Replace spaces with underscores inside name="..." / id="..." values.
    $line =~ s{(name|id)="([^"]+)"}{
        my ($attr, $value) = ($1, $2);
        $value =~ tr/ /_/;
        qq($attr="$value");
    }eg;

    # Turn "<owner> - <url>" / "<owner> <url>" into a hyperlink on the owner.
    # NOTE(review): replacement markup reconstructed (only "$1" survived in
    # the stripped copy while the URL is captured as $2) - confirm.
    $line =~ s{(Dave Beckett|Institute for Learning and Research Technology .ILRT.|University of Bristol) (?:- |)(http://[^<]+)}{<a href="$2">$1</a>};

    # Emit the copyright footer just before the document body is closed.
    # NOTE(review): the stripped condition was m%^% (true for every line);
    # "</body>" and the <p>/<br /> markup in the footer are assumptions.
    if ($line =~ m{^</body>}) {
        print {$out} <<"EOT";

<p>Copyright 2002-$year Dave Beckett<br />
2002-2005 University of Bristol</p>

EOT
    }

    print {$out} $line;
}

# Buffered write errors only surface at close, so check it.
close($out)
    or die "$progname: Cannot write $file - $!\n";