#!/usr/bin/perl # Po4a::Xml.pm # # extract and translate translatable strings from XML documents. # # This code extracts plain text from tags and attributes from generic # XML documents, and it can be used as a base to build modules for # XML-based documents. # # Copyright © 2004 Jordi Vilalta # Copyright © 2008-2009 Nicolas François # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # ######################################################################## =encoding UTF-8 =head1 NAME Locale::Po4a::Xml - convert XML documents and derivates from/to PO files =head1 DESCRIPTION The po4a (PO for anything) project goal is to ease translations (and more interestingly, the maintenance of translations) using gettext tools on areas where they were not expected like documentation. Locale::Po4a::Xml is a module to help the translation of XML documents into other [human] languages. It can also be used as a base to build modules for XML-based documents. 
=cut package Locale::Po4a::Xml; use 5.16.0; use strict; use warnings; require Exporter; use vars qw(@ISA @EXPORT); @ISA = qw(Locale::Po4a::TransTractor); @EXPORT = qw(new initialize @tag_types); use Locale::Po4a::TransTractor; use Locale::Po4a::Common; use Carp qw(croak); use File::Basename; use File::Spec; #It will maintain the path from the root tag to the current one my @path; #It will contain a list of external entities and their attached paths my %entities; my @comments; my %translate_options_cache; my $input_charset; # This shiftline function returns the next line of the document being parsed # (and its reference). # For XML, it overloads the Transtractor shiftline to handle: # - text file inclusion if includeexternal option is set # - dropping of the text in the XML comment my $_shiftline_in_comment = 0; sub shiftline { my $self = shift; # call Transtractor's shiftline my ( $line, $ref ) = $self->SUPER::shiftline(); return ( $line, $ref ) if ( not defined $line ); if ( $self->{options}{'includeexternal'} ) { my $tmp; for my $k ( keys %entities ) { if ( $line =~ m/^(.*?)&$k;(.*)$/s ) { my ( $before, $after ) = ( $1, $2 ); my $linenum = 0; my @textentries; $tmp = $before; my $tmp_in_comment = 0; if ($_shiftline_in_comment) { if ( $before =~ m/^.*?-->(.*)$/s ) { $tmp = $1; $tmp_in_comment = 0; } else { $tmp_in_comment = 1; } } if ( $tmp_in_comment == 0 ) { while ( $tmp =~ m/^.*?(.*)$/s ) { $tmp = $1; } if ( $tmp =~ m/(.*)$/s ) { $tmp = $1; $_shiftline_in_comment = 0; } else { $_shiftline_in_comment = 1; } } if ( $_shiftline_in_comment == 0 ) { while ( $tmp =~ m/^.*?(.*)$/s ) { $tmp = $1; } if ( $tmp =~ m/', { include => 1, remove => $remove } ); return ( $eof, @tag ); } sub tag_trans_comment { my ( $self, @tag ) = @_; return $self->join_lines(@tag); } sub tag_trans_xmlhead { my ( $self, @tag ) = @_; # We don't have to translate anything from here: throw away references my $tag = $self->join_lines(@tag); $tag =~ /encoding=(("|')|)(.*?)(\s|\2)/s; my $in_charset = 
$3; my $out_charset = $self->get_out_charset; if ( defined $in_charset ) { if ( length( $input_charset // '' ) > 0 && uc($in_charset) ne uc($input_charset) ) { if ( ( $in_charset eq 'UTF-8' || lc($in_charset) eq 'utf8' ) && ( $input_charset eq 'UTF-8' || lc($input_charset) eq 'utf8' ) ) { croak wrap_mod( "po4a::pod", dgettext( "po4a", "The file %s declares %s as encoding, but you provided %s as master charset. Please change either setting because they really are different encoding in Perl. See https://perldoc.perl.org/Encode#UTF-8-vs.-utf8-vs.-UTF8" ), $self->{DOCPOD}{refname}, $in_charset, $input_charset, ); } else { croak wrap_mod( "po4a::pod", dgettext( "po4a", "The file %s declares %s as encoding, but you provided %s as master charset. Please change either setting." ), $self->{DOCPOD}{refname}, $in_charset, $input_charset, ); } } $tag =~ s/$in_charset/$out_charset/; } else { if ( $tag =~ m/standalone/ ) { $tag =~ s/(standalone)/encoding="$out_charset" $1/; } else { $tag .= " encoding=\"$out_charset\""; } } return $tag; } sub tag_trans_procins { my ( $self, @tag ) = @_; return $self->join_lines(@tag); } sub tag_extract_doctype { my ( $self, $remove ) = ( shift, shift ); # Check if there is an internal subset (between []). my ( $eof, @tag ) = $self->get_string_until( '>', { include => 1, unquoted => 1 } ); my $parity = 0; my $paragraph = ""; map { $parity = 1 - $parity; $paragraph .= $parity ? $_ : ""; } @tag; my $found = 0; if ( $paragraph =~ m/<.*\[.*get_string_until( '>', { include => 1, remove => $remove, unquoted => 1 } ); } else { ( $eof, @tag ) = $self->get_string_until( ']\s*>', { include => 1, remove => $remove, unquoted => 1, regex => 1 } ); } return ( $eof, @tag ); } sub tag_trans_doctype { # This check is not really reliable. There are system and public # identifiers. Only the public one could be checked reliably. 
my ( $self, @tag ) = @_; if ( defined $self->{options}{'doctype'} ) { my $doctype = $self->{options}{'doctype'}; if ( $tag[0] !~ /\Q$doctype\E/i ) { warn wrap_ref_mod( $tag[1], "po4a::xml", dgettext( "po4a", "Bad document type. '%s' expected. You can fix this warning with a -o doctype option, or ignore this check with -o doctype=\"\"." ), $doctype ); } } my $i = 0; my $basedir = $tag[1]; $basedir =~ s/:[0-9]+$//; $basedir = dirname($basedir); while ( $i < $#tag ) { my $t = $tag[$i]; my $ref = $tag[ $i + 1 ]; if ( $t =~ /^(\s*{options}{'includeexternal'} ) { $entities{$name} = $part2; $entities{$name} =~ s/^"?(.*?)".*$/$1/s; $entities{$name} = File::Spec->catfile( $basedir, $entities{$name} ); } } if ( ( not $file ) and ( not $includenow ) ) { if ( $part2 =~ m/^\s*(["'])(.*)\1(\s*>.*)$/s ) { my $comment = "Content of the $name entity"; my $quote = $1; my $text = $2; $part2 = $3; $text = $self->translate( $text, $ref, $comment, 'wrap' => 1 ); $t = $part1 . "$quote$text$quote$part2"; } } # print $part1."\n"; # print $name."\n"; # print $part2."\n"; } $tag[$i] = $t; $i += 2; } return $self->join_lines(@tag); } sub tag_break_close { my ( $self, @tag ) = @_; my $struct = $self->get_path; my $options = $self->get_translate_options($struct); if ( $options =~ m/[ip]/ ) { return 0; } else { return 1; } } sub tag_trans_close { my ( $self, @tag ) = @_; my $name = $self->get_tag_name(@tag); my $test = pop @path; if ( !defined($test) || $test ne $name ) { # Check whether it's simply a missing closing tag that I could survive if ( ( $self->{options}{'optionalclosingtag'} // 0 ) == 1 ) { my $found = 0; map { $found = 1 if $_ eq $name } @path; if ($found) { # The opening tag corresponding to the one closed now exists in the path. 
Pop everything in between while ( $test ne $name ) { $test = pop @path; } return $self->join_lines(@tag); } } my $ontagerror = $self->{options}{'ontagerror'}; if ( $ontagerror eq "warn" ) { warn wrap_ref_mod( $tag[1], "po4a::xml", dgettext( "po4a", "Unexpected closing tag found. The main document may be wrong. Continuing…" ), $name ); } elsif ( $ontagerror ne "silent" ) { die wrap_ref_mod( $tag[1], "po4a::xml", dgettext( "po4a", "Unexpected closing tag found. The main document may be wrong." ), $name ); } } return $self->join_lines(@tag); } sub CDATA_extract { my ( $self, $remove ) = ( shift, shift ); my ( $eof, @tag ) = $self->get_string_until( ']]>', { include => 1, unquoted => 0, remove => $remove } ); return ( $eof, @tag ); } sub CDATA_trans { my ( $self, @tag ) = @_; return $self->found_string( $self->join_lines(@tag), $tag[1], { 'type' => "CDATA" } ); } sub tag_break_alone { my ( $self, @tag ) = @_; my $struct = $self->get_path( $self->get_tag_name(@tag) ); if ( $self->get_translate_options($struct) =~ m/[ip]/ ) { return 0; } else { return 1; } } sub tag_trans_alone { my ( $self, @tag ) = @_; my $name = $self->get_tag_name(@tag); push @path, $name; $name = $self->treat_attributes(@tag); pop @path; return $name; } sub tag_break_open { my ( $self, @tag ) = @_; my $struct = $self->get_path( $self->get_tag_name(@tag) ); my $options = $self->get_translate_options($struct); if ( $options =~ m/[ip]/ ) { return 0; } else { return 1; } } sub tag_trans_open { my ( $self, @tag ) = @_; my $name = $self->get_tag_name(@tag); push @path, $name; $name = $self->treat_attributes(@tag); if ( defined $self->{options}{'addlang'} ) { my $struct = $self->get_path(); if ( $struct eq $self->{options}{'addlang'} ) { $name .= ' lang="' . $self->{TT}{po_in}->{lang} . 
'"'; } } return $name; } ##### END of Generic XML tag types ##### =head1 INTERNAL FUNCTIONS used to write derivative parsers =head2 WORKING WITH TAGS =over 4 =item get_path() This function returns the path to the current tag from the document's root, in the form EhtmlEEbodyEEpE. An additional array of tags (without brackets) can be passed as argument. These path elements are added to the end of the current path. =cut sub get_path { my $self = shift; my @add = @_; if ( @path > 0 or @add > 0 ) { return "<" . join( "><", @path, @add ) . ">"; } else { return "outside any tag (error?)"; } } =item tag_type() This function returns the index from the tag_types list that fits to the next tag in the input stream, or -1 if it's at the end of the input file. Here, the tag has structure started by E and end by E and it can contain multiple lines. This works on the array C<< @{$self->{TT}{doc_in}} >> holding input document data and reference indirectly via C<< $self->shiftline() >> and C<< $self->unshiftline($$) >>. =cut sub tag_type { my $self = shift; my ( $line, $ref ) = $self->shiftline(); my ( $match1, $match2 ); my $found = 0; my $i = 0; if ( !defined($line) ) { return -1; } $self->unshiftline( $line, $ref ); my ( $eof, @lines ) = $self->get_string_until( ">", { include => 1, unquoted => 1 } ); my $line2 = $self->join_lines(@lines); while ( !$found && $i < @tag_types ) { ( $match1, $match2 ) = ( $tag_types[$i]->{beginning}, $tag_types[$i]->{end} ); if ( $line =~ /^<\Q$match1\E/ ) { if ( !defined( $tag_types[$i]->{f_extract} ) ) { #print substr($line2,length($line2)-1-length($match2),1+length($match2))."\n"; if ( defined($line2) and $line2 =~ /\Q$match2\E>$/ ) { $found = 1; #print "YES: <".$match1." ".$match2.">\n"; } else { #print "NO: <".$match1." ".$match2.">\n"; $i++; } } else { $found = 1; } } else { $i++; } } if ( !$found ) { #It should never enter here, unless you undefine the most #general tags (as <...>) chomp $line; die $ref . ": Unknown tag type: " . $line . 
"\n"; } else { return $i; } } =item extract_tag($$) This function returns the next tag from the input stream without the beginning and end, in an array form, to maintain the references from the input file. It has two parameters: the type of the tag (as returned by tag_type) and a boolean, that indicates if it should be removed from the input stream. This works on the array C<< @{$self->{TT}{doc_in}} >> holding input document data and reference indirectly via C<< $self->shiftline() >> and C<< $self->unshiftline($$) >>. =cut sub extract_tag { my ( $self, $type, $remove ) = ( shift, shift, shift ); my ( $match1, $match2 ) = ( $tag_types[$type]->{beginning}, $tag_types[$type]->{end} ); my ( $eof, @tag ); if ( defined( $tag_types[$type]->{f_extract} ) ) { # , , , or ( $eof, @tag ) = &{ $tag_types[$type]->{f_extract} }( $self, $remove ); } else { # , , , , or . ( $eof, @tag ) = $self->get_string_until( $match2 . ">", { include => 1, remove => $remove, unquoted => 1 } ); } # Please note even index of array @tag holds actual text of input line # Please note odd index of array @tag holds its reference = $filename:$flinenum $tag[0] =~ /^<\Q$match1\E(.*)$/s; $tag[0] = $1; $tag[ $#tag - 1 ] =~ /^(.*)\Q$match2\E>$/s; $tag[ $#tag - 1 ] = $1; # Please note even index of array @tag holds tag string return ( $eof, @tag ); } =item get_tag_name(@) This function returns the name of the tag passed as an argument, in the array form returned by extract_tag. =cut sub get_tag_name { my ( $self, @tag ) = @_; $tag[0] =~ /^(\S*)/; return $1; } =item breaking_tag() This function returns a boolean that says if the next tag in the input stream is a breaking tag or not (inline tag). It leaves the input stream intact. 
=cut

sub breaking_tag {
    my $self = shift;

    # tag_type() yields -1 once the input is exhausted: there is no tag left
    # that could break the current paragraph.
    my $type = $self->tag_type;
    return 0 if $type == -1;

    my $break = $tag_types[$type]->{breaking};
    if ( !defined $break ) {

        # This tag's breaking depends on its content: extract the tag with
        # the remove flag off and let the type's f_breaking callback decide.
        my ( $eof, @lines ) = $self->extract_tag( $type, 0 );
        $break = $tag_types[$type]->{f_breaking}->( $self, @lines );
    }

    # print "TAG TYPE = ".$type." (<".$tag_types[$type]->{beginning}.") break:$break\n";
    return $break;
}

=item treat_tag()

This function translates the next tag from the input stream, using each tag
type's custom translation functions. It returns the end-of-input flag
reported while extracting the tag (or a true value when no tag is left).

This works on the array C<< @{$self->{TT}{doc_in}} >> holding input document
data and reference indirectly via C<< $self->shiftline() >> and
C<< $self->unshiftline($$) >>.

=cut

sub treat_tag {
    my $self = shift;

    # tag_type() returns -1 at the end of the input.  Used as an array index
    # it would silently select the *last* entry of @tag_types, so stop here
    # and report the end of the input instead.
    my $type = $self->tag_type;
    return 1 if $type == -1;

    my $tag_type = $tag_types[$type];
    my ( $match1, $match2 ) = ( $tag_type->{beginning}, $tag_type->{end} );
    my ( $eof, @lines ) = $self->extract_tag( $type, 1 );

    # Please note even index of array @lines holds actual text of input line
    # Please note odd index of array @lines holds its reference = $filename:$flinenum

    # Set the surrounding blanks aside so that the translation callback only
    # sees the tag content; they are put back when the tag is re-emitted.
    # Both patterns match any string (including the empty one).
    $lines[0] =~ /^(\s*)(.*)$/s;
    my $space1 = $1;
    $lines[0] = $2;

    my $last_text = $#lines - 1;    # index of the last text entry
    $lines[$last_text] =~ /^(.*?)(\s*)$/s;
    my $space2 = $2;
    $lines[$last_text] = $1;

    # Calling this tag type's specific handling (translation of attributes...)
    my $line = $tag_type->{f_translate}->( $self, @lines );

    print wrap_mod( "po4a::xml::treat_tag", "%s: type=%s <%s%s%s%s%s>",
        $lines[1], $type, $match1, $space1, $line, $space2, $match2 )
      if $self->{options}{'debug'};

    $self->pushline( "<" . $match1 . $space1 . $line . $space2 . $match2 . ">" );
    return $eof;
}

=item tag_in_list($@)

This function returns a string value that says if the first argument (a tag
hierarchy) matches any of the tags from the second argument (a list of tags
or tag hierarchies). If it doesn't match, it returns 0.
Else, it returns the matched tag's options (the characters in front of the tag) or 1 (if that tag doesn't have options). =back =cut sub tag_in_list ($$$) { my ( $self, $path, $list ) = @_; if ( $self->{options}{'caseinsensitive'} ) { $path = lc $path; } while (1) { if ( defined $list->{$path} ) { if ( length $list->{$path} ) { return $list->{$path}; } else { return 1; } } last unless ( $path =~ m///; } return 0; } =head2 WORKING WITH ATTRIBUTES =over 4 =item treat_attributes(@) This function handles the translation of the tags' attributes. It receives the tag without the beginning / end marks, and then it finds the attributes, and it translates the translatable ones (specified by the module option B). This returns a plain string with the translated tag. =back =cut sub treat_attributes { my ( $self, @tag ) = @_; $tag[0] =~ /^(\S*)(.*)/s; my $text = $1; $tag[0] = $2; while (@tag) { my $complete = 1; $text .= $self->skip_spaces( \@tag ); if (@tag) { # Get the attribute's name $complete = 0; $tag[0] =~ /^([^\s=]+)(.*)/s; my $name = $1; my $ref = $tag[1]; $tag[0] = $2; $text .= $name; $text .= $self->skip_spaces( \@tag ); if (@tag) { # Get the '=' if ( $tag[0] =~ /^=(.*)/s ) { $tag[0] = $1; $text .= "="; $text .= $self->skip_spaces( \@tag ); if (@tag) { # Get the value my $value = ""; $ref = $tag[1]; my $quot = substr( $tag[0], 0, 1 ); if ( $quot ne "\"" and $quot ne "'" ) { # Unquoted value $quot = ""; $tag[0] =~ /^(\S+)(.*)/s; $value = $1; $tag[0] = $2; } else { # Quoted value $text .= $quot; $tag[0] =~ /^\Q$quot\E(.*)/s; $tag[0] = $1; while ( $tag[0] !~ /\Q$quot\E/ ) { $value .= $tag[0]; shift @tag; shift @tag; } $tag[0] =~ /^(.*?)\Q$quot\E(.*)/s; $value .= $1; $tag[0] = $2; } $complete = 1; if ( $self->tag_in_list( $self->get_path . 
$name, $self->{attributes} ) ) { $text .= $self->found_string( $value, $ref, { type => "attribute", attribute => $name } ); } else { print wrap_mod( "po4a::xml::treat_attributes", dgettext( "po4a", "%s: attribute '%s' is not defined in module option 'attributes' and\n" . ".... is not translated for the attribute path '%s'" ), $ref, $value, $self->get_path . $name ) if $self->{options}{'debug'}; $text .= $value; } $text .= $quot; } } else { # This is an attribute with no '=' sign, nothing to translate $complete = 1; } } unless ($complete) { my $ontagerror = $self->{options}{'ontagerror'}; if ( $ontagerror eq "warn" ) { warn wrap_mod( "po4a::xml::treat_attributes", dgettext( "po4a", "%s: Bad attribute syntax. Continuing…" ), $ref ); } elsif ( $ontagerror ne "silent" ) { die wrap_mod( "po4a::xml::treat_attributes", dgettext( "po4a", "%s: Bad attribute syntax" ), $ref ); } } } } return $text; } # Returns an empty string if the content in the $path should not be # translated. # # Otherwise, returns the set of options for translation: # w: the content shall be re-wrapped # W: the content shall not be re-wrapped # i: the tag shall be inlined # p: a placeholder shall replace the tag (and its content) # n: a custom tag # f: fold attribute # # A translatable inline tag in an untranslated tag is treated as a translatable breaking tag. sub get_translate_options { my $self = shift; my $path = shift; if ( defined $translate_options_cache{$path} ) { # print "option($path)=".$translate_options_cache{$path}." (cached)\n"; return $translate_options_cache{$path}; } my $options = ""; my $translate = 0; my $usedefault = 1; my $inlist = 0; my $tag = $self->get_tag_from_list( $path, $self->{tags} ); if ( defined $tag ) { $inlist = 1; } # Note: tags option is deprecated. --> $inlist should be 0 now if ( $self->{options}{'tagsonly'} eq $inlist ) { # Note: tags option is deprecated. --> $inlist should be 0 now # Default is not to use tagsonly --> You are here. 
$usedefault = 0; if ( defined $tag ) { $options = $tag; $options =~ s/<.*$//; } else { # Note: tags option is deprecated. --> $tag is undefined # $self->{options}{'wrap'} = 0 ... xml inherent default # $self->{options}{'wrap'} = 1 ... docbook overridden default # This sets all tags unlisted in translated nor untranslated to become translated tag normally if ( $self->{options}{'wrap'} ) { $options = "w"; } else { $options = "W"; } } $translate = 1; } # TODO: a less precise set of tags should not override a more precise one # The tags and tagsonly options are deprecated. # The translated and untranslated options have a higher priority. $tag = $self->get_tag_from_list( $path, $self->{translated} ); if ( defined $tag ) { $usedefault = 0; $options = $tag; $options =~ s/<.*$//; $translate = 1; } if ( $translate and $options !~ m/w/i ) { $options .= ( $self->{options}{'wrap'} ) ? "w" : "W"; } if ( not defined $tag ) { $tag = $self->get_tag_from_list( $path, $self->{untranslated} ); if ( defined $tag ) { $usedefault = 0; $options = ""; $translate = 0; } } $tag = $self->get_tag_from_list( $path, $self->{inline} ); if ( defined $tag ) { $usedefault = 0; $options .= "i"; } else { $tag = $self->get_tag_from_list( $path, $self->{placeholder} ); if ( defined $tag ) { $usedefault = 0; $options .= "p"; } } $tag = $self->get_tag_from_list( $path, $self->{customtag} ); if ( defined $tag ) { $usedefault = 0; $options = "in"; # This erases any other setting } if ($usedefault) { $options = $self->{options}{'defaulttranslateoption'}; } # A translatable inline tag in an untranslated tag is treated as a # translatable breaking tag. 
if ( $options =~ m/i/ ) { my $ppath = $path; $ppath =~ s/<[^>]*>$//; my $poptions = $self->get_translate_options($ppath); if ( $poptions eq "" ) { $options =~ s/i//; print wrap_mod( "po4a::xml::get_translate_options", dgettext( "po4a", "%s: translation option='%s'.\n *** the original translation option is overridden here since parent path='%s' is untranslated," ), $path, $options, $ppath ) if $self->{options}{'debug'}; } } if ( $options =~ m/i/ and $self->{options}{'foldattributes'} ) { $options .= "f"; } if ( $options !~ m/i/ and $self->{options}{'foldattributes'} ) { print wrap_mod( "po4a::xml::get_translate_options", dgettext( "po4a", "%s: foldattributes setting ignored since '%s' is not inline tag" ), $path, $tag ) if $self->{options}{'debug'}; } $translate_options_cache{$path} = $options; # print "option($path)=".$translate_options_cache{$path}." (new)\n"; #print wrap_mod("po4a::xml::get_translate_options", dgettext ("po4a", "%s: options: '%s'"), $path, $options) if $self->{options}{'debug'}; return $options; } # Return the tag (or biggest set of tags) of a list which matches with the # given path. # # The tag (or set of tags) is returned with its options. # # If no tags could match the path, undef is returned. sub get_tag_from_list ($$$) { my ( $self, $path, $list ) = @_; if ( $self->{options}{'caseinsensitive'} ) { $path = lc $path; } while (1) { if ( defined $list->{$path} ) { return $list->{$path} . $path; } last unless ( $path =~ m///; } return undef; } =head2 WORKING WITH TAGGED CONTENTS =over 4 =item treat_content() This function gets the text until the next breaking tag (not inline) from the input stream. Translate it using each tag type's custom translation functions. This works on the array C<< @{$self->{TT}{doc_in}} >> holding input document data and reference indirectly via C<< $self->shiftline() >> and C<< $self->unshiftline($$) >>. 
=back

=cut

sub treat_content {
    my $self = shift;

    # Trailing blanks stripped from the paragraph; pushed back after the
    # (possibly translated) paragraph has been emitted.
    my $blank = "";

    # Translation options of the current path
    my $translate = "";

    my ( $eof, @paragraph ) = $self->get_string_until( '<', { remove => 1 } );

    # Please note even index of array @paragraph holds actual text of input line
    # Please note odd index of array @paragraph holds its reference = $filename:$flinenum

    # Accumulate text and non-breaking tags until a breaking tag (or the end
    # of the input) is met.
    while ( !$eof and !$self->breaking_tag ) {
      NEXT_TAG:
        my @text;
        my $type      = $self->tag_type;
        my $f_extract = $tag_types[$type]->{'f_extract'};
        if ( defined($f_extract) and $f_extract eq \&tag_extract_comment ) {

            # Comment tag: remove it from the input stream and save its
            # content to @comments for use by translate_paragraph.
            print wrap_mod( "po4a::xml::treat_content", "%s: type='%s'", $paragraph[1], $type )
              if $self->{options}{'debug'};
            ( $eof, @text ) = $self->extract_tag( $type, 1 );

            # Add "\0" to mark end of each separate comment
            $text[ $#text - 1 ] .= "\0";
            if ( $tag_types[$type]->{'beginning'} eq "!--#" ) {
                $text[0] = "#" . $text[0];
            }
            push @comments, @text;
        } else {

            # Inline tag: peek at it (remove flag off) to get its name in
            # @tag, then take the whole tag with its attributes out of the
            # input stream into @text.
            my ( $tmpeof, @tag ) = $self->extract_tag( $type, 0 );

            # Append the found inline tag
            ( $eof, @text ) = $self->get_string_until( '>', { include => 1, remove => 1, unquoted => 1 } );

            # print "cur: ".$self->get_tag_name(@tag)."\n";

            # Append or remove the opening/closing tag from the tag path
            my $beginning = $tag_types[$type]->{'beginning'};
            my $end       = $tag_types[$type]->{'end'};
            if ( $end eq "" ) {
                if ( $beginning eq "" ) {
                    $self->treat_content_open_tag( \@tag, \@paragraph, \@text );
                } elsif ( $beginning eq "/" ) {
                    $self->treat_content_close_tag( \@tag, \@paragraph, \@text );
                }
            } elsif ( $beginning eq "" && $end eq "/" ) {

                # As for empty-element tag,
                # treat as if both open and close tags exist
                $self->treat_content_open_tag( \@tag, \@paragraph, \@text );
                $self->treat_content_close_tag( \@tag, \@paragraph, \@text );
            }
            push @paragraph, @text;
        }

        # Next tag
        ( $eof, @text ) = $self->get_string_until( '<', { remove => 1 } );
        if ( $#text > 0 ) {

            # Check if text (extracted after the inline tag)
            # has to be translated
            push @paragraph, @text;
        }
    }

    # This strips the extracted strings
    # (only if you don't specify the 'nostrip' option, and if the
    # paragraph can be re-wrapped)
    $translate = $self->get_translate_options( $self->get_path );
    if ( !$self->{options}{'nostrip'} and $translate !~ m/W/ ) {
        my $clean = 0;

        # Clean the beginning: leading blanks are pushed to the output as is,
        # and entries made only of blanks are dropped from the paragraph.
        while ( !$clean and $#paragraph > 0 ) {
            my ( $lead, $rest ) = $paragraph[0] =~ /^(\s*)(.*)/s;
            $self->pushline($lead) if $lead ne "";
            if ( $paragraph[0] eq $lead ) {
                shift @paragraph;    # text
                shift @paragraph;    # reference
            } else {
                $paragraph[0] = $rest;
                $clean = 1;
            }
        }

        # Clean the end: trailing blanks are collected in $blank.
        $clean = 0;
        while ( !$clean and $#paragraph > 0 ) {
            my $last = $#paragraph - 1;    # index of the last text entry
            my ( $rest, $trail ) = $paragraph[$last] =~ /^(.*?)(\s*)$/s;
            $blank = $trail . $blank if $trail ne "";
            if ( $paragraph[$last] eq $trail ) {
                pop @paragraph;            # reference
                pop @paragraph;            # text
            } else {
                $paragraph[$last] = $rest;
                $clean = 1;
            }
        }
    }

    # Translate the string when needed
    # This will either push the translation in the translated document or
    # in the current holder translation.
    $self->translate_paragraph(@paragraph);

    # Push the trailing blanks
    if ( $blank ne "" ) {
        $self->pushline($blank);
    }
    return $eof;
}

# Processes open tags during getting texts.
# Performs special process for placeholder and attribute folding.
#
# Arguments (all array references, modified in place):
#   $tag       - the tag as returned by extract_tag (used for its name)
#   $paragraph - the paragraph being collected by treat_content
#   $text      - the whole tag, with its attributes, as read from the input
sub treat_content_open_tag {
    my $self = shift;
    my ( $tag, $paragraph, $text ) = @_;

    my $cur_tag_name = $self->get_tag_name(@$tag);
    my $t_opts       = $self->get_translate_options( $self->get_path($cur_tag_name) );

    if ( $t_opts =~ m/p/ ) {

        # The tag has the placeholder option: append a placeholder tag to
        # @$paragraph, carrying the tag name as type and the index of the
        # next sub-translation of the current holder as id.
        # NOTE(review): this literal was damaged in the reviewed copy; it is
        # rebuilt from the surrounding comment and from the "type then id"
        # contract enforced by translate_paragraph -- confirm against upstream.
        my $last_holder = $save_holders[$#save_holders];
        my $placeholder_str =
            "<placeholder type=\""
          . $cur_tag_name
          . "\" id=\""
          . ( $#{ $last_holder->{'sub_translations'} } + 1 ) . "\"/>";
        push @$paragraph, ( $placeholder_str, $text->[1] );
        my @saved_paragraph = @$paragraph;
        $last_holder->{'paragraph'} = \@saved_paragraph;

        # make attributes be able to be translated
        my $open_tag = $self->join_lines(@$text);
        if ( $open_tag =~ m/^<(\s*)(\S+\s+\S.*)>$/s ) {
            my ( $ws, $tag_inner ) = ( $1, $2 );

            # A failed substitution leaves $1 untouched (it would still hold
            # $ws from the match above), so only read it on success.
            my $postfix = ( $tag_inner =~ s|(\s*/)$|| ) ? $1 : "";
            push @path, $cur_tag_name;
            $open_tag = "<" . $ws . $self->treat_attributes($tag_inner) . $postfix . ">";
            pop @path;
        }

        # Then we must push a new holder into @save_holders
        my @new_paragraph    = ();
        my @sub_translations = ();
        my %folded_attributes;
        my %new_holder = (
            'paragraph'         => \@new_paragraph,
            'open'              => $open_tag,
            'translation'       => "",
            'close'             => undef,
            'sub_translations'  => \@sub_translations,
            'folded_attributes' => \%folded_attributes
        );
        push @save_holders, \%new_holder;

        # reset @$text holding the whole tag with attributes
        # to empty
        @$text = ();

        # reset the current @$paragraph (for the current holder)
        # to empty.
        @$paragraph = ();
    } elsif ( $t_opts =~ m/f/ ) {

        # tag has a "f" option for folded attributes: the full tag is kept in
        # the current holder and replaced in the paragraph by a short tag
        # carrying only a po4a-id.
        my $tag_full = $self->join_lines(@$text);
        my $tag_ref  = $text->[1];
        if ( $tag_full =~ m/^<(\s*)(\S+\s+\S.*)>$/s ) {
            my ( $ws, $tag_inner ) = ( $1, $2 );
            my $holder = $save_holders[$#save_holders];

            # Next free id: one more than the highest id already in use
            my $id = 0;
            foreach my $used_id ( keys %{ $holder->{folded_attributes} } ) {
                $id = $used_id + 1 if ( $used_id >= $id );
            }

            # make attributes be able to be translated
            # (see above for why $1 is only read on success)
            my $postfix = ( $tag_inner =~ s|(\s*/)$|| ) ? $1 : "";
            push @path, $cur_tag_name;
            $holder->{folded_attributes}->{$id} =
              "<" . $ws . $self->treat_attributes($tag_inner) . $postfix . ">";
            pop @path;
            @$text = ( "<$cur_tag_name po4a-id=$id>", $tag_ref );
        }
    }
    unless ( $t_opts =~ m/n/ ) {

        # unless "n" for custom (such as non-XML HTML) tag, update @path
        push @path, $cur_tag_name;
    }
}

# Processes close tags during getting texts.
# Performs special process for placeholder.
#
# Takes the same three array references as treat_content_open_tag.
sub treat_content_close_tag {
    my $self = shift;
    my ( $tag, $paragraph, $text ) = @_;

    # Verify this closing tag matches with the last opening tag
    # while removing the last opening tag in @path
    my $test = pop @path;
    my $name = $self->get_tag_name(@$tag);
    if ( !defined($test) || $test ne $name ) {

        # NOTE(review): $name is passed to both messages below although
        # neither format contains a %s; a placeholder was probably lost from
        # these strings -- confirm against the message catalog.
        my $ontagerror = $self->{options}{'ontagerror'};
        if ( $ontagerror eq "warn" ) {
            warn wrap_ref_mod(
                $tag->[1],
                "po4a::xml",
                dgettext( "po4a", "Unexpected closing tag found. The main document may be wrong. Continuing…" ),
                $name
            );
        } elsif ( $ontagerror ne "silent" ) {
            die wrap_ref_mod(
                $tag->[1],
                "po4a::xml",
                dgettext( "po4a", "Unexpected closing tag found. The main document may be wrong." ),
                $name
            );
        }
    }

    if ( $self->get_translate_options( $self->get_path($name) ) =~ m/p/ ) {

        # this closing tag has a placeholder option
        # revert @path to include this tag for translate_paragraph
        push @path, $name;

        # Now translate this paragraph if needed.
        # This will call pushline and append the
        # translation to the current holder's translation.
        $self->translate_paragraph(@$paragraph);

        # remove this tag from @path
        pop @path;

        # Now that this holder is closed, we can remove
        # the holder from the stack.
        my $holder = pop @save_holders;

        # We need to keep the translation of this holder
        my $translation = $holder->{'open'} . $holder->{'translation'};
        $translation .= $self->join_lines(@$text);

        @$text = ();

        # Then we store the translation in the previous
        # holder's sub_translations array
        my $previous_holder = $save_holders[$#save_holders];
        push @{ $previous_holder->{'sub_translations'} }, $translation;

        # We also need to restore the @$paragraph array, as
        # it was before we encountered the holder.
        @$paragraph = @{ $previous_holder->{'paragraph'} };
    }
}

# Translate a @paragraph array of (string, reference).
# translate_paragraph(@paragraph)
#
# Outputs one paragraph of the content of the current tag, translating it
# when the translation options of the current path say so.
#
# @paragraph is a flat list of (text, reference) pairs, as returned by
# get_string_until.  The translation options are not passed as an
# argument: they are looked up from the current path.
#
# Steps:
#  - leading blank lines are pushed verbatim;
#  - the comments collected in @comments are written back before the
#    paragraph and handed to found_string as a note for the translators;
#  - with the 'cpp' option, preprocessor directives are pushed verbatim and
#    the text between them is handled as separate paragraphs;
#  - the remaining text is translated (or pushed as is);
#  - finally, when all the placeholders of the innermost holder have been
#    translated, they are substituted into the holder's translation.
sub translate_paragraph {
    my $self      = shift;
    my @paragraph = @_;
    my $translate = $self->get_translate_options( $self->get_path );

    # Leading blank lines are not part of the translatable text
    while ( ( scalar @paragraph )
        and ( $paragraph[0] =~ m/^\s*\n/s ) )
    {
        $self->pushline( $paragraph[0] );
        shift @paragraph;
        shift @paragraph;
    }

    my $comments;
    while (@comments) {
        my ( $comment, $eoc );
        do {
            my $t = shift @comments;
            shift @comments;    # drop the reference of this line

            # A trailing \0 marks the last line of one comment
            $t =~ s/\n?(\0)?$//;
            $eoc = $1;
            $comment .= "\n" if defined $comment;
            $comment .= $t;

            # Also stop when the list is exhausted: without this guard, a
            # missing end-of-comment marker would loop forever.
        } until ( $eoc or not @comments );
        $comments .= "\n" if defined $comments;
        $comments .= $comment;

        # NOTE(review): the comment delimiters were missing from this
        # statement (only "\n" was pushed, which dropped the comment from
        # the output); restored as an XML comment -- confirm.
        $self->pushline( "<!--" . $comment . "-->\n" ) if defined $comment;
    }
    @comments = ();

    if ( $self->{options}{'cpp'} ) {
        my @tmp = @paragraph;
        @paragraph = ();
        while (@tmp) {
            my ( $t, $l ) = ( shift @tmp, shift @tmp );

            # #include can be followed by a filename between
            # <> brackets. In that case, the argument won't be
            # handled in the same call to translate_paragraph.
            # Thus do not try to match "include ".
            if ( $t =~ m/^#[ \t]*(if |endif|undef |include|else|ifdef |ifndef |define )/si ) {
                if (@paragraph) {
                    $self->translate_paragraph(@paragraph);
                    @paragraph = ();
                    $self->pushline("\n");
                }
                $self->pushline($t);
            }
            else {
                push @paragraph, ( $t, $l );
            }
        }
    }

    my $para = $self->join_lines(@paragraph);
    if ( length($para) > 0 ) {
        if ( $translate ne "" ) {

            # This tag should be translated
            print wrap_mod(
                "po4a::xml::translate_paragraph",
                "%s: path='%s', translation option='%s'",
                $paragraph[1], $self->get_path, $translate
            ) if $self->{options}{'debug'};
            $self->pushline(
                $self->found_string(
                    $para,
                    $paragraph[1],
                    {
                        type        => "tag",
                        tag_options => $translate,
                        comments    => $comments
                    }
                )
            );
        }
        else {

            # Inform that this tag isn't translated in debug mode
            print wrap_mod(
                "po4a::xml::translate_paragraph",
                "%s: path='%s', translation option='%s' (no translation)",
                $paragraph[1], $self->get_path, $translate
            ) if $self->{options}{'debug'};
            $self->pushline($para);
        }
    }

    # Now the paragraph is fully translated.
    # If we have all the holders' translation, we can replace the
    # placeholders by their translations.
    # We must wait to have all the translations because the holders are
    # numbered.
    {
        # A well-formed placeholder; the only capture group is its id.
        # NOTE(review): the placeholder patterns of this block were
        # mangled (one of them was the empty pattern, which Perl replaces
        # by the last successful pattern); they are rebuilt from the
        # error message below ('type' then 'id') -- confirm.
        my $placeholder_re = qr/<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>/s;

        my $holder      = $save_holders[-1];
        my $translation = $holder->{'translation'};

        # Count the number of <placeholder ...> in $translation
        my $count = 0;
        my $str   = $translation;
        while ( ( defined $str )
            and ( $str =~ m/^.*?<placeholder\s+[^>]*>(.*)$/s ) )
        {
            die wrap_mod(
                "po4a::xml",
                dgettext(
                    "po4a",
                    "Invalid placeholder in the translation (the 'type' and 'id' must be present, in this order).\n%s\n\nPlease fix your translation."
                ),
                $str
            ) unless ( $str =~ m/^.*?$placeholder_re(.*)$/s );
            my ( $id, $rest ) = ( $1, $2 );

            $count += 1;
            $str = $rest;

            # A sub-translation that still contains a placeholder is not
            # complete yet: postpone the substitution.
            my $sub_translation = $holder->{'sub_translations'}->[$id];
            if ( defined $sub_translation
                and $sub_translation =~ m/$placeholder_re/ )
            {
                $count = -1;
                last;
            }
        }

        if ( ( defined $translation )
            and ( scalar( @{ $holder->{'sub_translations'} } ) == $count ) )
        {
            # OK, all the holders of the current paragraph are
            # closed (and translated).
            # Replace them by their translation.
            while ( $translation =~ m/^(.*?)$placeholder_re(.*)$/s ) {

                # FIXME: we could also check that
                #          * the holder exists
                #          * all the holders are used
                $translation = $1 . $holder->{'sub_translations'}->[$2] . $3;
            }

            # We have our translation
            $holder->{'translation'} = $translation;

            # And there is no need for any holder in it.
            $holder->{'sub_translations'} = [];
        }
    }

}

=head2 WORKING WITH THE MODULE OPTIONS

=over 4

=item treat_options()

This function fills the internal structures that contain the tags, attributes
and inline data with the options of the module (specified in the command-line
or in the initialize function).

=back

=cut

sub treat_options {
    my $self = shift;

    # Categories that have both a user option and a '_default_' option.
    # The order is the order in which the options are processed.
    my @categories = qw(break translated untranslated attributes inline placeholder customtag);

    if ( $self->{options}{'caseinsensitive'} ) {
        foreach my $name ( 'nodefault', 'tags', map { ( $_, "_default_$_" ) } @categories ) {
            $self->{options}{$name} = lc( $self->{options}{$name} // '' );
        }
    }

    # Returns the whitespace-separated entries of an option (an undefined
    # option has no entry).
    my $words = sub {
        my ($name) = @_;
        my $value = $self->{options}{$name} // '';

        # Without this, split would return a leading empty entry
        $value =~ s/^\s+//s;
        return split( /\s+/s, $value );
    };

    my %list_nodefault = map { $_ => 1 } $words->('nodefault');
    $self->{nodefault} = \%list_nodefault;

    # Stores the entries of option $option into $self->{$category}.
    # An entry has the form "[options]<tag>"; for attributes, a bare name
    # (without any "<") is also accepted.
    # Entries coming from a default list ($is_default) never override a tag
    # set by the user, and are skipped for the tags listed in 'nodefault'.
    my $register = sub {
        my ( $category, $option, $is_default ) = @_;
        foreach my $entry ( $words->($option) ) {
            my ( $flags, $tag );
            if ( $entry =~ m/^(.*?)(<.*)$/ ) {
                ( $flags, $tag ) = ( $1 || "", $2 );
            }
            elsif ( $category eq 'attributes' ) {
                ( $flags, $tag ) = ( "", $entry );
            }
            else {
                # The captures of a failed match are stale: using them
                # would store a bogus entry under an undefined key.
                warn "po4a::xml::treat_options: WARN: entry '$entry' of the '$option' option has no <tag>, it is ignored.\n";
                next;
            }
            next
              if $is_default
              and ( $list_nodefault{$tag} or defined $self->{$category}->{$tag} );
            $self->{$category}->{$tag} = $flags;
        }
    };

    if ( length $self->{options}{'tags'} ) {
        warn wrap_mod(
            "po4a::xml::treat_options",
            dgettext(
                "po4a",
                "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories."
            ),
            "tags"
        );
    }
    $register->( 'tags', 'tags', 0 );

    if ( $self->{options}{'tagsonly'} ) {
        warn wrap_mod(
            "po4a::xml::treat_options",
            dgettext(
                "po4a",
                "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories."
            ),
            "tagsonly"
        );
    }

    # The user settings are registered first, so that they take precedence
    # over the defaults of the module.
    foreach my $category (@categories) {
        $register->( $category, $category,            0 );
        $register->( $category, "_default_$category", 1 );
    }

    # If break-pi is provided, we should ensure that: $tag_types[the one of PI]->breaking is 1
    if ( $self->{options}{'break-pi'} ) {
        foreach my $type (@tag_types) {
            $type->{breaking} = 1
              if $type->{beginning} eq '?' && $type->{end} eq '?';
        }
    }

    foreach my $tagtype (qw(untranslated)) {
        foreach my $tag ( sort keys %{ $self->{$tagtype} } ) {

            # NOTE(review): this only matches the literal sequence "wW";
            # m/[wW]/ may have been intended -- confirm before changing.
            warn "po4a::xml::treat_options: WARN: tag='$tag' is $tagtype tag, translation option='$self->{$tagtype}->{$tag}' is ignores wW.\n"
              if $self->{$tagtype}->{$tag} =~ m/wW/;
        }
    }
    foreach my $tagtype (qw(inline break placeholder customtag)) {
        foreach my $tag ( sort keys %{ $self->{$tagtype} } ) {

            # This is fatal, although the message is worded as a warning
            die "po4a::xml::treat_options: WARN: tag='$tag' is $tagtype tag, translation option='$self->{$tagtype}->{$tag}' is ignored.\n"
              if $self->{$tagtype}->{$tag} ne "";
        }
    }
    foreach my $tagtype (qw(attributes)) {
        foreach my $tag ( sort keys %{ $self->{$tagtype} } ) {
            warn "po4a::xml::treat_options: WARN: tag='$tag' is $tagtype tag, translation option='$self->{$tagtype}->{$tag}' is ignored.\n"
              if $self->{$tagtype}->{$tag} ne "";
        }
    }

    # Debug output of internal parameters for generic XML parser
    # Marked content of a XML tag can be either "translated" or "untranslated".
    #   -- XML tags in these may specify options: wWip
    # Extraction of XML content can be one of "inline", "break", "placeholder", or "customtag".
    #   -- XML tags in these must not specify options
    if ( $self->{options}{'debug'} ) {
        foreach my $tagtype (qw(translated untranslated)) {
            foreach my $tag ( sort keys %{ $self->{$tagtype} } ) {
                print "po4a::xml::treat_options: $tag: translation option='$self->{$tagtype}->{$tag}' (original), but listed in '$tagtype'";
                foreach my $tagtype1 (qw(inline break placeholder customtag)) {
                    if ( exists $self->{$tagtype1}->{$tag} ) {
                        print " / '$tagtype1'";
                    }
                }
                print "\n";
                print wrap_mod(
                    "po4a::xml::treat_options",
                    "%s: translation option='%s' (valid)",
                    $tag, $self->get_translate_options($tag)
                );
            }
        }
        foreach my $tag ( sort keys %{ $self->{'attributes'} } ) {
            print "po4a::xml::treat_options: $tag: translated attributes.\n";
        }
    }

    # There should be no translated and untranslated tags
    foreach my $tag ( keys %{ $self->{translated} } ) {
        die wrap_mod(
            "po4a::xml::treat_options",
            dgettext( "po4a", "Tag '%s' both in the %s and %s categories." ),
            $tag, "translated", "untranslated"
        ) if defined $self->{untranslated}->{$tag};
    }

    # There should be no inline, break, placeholder, and customtag tags:
    # each tag of a category is checked against all the following ones.
    my @extractions = qw(inline break placeholder customtag);
    foreach my $i ( 0 .. $#extractions - 1 ) {
        my $first = $extractions[$i];
        foreach my $tag ( keys %{ $self->{$first} } ) {
            foreach my $second ( @extractions[ $i + 1 .. $#extractions ] ) {
                die wrap_mod(
                    "po4a::xml::treat_options",
                    dgettext( "po4a", "Tag '%s' both in the %s and %s categories." ),
                    $tag, $first, $second
                ) if defined $self->{$second}->{$tag};
            }
        }
    }
}

=head2 GETTING TEXT FROM THE INPUT DOCUMENT

=over

=item get_string_until($%)

This function returns an array with the lines (and references) from the input
document until it finds the first argument.  The second argument is an options
hash. Value 0 means disabled (the default) and 1, enabled.

The valid options are:

=over 4

=item B<include>

This makes the returned array to contain the searched text

=item B<remove>

This removes the returned stream from the input

=item B<unquoted>

This ensures that the searched text is outside any quotes

=item B<regex>

This denotes that the first argument is a regular expression rather than an
plain string

=back

=cut

sub get_string_until {
    my ( $self, $search ) = ( shift, shift );
    my $options = shift;
    $options //= {};
    my $include  = $options->{include}  // 0;
    my $remove   = $options->{remove}   // 0;
    my $unquoted = $options->{unquoted} // 0;
    my $regex    = $options->{regex}    // 0;

    my ( $line, $ref ) = $self->shiftline();
    my ( @text, $paragraph );
    my ( $eof, $found ) = ( 0, 0 );

    $search = "\Q$search\E" unless $regex;

    # Text made only of quoted strings and of characters that are not quotes
    my $outside_quotes = qr/(?:\".*?\")|(?:\'.*?\')|[^\"\']/s;

    # The searched pattern is always grouped, so that an alternation or a
    # capture group in a caller's regular expression can neither split the
    # patterns below nor shift their captures.
    while ( defined($line) and !$found ) {
        push @text, ( $line, $ref );
        $paragraph .= $line;
        if ($unquoted) {
            $found = 1 if $paragraph =~ /^(?:$outside_quotes)*(?:$search)/s;
        }
        else {
            $found = 1 if $paragraph =~ /(?:$search)/s;
        }
        if ( !$found ) {
            ( $line, $ref ) = $self->shiftline();
        }
    }

    if ( !defined($line) ) {
        $eof = 1;
    }

    if ($found) {

        # $line receives what follows the searched text; it is removed from
        # the last returned line and given back to the input.
        $line = "";
        if ($unquoted) {
            $line = $+{rest}
              if $paragraph =~ /^(?:$outside_quotes)*?(?:$search)(?<rest>.*)$/s;
        }
        else {
            $line = $+{rest}
              if $paragraph =~ /(?:$search)(?<rest>.*)$/s;
        }
        $text[-2] =~ s/\Q$line\E$//s;

        if ( !$include ) {

            # The searched text is given back to the input too.
            # This can only be done when it starts in the last line:
            # otherwise the line is left untouched.
            if ( $text[-2] =~ /^(?<head>.*)(?<tail>(?:$search).*)$/s ) {
                my ( $head, $tail ) = ( $+{head}, $+{tail} );
                $text[-2] = $head;
                $line = $tail . $line;
            }
        }
        if ( defined($line) and ( $line ne "" ) ) {
            $self->unshiftline( $line, $text[-1] );
        }
    }
    if ( !$remove ) {
        $self->unshiftline(@text);
    }

    #If we get to the end of the file, we return the whole paragraph
    return ( $eof, @text );
}

=item skip_spaces(\@)

This function receives as argument the reference to a paragraph (in the format
returned by get_string_until), skips his heading spaces and returns them as
a simple string.

=cut

sub skip_spaces {
    my ( $self, $pstring ) = @_;
    my $space = "";

    while (@$pstring) {
        if ( $pstring->[0] =~ /^(\s+)(.*)$/s ) {

            # Move the heading spaces of this line to the result
            $space .= $1;
            $pstring->[0] = $2;
        }
        elsif ( $pstring->[0] ne "" ) {

            # First character which is not a space: we are done
            last;
        }

        # A line without any text left is dropped with its reference
        if ( $pstring->[0] eq "" ) {
            shift @$pstring;
            shift @$pstring;
        }
    }
    return $space;
}

=item join_lines(@)

This function returns a simple string with the text from the argument array
(discarding the references).

=cut

sub join_lines {
    my ( $self, @lines ) = @_;
    my $text = "";

    # The array holds (text, reference) pairs: only the complete pairs are
    # used, and only their text.
    for ( my $i = 0 ; $i < $#lines ; $i += 2 ) {
        $text .= $lines[$i];
    }
    return $text;
}

=back

=head1 STATUS OF THIS MODULE

This module can translate tags and attributes.

=head1 TODO LIST

DOCTYPE (ENTITIES)

There is a minimal support for the translation of entities. They are
translated as a whole, and tags are not taken into account. Multilines
entities are not supported and entities are always rewrapped during the
translation.

MODIFY TAG TYPES FROM INHERITED MODULES
(move the tag_types structure inside the $self hash?)

=head1 SEE ALSO

L<Locale::Po4a::TransTractor(3pm)>,
L<po4a(7)|po4a.7>

=head1 AUTHORS

 Jordi Vilalta
 Nicolas François

=head1 COPYRIGHT AND LICENSE

 Copyright © 2004 Jordi Vilalta
 Copyright © 2008-2009 Nicolas François

This program is free software; you may redistribute it and/or modify it
under the terms of GPL v2.0 or later (see the COPYING file).

=cut

1;