diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-17 11:26:17 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-17 11:26:17 +0000 |
commit | 5df6c2aefebe3d2abcc939a88e294876d59f03ca (patch) | |
tree | 63fb332a0f21ddb91cb789c80cf64e134d373463 /lib/Locale/Po4a | |
parent | Initial commit. (diff) | |
download | po4a-5df6c2aefebe3d2abcc939a88e294876d59f03ca.tar.xz po4a-5df6c2aefebe3d2abcc939a88e294876d59f03ca.zip |
Adding upstream version 0.72.upstream/0.72
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lib/Locale/Po4a')
27 files changed, 20701 insertions, 0 deletions
diff --git a/lib/Locale/Po4a/AsciiDoc.pm b/lib/Locale/Po4a/AsciiDoc.pm new file mode 100644 index 0000000..6fe1cd1 --- /dev/null +++ b/lib/Locale/Po4a/AsciiDoc.pm @@ -0,0 +1,1449 @@ +#!/usr/bin/perl -w + +# http://asciidoc.org/userguide.html + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::AsciiDoc - convert AsciiDoc documents from/to PO files + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::AsciiDoc is a module to help the translation of documentation in +the AsciiDoc format. + +=cut + +package Locale::Po4a::AsciiDoc; + +use 5.16.0; +use strict; +use warnings; + +require Exporter; +use vars qw(@ISA @EXPORT); +@ISA = qw(Locale::Po4a::TransTractor); +@EXPORT = qw(); + +use Locale::Po4a::TransTractor; +use Locale::Po4a::Common; +use YAML::Tiny; + +=head1 OPTIONS ACCEPTED BY THIS MODULE + +These are this module's particular options: + +=over + +=item B<definitions> + +The name of a file containing definitions for po4a, as defined in the +B<INLINE CUSTOMIZATION> section. +You can use this option if it is not possible to put the definitions in +the document being translated. + +In a definitions file, lines must not start by two slashes, but directly +by B<po4a:>. + +=item B<entry> + +Space-separated list of attribute entries you want to translate. By default, +no attribute entries are translatable. + +=item B<macro> + +Space-separated list of macro definitions. + +=item B<style> + +Space-separated list of style definitions. + +=item B<forcewrap> + +Enable automatic line wrapping in non-verbatim blocks, even if the +result could be misinterpreted by AsciiDoc formatters. + +By default, po4a will not wrap the produced AsciiDoc files because a +manual inspection is mandated to ensure that the wrapping does not +change the formatting. Consider for instance the following list +item: + + * a long sentence that is ending with a number 1. A second sentence. + +If the wrapping leads to the following presentation, the item is +split into a numbered sub-list. To make things worse, only the +speakers of the language used in the translation can inspect the +situation. + + * a long sentence that is ending with a number + 1. A second sentence. + +Note that not wrapping the files produced by po4a should not be a +problem since those files are meant to be processed automatically. +They should not be regarded as source files anyway. + +With this option, po4a will produce better-looking AsciiDoc files, but it +may lead to possibly erroneous formatted outputs. + +=item B<noimagetargets> + +By default, the targets of block images are translatable to give opportunity +to make the content point to translated images. This can be stopped by setting +this option. + +=item B<tablecells> + +This option is a flag that enables sub-table segmentation into cell content. +The segmentation is limited to cell content, without any parsing inside of it. + +=item B<compat> + +Switch parsing rules to compatibility with different tools. Available options are +"asciidoc" or "asciidoctor". Asciidoctor has stricter parsing rules, such as +equality of length of opening and closing block fences. + +=item B<nolinting> + +Disable linting messages. When the source code cannot be fixed for clearer document structure, these messages are useless. + +=item B<cleanspaces> + +Remove extra spaces from the source segments in no-wrap mode. This is useful when the +translation tools are sensitive to the number of spaces. + +=item B<yfm_keys> + +Comma-separated list of keys to process for translation in the YAML Front Matter +section. All other keys are skipped. Keys are matched with a case-sensitive +match. If B<yfm_paths> and B<yfm_keys> are used together, values are included if +they are matched by at least one of the options. Array values are always translated, +unless the B<yfm_skip_array> option is provided. + +=cut + +my %yfm_keys = (); + +=item B<yfm_skip_array> + +Do not translate array values in the YAML Front Matter section. + +=cut + +my $yfm_skip_array = 0; + +=item B<yfm_paths> + +Comma-separated list of hash paths to process for extraction in the YAML +Front Matter section, all other paths are skipped. Paths are matched with a +case-sensitive match. If B<yfm_paths> and B<yfm_keys> are used together, +values are included if they are matched by at least one of the options. +Arrays values are always returned unless the B<yfm_skip_array> option is +provided. + +=cut + +my %yfm_paths = (); + +=back + +=head1 INLINE CUSTOMIZATION + +The AsciiDoc module can be customized with lines starting by B<//po4a:>. +These lines are interpreted as commands to the parser. +The following commands are recognized: + +=over 4 + +=item B<//po4a: macro >I<name>B<[>I<attribute list>B<]> + +This describes in detail the parameters of a B<macro>; +I<name> must be a valid macro name, and it ends with an underscore +if the target must be translated. + +The I<attribute list> argument is a comma separated list which +contains information about translatable arguments. This list contains +either numbers, to define positional parameters, or named attributes. + +If a plus sign (B<+>) is prepended to I<name>, then the macro and its +arguments are translated as a whole. There is no need to define +attribute list in this case, but brackets must be present. + +=item B<//po4a: style >B<[>I<attribute list>B<]> + +This describes in detail which attributes of a style must +be translated. + +The I<attribute list> argument is a comma separated list which +contains information about translatable arguments. This list contains +either numbers, to define positional parameters, or named attributes. +The first attribute is the style name, it will not be translated. + +If a plus sign (B<+>) is prepended to the style name, then the +attribute list is translated as a whole. There is no need to define +translatable attributes. + +If a minus sign (B<->) is prepended to the style name, then this +attribute is not translated. + +=item B<//po4a: entry >I<name> + +This declares an attribute entry as being translatable. By default, +they are not translated. + +=back + +=cut + +my @comments = (); + +my %debug = ( + 'split_attributelist' => 0, + 'join_attributelist' => 0, + 'parse' => 0, +); + +sub initialize { + my $self = shift; + my %options = @_; + + $self->{options}{'nobullets'} = 1; + $self->{options}{'forcewrap'} = 0; + $self->{options}{'debug'} = ''; + $self->{options}{'verbose'} = 1; + $self->{options}{'entry'} = ''; + $self->{options}{'macro'} = ''; + $self->{options}{'style'} = ''; + $self->{options}{'definitions'} = ''; + $self->{options}{'noimagetargets'} = 0; + $self->{options}{'tablecells'} = 0; + $self->{options}{'compat'} = 'asciidoc'; + $self->{options}{'yfm_keys'} = ''; + $self->{options}{'yfm_skip_array'} = 0; + $self->{options}{'yfm_paths'} = ''; + $self->{options}{'nolinting'} = 0; + $self->{options}{'cleanspaces'} = 0; + + foreach my $opt ( keys %options ) { + die wrap_mod( "po4a::asciidoc", dgettext( "po4a", "Unknown option: %s" ), $opt ) + unless exists $self->{options}{$opt}; + $self->{options}{$opt} = $options{$opt}; + } + + my $compat = $self->{options}{'compat'}; + die wrap_mod( "po4a::asciidoc", + dgettext( "po4a", "Invalid compatibility setting: '%s'. It must be either '%s' or '%s'." ), + $compat, 'asciidoc', 'asciidoctor' ) + if ( defined $compat && $compat ne "asciidoc" && $compat ne "asciidoctor" ); + + if ( $options{'debug'} ) { + foreach ( $options{'debug'} ) { + $debug{$_} = 1; + } + } + map { + $_ =~ s/^\s+|\s+$//g; # Trim the keys before using them + $yfm_keys{$_} = 1 + } ( split( ',', $self->{options}{'yfm_keys'} ) ); + map { + $_ =~ s/^\s+|\s+$//g; # Trim the keys before using them + $yfm_paths{$_} = 1 + } ( split( ',', $self->{options}{'yfm_paths'} ) ); + + $yfm_skip_array = $self->{options}{'yfm_skip_array'}; + + $self->{translate} = { + macro => {}, + style => {}, + entry => {} + }; + + $self->register_attributelist('[verse,2,3,attribution,citetitle]'); + $self->register_attributelist('[quote,2,3,attribution,citetitle]'); + $self->register_attributelist('[icon]'); + $self->register_attributelist('[caption]'); + $self->register_attributelist('[-icons,caption]'); + $self->register_macro('image_[1,alt,title,link]') unless $self->{options}{'noimagetargets'}; + $self->register_macro('indexterm[1,2,3]') unless $self->{options}{'noimagetargets'}; + + if ( $self->{options}{'definitions'} ) { + $self->parse_definition_file( $self->{options}{'definitions'} ); + } + $self->{options}{entry} =~ s/^\s*//; + foreach my $attr ( split( /\s+/, $self->{options}{entry} ) ) { + $self->{translate}->{entry}->{$attr} = 1; + } + $self->{options}{macro} =~ s/^\s*//; + foreach my $attr ( split( /\s+/, $self->{options}{macro} ) ) { + $self->register_macro($attr); + } + $self->{options}{style} =~ s/^\s*//; + foreach my $attr ( split( /\s+/, $self->{options}{style} ) ) { + $self->register_attributelist($attr); + } + +} + +sub register_attributelist { + my $self = shift; + my $list = shift; + my $type = shift || 'style'; + $list =~ s/^\[//; + $list =~ s/\]$//; + $list =~ s/\s+//; + $list = "," . $list . ","; + $list =~ m/^,([-+]?)([^,]*)/; + my $command = $2; + $self->{translate}->{$type}->{$command} = $list; + print STDERR "Definition: $type $command: $list\n" if $debug{definitions}; +} + +sub register_macro { + my $self = shift; + my $text = shift; + die wrap_mod( "po4a::asciidoc", dgettext( "po4a", "Unable to parse macro definition: %s" ), $text ) + unless $text =~ m/^(\+?)([\w\d][\w\d-]*?)(_?)\[(.*)\]$/; + my $macroplus = $1; + my $macroname = $2; + my $macrotarget = $3; + my $macroparam = $macroname . "," . $4; + $self->register_attributelist( $macroparam, 'macro' ); + + if ( $macrotarget eq '_' ) { + $self->{translate}->{macro}->{$macroname} .= '_'; + } + if ( $macroplus eq '+' ) { + $self->{translate}->{macro}->{$macroname} =~ s/^,/,+/; + } +} + +sub is_translated_target { + my $self = shift; + my $macroname = shift; + return defined( $self->{translate}->{macro}->{$macroname} ) + && $self->{translate}->{macro}->{$macroname} =~ m/_$/; +} + +sub is_unsplitted_attributelist { + my $self = shift; + my $name = shift; + my $type = shift; + return defined( $self->{translate}->{$type}->{$name} ) + && $self->{translate}->{$type}->{$name} =~ m/^,\+/; +} + +sub process_definition { + my $self = shift; + my $command = shift; + if ( $command =~ m/^po4a: macro\s+(.*\[.*\])\s*$/ ) { + $self->register_macro($1); + } elsif ( $command =~ m/^po4a: style\s*(\[.*\])\s*$/ ) { + $self->register_attributelist($1); + } elsif ( $command =~ m/^po4a: entry\s+(.+?)\s*$/ ) { + $self->{translate}->{entry}->{$1} = 1; + } +} + +sub parse_definition_file { + my $self = shift; + my $filename = shift; + if ( !open( IN, "<", $filename ) ) { + die wrap_mod( "po4a::asciidoc", dgettext( "po4a", "Cannot open %s: %s" ), $filename, $! ); + } + while (<IN>) { + chomp; + process_definition( $self, $_ ); + } + close IN; +} + +my $RE_SECTION_TEMPLATES = "sect1|sect2|sect3|sect4|preface|colophon|dedication|synopsis|index"; +my $RE_STYLE_ADMONITION = "TIP|NOTE|IMPORTANT|WARNING|CAUTION"; +my $RE_STYLE_PARAGRAPH = + "normal|literal|verse|quote|listing|abstract|partintro|comment|example|sidebar|source|music|latex|graphviz"; +my $RE_STYLE_NUMBERING = "arabic|loweralpha|upperalpha|lowerroman|upperroman"; +my $RE_STYLE_LIST = "appendix|horizontal|qanda|glossary|bibliography"; +my $RE_STYLES = + "$RE_SECTION_TEMPLATES|$RE_STYLE_ADMONITION|$RE_STYLE_PARAGRAPH|$RE_STYLE_NUMBERING|$RE_STYLE_LIST|float"; + +BEGIN { + my $UnicodeGCString_available = 0; + $UnicodeGCString_available = 1 if ( eval { require Unicode::GCString } ); + eval { + + sub chars($$$) { + my $text = shift; + my $encoder = shift; + $text = $encoder->decode($text) if ( defined($encoder) && $encoder->name ne "ascii" ); + if ($UnicodeGCString_available) { + return Unicode::GCString->new($text)->chars(); + } else { + $text =~ s/\n$//s; + return length($text) if !( defined($encoder) && $encoder->name ne "ascii" ); + eval { require Unicode::GCString }; + die wrap_mod( + "po4a::asciidoc", + dgettext( + "po4a", + "Detection of two line titles failed at %s\nPlease install the Unicode::GCString module (error: %s)." + ), + shift, $@ + ); + } + } + }; +} + +sub translate { + my ( $self, $str, $ref, $type ) = @_; + my (%options) = @_; + if ( $options{'wrap'} == 1 ) { + if ($str =~ / \+\n/) { + $options{'wrap'} = 0; + $str =~ s/([^+])\n/$1 /g; + $str =~ s/ \+\n/\n/g; + $str = $self->SUPER::translate( $str, $ref, $type, %options); + $str =~ s/\n/ +\n/g; + $options{'wrap'} = 1; + } else { + if ( $self->{options}{'cleanspaces'} == 1 ) { + $str =~ s/[ \n]+/ /g; + } + $str = $self->SUPER::translate( $str, $ref, $type, %options); + } + } else { + $str = $self->SUPER::translate( $str, $ref, $type, %options ); + } + return $str; +} + +sub parse { + my $self = shift; + my ( $line, $ref ) = $self->shiftline(); + + # Handle the YAML Front Matter, if any + if ( defined($line) && $line =~ /^---$/ ) { + my $yfm; + my ( $nextline, $nextref ) = $self->shiftline(); + while ( defined($nextline) ) { + last if ( $nextline =~ /^(---|\.\.\.)$/ ); + $yfm .= $nextline; + ( $nextline, $nextref ) = $self->shiftline(); + } + die "Could not get the YAML Front Matter from the file." if ( length($yfm) == 0 ); + my $yamlarray = YAML::Tiny->read_string($yfm) + || die "Couldn't read YAML Front Matter ($!)\n$yfm\n"; + + $self->handle_yaml( 1, $ref, $yamlarray, \%yfm_keys, $yfm_skip_array, \%yfm_paths ); + $self->pushline("---\n"); + + ( $line, $ref ) = $self->shiftline(); # Pass the final '---' + } + + my $paragraph = ""; + my $wrapped_mode = 1; + my $file = $ref; + $file =~ s/:[0-9]+$// if defined($line); + + while ( defined($line) ) { + $ref =~ m/^(.*):[0-9]+$/; + if ( $1 ne $file ) { + $file = $1; + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $wrapped_mode = 1; + } + + chomp($line); + print STDERR "Seen $ref $line\n" + if ( $debug{parse} ); + $self->{ref} = "$ref"; + if ( ( defined $self->{verbatim} ) and ( $self->{verbatim} == 3 ) ) { + + # Untranslated blocks + $self->pushline( $line . "\n" ); + if ( $line =~ m/^~{4,}$/ ) { + undef $self->{verbatim}; + undef $self->{type}; + $wrapped_mode = 1; + } + } elsif ( ( defined $self->{verbatim} ) and ( $self->{verbatim} == 2 ) ) { + + # CommentBlock + if ( $line =~ m/^\/{4,}$/ ) { + undef $self->{verbatim}; + undef $self->{type}; + $wrapped_mode = 1; + } else { + push @comments, $line; + } + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $wrapped_mode = 1 unless defined( $self->{verbatim} ); + $self->pushline( $line . "\n" ); + } elsif ( ( defined $self->{type} ) + and ( $self->{type} eq "Table" ) + and ( $line !~ m/^\|===/ ) + and ( $self->{options}{"tablecells"} ) + and ( not defined $self->{disabletablecells} ) ) + { + # inside a table, and we should split per cell + my $new_line = ""; + my @texts = split /(?:(?:\d+|\d*(?:\.\d+)?)(?:\+|\*))?[<^>]?(?:\.[<^>])?[demshalv]?\|/, $line; + my @seps = ($line) =~ m/(?:(?:\d+|\d*(?:\.\d+)?)(?:\+|\*))?[<^>]?(?:\.[<^>])?[demshalv]?\|/g; + if ( ( scalar(@texts) and length( $texts[0] ) ) || ( !length($line) ) ) { + if ( !length($line) ) { $texts[0] = ""; } + if ( length($paragraph) ) { + + # if we are in a continuation line + $paragraph .= "\n" . $texts[0]; + } else { + $paragraph = $texts[0]; + $self->pushline("\n"); + } + } elsif ( length($paragraph) ) { + $new_line = "\n"; + } + + shift @texts; + my @parts = map { ( $_, shift @texts ) } @seps; + foreach my $part (@parts) { + if ( not defined $part ) { + + # allows concatenation and will be stripped anyway + $part = " "; + } + if ( $part =~ /\|$/ ) { + + # this is a cell separator. End the previous cell + do_stripped_unwrapped_paragraph( $self, $paragraph, $wrapped_mode ); + if ( $new_line eq "\n" ) { + $self->pushline("\n"); + $new_line = ""; + } + $paragraph = ""; + $self->pushline($part); + } else { + + # this is content. Append it. + $paragraph .= $part; + } + } + + } elsif ( ( not defined( $self->{verbatim} ) ) and ( $line =~ m/^(\+|--)$/ ) ) { + + # List Item Continuation or List Block + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $wrapped_mode = 1 unless defined( $self->{verbatim} ); + $self->pushline( $line . "\n" ); + + # TODO: add support for Open blocks + } elsif ( + ( not defined( $self->{verbatim} ) ) + and ( $line =~ m/^(={2,}|-{2,}|~{2,}|\^{2,}|\+{2,})$/ ) + and ( defined($paragraph) ) + and ( $paragraph =~ m/^[^\n]*\n$/s ) + and + + # subtract one because chars includes the newline on the paragraph + ( abs( ( chars( $paragraph, $self->{TT}{po_in}{encoder}, $ref ) - 1 ) - length($line) ) < 3 ) + ) + { + # Found title + + $wrapped_mode = 0; + my $level = $line; + $level =~ s/^(.).*$/$1/; + $paragraph =~ s/\n$//s; + + warn wrap_mod( + "$ref", + dgettext( + "po4a", + "'%s' seems to be a two-lines title underlined with '%s', but the underlines are too short or too long compared to the title length. " + . "You may want to fix your master document." + ), + $paragraph, + $level + ) + if ( ( chars( $paragraph, $self->{TT}{po_in}{encoder}, $ref ) != length($line) ) + && ( !$self->{options}{'nolinting'} ) ); + + my $t = $self->translate( + $paragraph, + $self->{ref}, + "Title $level", + "comment" => join( "\n", @comments ), + "wrap" => 0 + ); + $self->pushline( $t . "\n" ); + $paragraph = ""; + @comments = (); + $wrapped_mode = 1; + $self->pushline( ( $level x ( chars( $t, $self->{TT}{po_in}{encoder}, $ref ) ) ) . "\n" ); + } elsif ( $line =~ m/^(={1,5})( +)(.*?)( +\1)?$/ ) { + my $titlelevel1 = $1; + my $titlespaces = $2; + my $title = $3; + my $titlelevel2 = $4 || ""; + + # Found one line title + do_paragraph( $self, $paragraph, $wrapped_mode ); + $wrapped_mode = 0; + $paragraph = ""; + $self->pushline( $titlelevel1 . $titlespaces); + $title = $self->translate_indexterms($title); + my $t = $self->translate( + $title, + $self->{ref}, + "Title $titlelevel1", + "comment" => join( "\n", @comments ), + "wrap" => 0 + ); + $self->pushline( $t . $titlelevel2 . "\n" ); + @comments = (); + $wrapped_mode = 1; + } elsif ( ( $line =~ m/^(\/{4,}|\+{4,}|-{4,}|\.{4,}|\*{4,}|_{4,}|={4,}|~{4,})$/ ) + and ( !defined( $self->{type} ) or ( defined( $self->{type} ) and ( $self->{type} !~ /^Table/i ) ) ) ) + { + # Found one delimited block + my $t = $line; + $t =~ s/^(.).*$/$1/; + my $l = length $line; + my $type = "delimited block $t"; + $type = "$type $l" if ( $self->{options}{'compat'} eq 'asciidoctor' ); + if ( defined $self->{verbatim} and ( $self->{type} ne $type ) ) { + $paragraph .= "$line\n"; + } else { + do_paragraph( $self, $paragraph, $wrapped_mode ); + if ( ( defined $self->{type} ) + and ( $self->{type} eq $type ) ) + { + undef $self->{type}; + undef $self->{verbatim}; + undef $self->{bullet}; + undef $self->{indent}; + $wrapped_mode = 1; + print STDERR "Closing $t block\n" if $debug{parse}; + } else { + print STDERR "Begining $t block\n" if $debug{parse}; + if ( $t eq "\/" ) { + + # CommentBlock, should not be treated + $self->{verbatim} = 2; + } elsif ( $t eq "+" ) { + + # PassthroughBlock + $wrapped_mode = 0; + $self->{verbatim} = 1; + } elsif ( $t eq "-" or $t eq "|" ) { + + # ListingBlock + $wrapped_mode = 0; + $self->{verbatim} = 1; + } elsif ( $t eq "." ) { + + # LiteralBlock + $wrapped_mode = 0; + $self->{verbatim} = 1; + } elsif ( $t eq "*" ) { + + # SidebarBlock + $wrapped_mode = 1; + } elsif ( $t eq "_" ) { + + # QuoteBlock + if ( ( defined $self->{type} ) + and ( $self->{type} eq "verse" ) ) + { + $wrapped_mode = 0; + $self->{verbatim} = 1; + print STDERR "QuoteBlock verse\n" if $debug{parse}; + } else { + $wrapped_mode = 1; + } + } elsif ( $t eq "=" ) { + + # ExampleBlock + $wrapped_mode = 1; + } elsif ( $t eq "~" ) { + + # Filter blocks, TBC: not translated + $wrapped_mode = 0; + $self->{verbatim} = 3; + } + $self->{type} = $type; + } + $paragraph = ""; + $self->pushline( $line . "\n" ); + } + } elsif ( ( not defined( $self->{verbatim} ) ) and ( $line =~ m/^\/\/(.*)/ ) ) { + my $comment = $1; + if ( $comment =~ m/^po4a: / ) { + + # Po4a command line + $self->process_definition($comment); + } else { + + # Comment line + push @comments, $comment; + } + do_paragraph( $self, $paragraph, $wrapped_mode ) if length($paragraph); + $paragraph = ""; + $wrapped_mode = 1; + + $self->pushline( $line . "\n" ); + } elsif ( not defined $self->{verbatim} + and ( $line =~ m/^\[\[([^\]]*)\]\]$/ ) ) + { + # Found BlockId + do_paragraph( $self, $paragraph, $wrapped_mode ); + my $block_id = $1; + $paragraph = ""; + $wrapped_mode = 1; + if ($block_id =~ m/^([^,]+),(.+)$/) { + # Found BlockId with a xlabel + my $xlabel = $2; + $block_id = $1; + $self->pushline( "[[$block_id," ); + do_paragraph( $self, $xlabel, 0); + $self->pushline( "]]\n" ); + } else { + $self->pushline( $line . "\n" ); + } + undef $self->{bullet}; + undef $self->{indent}; + } elsif ( not defined $self->{verbatim} + and ( $paragraph eq "" ) + and ( $line =~ m/^((?:$RE_STYLE_ADMONITION):\s+)(.*)$/ ) ) + { + my $type = $1; + my $text = $2; + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = $text . "\n"; + $wrapped_mode = 1; + $self->pushline($type); + undef $self->{bullet}; + undef $self->{indent}; + } elsif ( not defined $self->{verbatim} + and ( $line =~ m/^\[($RE_STYLES)\]$/ ) ) + { + my $type = $1; + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $wrapped_mode = 1; + $self->pushline( $line . "\n" ); + if ( $type eq "verse" ) { + $wrapped_mode = 0; + } + undef $self->{bullet}; + undef $self->{indent}; + } elsif ( not defined $self->{verbatim} + and ( $line =~ m/^\[[^\]]+\]$/ ) ) + { + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + my $t = $self->parse_style($line); + $self->pushline("$t\n"); + @comments = (); + $wrapped_mode = 1; + if ( $line =~ m/^\[(['"]?)(verse|quote)\1,/ ) { + $self->{type} = $2; + if ( $self->{type} eq 'verse' ) { + $wrapped_mode = 0; + } + print STDERR "Starting verse\n" if $debug{parse}; + } + if ( ( ( $line =~ m/^\[format=(['"]?)(csv|tsv|dsv)\1,/ ) || ( $line =~ m/^\[separator=[^\|]/ ) ) + && $self->{options}{'tablecells'} ) + { + warn wrap_mod( + "$ref", + dgettext( + "po4a", + "Po4a's tablecells mode only supports PSV formatted tables with '|' separators. Disabling tablecells and falling back to block mode for this table." + ) + ); + $self->{disabletablecells} = 1; + } + undef $self->{bullet}; + undef $self->{indent}; + } elsif ( not defined $self->{verbatim} + and ( $line =~ m/^(\s*)([-%~\$[*_+`'#<>[:alnum:]\\"(|\{].*?)((?::::?|;;|\?\?|:-)(?: *\\)?)$/ ) ) + { + my $indent = $1; + my $label = $2; + my $labelend = $3; + + # Found labeled list + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $wrapped_mode = 1; + $self->{bullet} = ""; + $self->{indent} = $indent; + my $t = $self->translate( + $label, + $self->{ref}, + "Labeled list", + "comment" => join( "\n", @comments ), + "wrap" => 0 + ); + $self->pushline("$indent$t$labelend\n"); + @comments = (); + } elsif ( not defined $self->{verbatim} + and ( $line =~ m/^(\s*)(\S.*?)((?::?::|;;)\s+)(.*)$/ ) ) + { + my $indent = $1; + my $label = $2; + my $labelend = $3; + my $labeltext = $4; + + # Found Horizontal Labeled Lists + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = $labeltext . "\n"; + $wrapped_mode = 1; + $self->{bullet} = ""; + $self->{indent} = $indent; + my $t = $self->translate( + $label, + $self->{ref}, + "Labeled list", + "comment" => join( "\n", @comments ), + "wrap" => 0 + ); + $self->pushline("$indent$t$labelend"); + @comments = (); + } elsif ( not defined $self->{verbatim} + and ( $line =~ m/^\:(\S.*?)(:\s*)(.*)$/ ) ) + { + my $attrname = $1; + my $attrsep = $2; + my $attrvalue = $3; + my $linebreak = ""; + while ( $attrvalue =~ s/ ([\\+])$//s ) { + $linebreak = $1; + # add a carriage return at the end of attrvalue if there is none + $attrvalue .= "\n" if $attrvalue !~ m/\n$/; + ( $line, $ref ) = $self->shiftline(); + $ref =~ m/^(.*):[0-9]+$/; + $line =~ s/^\s+|\s+$//; + print STDERR "appending attribute with $line" if $debug{parse}; + $attrvalue .= $line; + } + print STDERR "attr definition: $attrvalue\n" if $debug{parse}; + # Found an Attribute entry + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $wrapped_mode = 1; + undef $self->{bullet}; + undef $self->{indent}; + if ( defined( $self->{translate}->{entry}->{$attrname} ) ) { + my $t = $self->translate( + $attrvalue, + $self->{ref}, + "Attribute :$attrname:", + "comment" => join( "\n", @comments ), + "wrap" => 1 + ); + $t =~ s/\n/ \\\n/g; + $self->pushline(":$attrname$attrsep$t\n"); + } else { + $attrvalue =~ s/\n/ $linebreak\n/g; + $self->pushline(":$attrname$attrsep$attrvalue\n"); + } + @comments = (); + } elsif ( not defined $self->{verbatim} + and ( $line =~ m/^([\w\d][\w\d-]*)(::)(\S*|\S*\{.*\}\S*)\[(.*)\]$/ ) ) + { + my $macroname = $1; + my $macrotype = $2; + my $macrotarget = $3; + my $macroparam = $4; + + # Found a macro + # print STDERR "macro: $macroname|type: $macrotype|target: $macrotarget|param: $macroparam\n"; + + # Don't process include macros in tables, pass them through + if ( ( $macroname eq "include" ) + and ( $macrotype eq '::' ) + and ( defined( $self->{type} ) and ( $self->{type} eq "Table" ) ) ) + { + $paragraph .= $line . "\n"; + } elsif ( ( $macroname eq "include" || $macroname eq "ifeval" ) + and ( $macrotype eq '::' ) ) + { + $self->pushline( $line . "\n" ); + } else { + if ( $macrotype eq '::' ) { + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $wrapped_mode = 1; + undef $self->{bullet}; + undef $self->{indent}; + } + my $t = $self->parse_macro( $macroname, $macrotype, $macrotarget, $macroparam ); + $self->pushline("$t\n"); + @comments = (); + } + } elsif ( not defined $self->{verbatim} + and ($paragraph =~ m/^\s*$/) + and ( $line !~ m/^\.\./ ) + and ( $line =~ m/^\.(\S.*)$/ ) ) + { + my $title = $1; + + # Found block title + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $wrapped_mode = 1; + undef $self->{bullet}; + undef $self->{indent}; + my $t = $self->translate( + $title, + $self->{ref}, + "Block title", + "comment" => join( "\n", @comments ), + "wrap" => 0 + ); + $self->pushline(".$t\n"); + @comments = (); + } elsif ( not defined $self->{verbatim} + and ( $line =~ m/^(\s*)((?:(?:[-*o+\.]+(?:\s+\[[ xX\*]\])?)|(?:[0-9]+[.\)])|(?:[a-z][.\)])|\([0-9]+\))\s+)(.*)$/ ) ) + { + my $indent = $1 || ""; + my $bullet = $2; + my $text = $3; + print STDERR "Item (bullet: '$bullet')\n" if ( $debug{parse} ); + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = $text . "\n"; + $self->{indent} = $indent; + $self->{bullet} = $bullet; + } elsif ( not defined $self->{verbatim} + and ( $line =~ m/^((?:<?(?:[0-9]|\.)+)?> +)(.*)$/ ) ) + { + my $bullet = $1; + my $text = $2; + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = $text . "\n"; + $self->{indent} = ""; + $self->{bullet} = $bullet; + } elsif ( not defined $self->{verbatim} + and ( $line eq " +") ) { + $paragraph .= $line . "\n"; + } elsif ( ( $line =~ /^\s*$/ ) and ( !defined( $self->{type} ) or ( $self->{type} ne "Table" ) ) ) { + + # When not in table, empty lines or lines containing only spaces do break paragraphs + print STDERR "Empty new line. Wrap: " . ( defined( $self->{verbatim} ) ? "yes. " : "no. " ) . "\n" + if $debug{parse}; + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $wrapped_mode = 1 unless defined( $self->{verbatim} ); + $self->pushline( $line . "\n" ); + + } elsif ( ( $line =~ /^\s*$/ ) ) { + + # When in table, empty lines are either added to the current paragraph if it not empty, or pushed verbatim if not + if ( length $paragraph ) { + $paragraph .= $line . "\n"; + } else { + $self->pushline( $line . "\n" ); + } + + # print STDERR ">>$paragraph<<\n"; + } elsif ( not defined $self->{verbatim} + and ( defined $self->{bullet} and $line =~ m/^(\s+)(.*)$/ ) ) + { + my $indent = $1; + my $text = $2; + print STDERR "bullet (" . ( $self->{bullet} ) . ") starting with " . length($indent) . " spaces\n" + if $debug{'parse'}; + if ( $paragraph eq "" && length( $self->{bullet} ) && length($indent) ) { + + # starting a paragraph with a bullet (not an enum or so), and indented. + # Thus a literal paragraph in a list. + $wrapped_mode = 0; + } + if ( not defined $self->{indent} ) { + + # No indent level before => Starting a paragraph? + $paragraph .= $text . "\n"; + $self->{indent} = $indent; + print STDERR "Starting a paragraph\n" if ( $debug{parse} ); + } elsif ( length($paragraph) + and ( length( $self->{bullet} ) + length( $self->{indent} ) == length($indent) ) ) + { + # same indent level as before: append + $paragraph .= $text . "\n"; + } elsif ( length($paragraph) + and ( length( $self->{bullet} ) == 0 ) ) + { + # definition list continuation + $paragraph .= $text . "\n"; + $self->{indent} = ""; + print STDERR " definition list continuation\n" if ( $debug{parse} ); + } else { + + # not the same indent level: start a new translated paragraph + print STDERR "New paragraph (indent: '" . ( $self->{indent} ) . "')\n" if ( $debug{parse} ); + do_paragraph( $self, $paragraph, $wrapped_mode ); + if ( length( $self->{indent} ) > 0 && length( $self->{indent} ) < length($indent) ) { + + # increase indentation: the new block must not be wrapped + $wrapped_mode = 0; + } + $paragraph = $text . "\n"; + $self->{indent} = $indent; + $self->{bullet} = ""; + } + } elsif ( $line =~ /^-- $/ ) { + + # Break paragraphs on email signature hint + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $wrapped_mode = 1; + $self->pushline( $line . "\n" ); + } elsif ( $line =~ /^=+$/ + or $line =~ /^_+$/ + or $line =~ /^-+$/ ) + { + $wrapped_mode = 0; + $paragraph .= $line . "\n"; + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $wrapped_mode = 1; + } elsif ( $paragraph ne "" + && $self->{bullet} + && length( $self->{indent} || "" ) == 0 + && ( $line =~ m/^(\s*)((?:[-*o+]+|([0-9]+[.\)])|\([0-9]+\))\s+)/s ) ) + { + # If the next line starts with a bullet, process this immediately and setup the next line + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $wrapped_mode = 0; + $self->unshiftline( $line, $ref ); + $line = ""; + undef $self->{bullet}; + undef $self->{indent}; + } elsif ( $line =~ /^\|===/ ) { + + # This is a table, treat it as a non-wrapped paragraph + print STDERR "Found Table delimiter\n" if ( $debug{parse} ); + if ( ( $paragraph eq "" ) or ( defined( $self->{type} ) and ( $self->{type} =~ /^delimited block/i ) ) ) { + + # Start the table + $wrapped_mode = 0; + $self->{type} = "Table"; + } else { + + # End the Table + if ( $self->{options}{'tablecells'} + and not defined $self->{disabletablecells} ) + { + do_stripped_unwrapped_paragraph( $self, $paragraph, $wrapped_mode ); + $self->pushline("\n"); + } else { + do_paragraph( $self, $paragraph, $wrapped_mode ); + } + undef $self->{verbatim}; + undef $self->{type}; + undef $self->{disabletablecells}; + $paragraph = ""; + } + $self->pushline( $line . "\n" ); + } else { + + # A stupid paragraph of text + print STDERR "Regular line. " + . "Bullet: '" + . ( defined( $self->{bullet} ) ? $self->{bullet} : 'none' ) . "'; " + . "Indent: '" + . ( defined( $self->{indent} ) ? $self->{indent} : 'none' ) . "'\n" + if ( $debug{parse} ); + + if ( $line =~ /^\s/ ) { + + # A line starting by a space indicates a non-wrap + # paragraph + $wrapped_mode = 0; + } + + if ( ( $paragraph ne "" && $self->{bullet} && length( $self->{indent} || "" ) == 0 ) ) + { + if ( ( !$self->{options}{'nolinting'} ) && ($paragraph !~ m/ \+\n/ ) ) { + # Second line of an item block is not indented. It is unindented + # (and allowed) additional text or a new list item. + warn wrap_mod( + "$ref", + dgettext( + "po4a", + "It seems that you are adding unindented content to an item. " + . "The standard allows this, but you may still want to change your document " + . "to use indented text to provide better visual clues to writers." + ) + ); + } + } else { + undef $self->{bullet}; + undef $self->{indent}; + } + + # TODO: comments + $paragraph .= $line . "\n"; + } + + ( $line, $ref ) = $self->shiftline(); + } + if ( length $paragraph ) { + do_paragraph( $self, $paragraph, $wrapped_mode ); + } +} + +sub do_stripped_unwrapped_paragraph { + my ( $self, $paragraph, $wrap ) = ( shift, shift, shift ); + my $type = shift || $self->{type} || "Plain text"; + my ( $pre, $trans, $post ) = $paragraph =~ /^(\s*)(.*?)(\s*)$/s; + $self->pushline($pre); + do_paragraph( $self, $trans, $wrap, $type ); + $self->pushline($post); +} + +sub do_paragraph { + my ( $self, $paragraph, $wrap ) = ( shift, shift, shift ); + my $type = shift || $self->{type} || "Plain text"; + return if ( $paragraph eq "" ); + + # DEBUG + # my $b; + # if (defined $self->{bullet}) { + # $b = $self->{bullet}; + # } else { + # $b = "UNDEF"; + # } + # $type .= " verbatim: '".($self->{verbatim}||"NONE")."' bullet: '$b' indent: '".($self->{indent}||"NONE")."' type: '".($self->{type}||"NONE")."'"; + + if ( not defined $self->{verbatim} ) { + + # Detect bullets + # | * blah blah + # |<spaces> blah + # | ^-- aligned + # <empty line> + # + # Other bullets supported: + # - blah o blah + blah + # 1. blah 1) blah (1) blah + TEST_BULLET: + if ( $paragraph =~ m/^(\s*)((?:(?:[-*o+](?:\s+\[[ Xx\*]\])?)|([0-9]+[.\)])|\([0-9]+\))\s+)([^\n]*\n)(.*)$/s ) { + my $para = $5; + my $bullet = $2; + my $indent1 = $1; + my $indent2 = "$1" . ( ' ' x length $bullet ); + my $text = $4; + while ( $para !~ m/$indent2(?:(?:[-*o+](?:\\s+[[ Xx\*]\])?)|([0-9]+[.\)])|\([0-9]+\))\s+/ + and $para =~ s/^$indent2(\S[^\n]*\n)//s ) + { + $text .= $1; + } + + # TODO: detect if a line starts with the same bullet + if ( $text !~ m/\S[ \t][ \t][ \t]+\S/s ) { + my $bullet_regex = quotemeta( $indent1 . $bullet ); + $bullet_regex =~ s/[0-9]+/\\d\+/; + if ( $para eq '' or $para =~ m/^$bullet_regex\S/s ) { + my $trans = $self->translate( + $text, + $self->{ref}, + "Bullet: '$indent1$bullet'", + "wrap" => 1, + "wrapcol" => -( length $indent2 ) + ); + $trans =~ s/^/$indent1$bullet/s; + $trans =~ s/\n(.)/\n$indent2$1/sg; + $self->pushline( $trans . "\n" ); + if ( $para eq '' ) { + return; + } else { + + # Another bullet + $paragraph = $para; + goto TEST_BULLET; + } + } + } + } + } + + my $end = ""; + if ($wrap) { + $paragraph =~ s/(\n*)$//s; + $end = $1 || ""; + } + if ( defined $self->{bullet} ) { + my $bullet = $self->{bullet}; + my $indent1 = $self->{indent}; + $self->pushline($indent1 . $bullet); + } + $paragraph = $self->translate_indexterms($paragraph); + + my $t = $self->translate( + $paragraph, + $self->{ref}, + $type, + "comment" => join( "\n", @comments ), + "wrap" => $wrap + ); + my $bullet = $self->{bullet} || ""; + # print STDERR "translated: '$t', $bullet\n"; + + my $unwrap_result = !$self->{options}{'forcewrap'} && $wrap && (! ($t =~ /\+\n/) ) ; + if ($unwrap_result) { + $t =~ s/[\n ]+/ /g; + } + + @comments = (); + if ( ( defined $self->{bullet} ) && !($t =~ /\+\n/ ) ) { + my $bullet = $self->{bullet}; + my $indent1 = $self->{indent}; + my $indent2 = $indent1 . ( ' ' x length($bullet) ); + $t =~ s/\n(.)/\n$indent2$1/sg; + } + $self->pushline( $t . $end ); +} + +sub translate_indexterms { + my ($self, $paragraph) = @_; + $paragraph = $self->translate_in_regex($paragraph, qr/\(\(\(([^\)]+)\)\)\)/); + return $self->translate_in_regex($paragraph, qr/indexterm:\[([^\]]+)\]/); +} + +sub translate_in_regex { + # Detect index entries and translate them separately. + # They are moved in front of the paragraph, regardless of their original location, + # but that's consistant with the specification. + my ($self, $paragraph, $pattern) = @_; + if ( my @indexes = ($paragraph =~ m/$pattern/g ) ) { + for my $index (@indexes) { + my @terms = (); + while ( + $index =~ m/\G( + "(?:[^"\\])+" # quoted term + | (?:[^,\\])+ # unquoted term + )(,\s*+)?/gx + ) + { + my $term = $1; + if ( $term =~ /^"(.*)"$/ ) { + push @terms, '"' . ($self->translate( + $1, + $self->{ref}, + "Index entry", + "wrap" => 1, + "wrapcol" => 0)) . '"'; + } else { + push @terms, $self->translate( + $term, + $self->{ref}, + "Index entry", + "wrap" => 1, + "wrapcol" => 0); + } + } + $self->pushline("(((" . join (",", @terms) . ")))"); + + } + } + $paragraph =~ s/$pattern\n?//g; + return $paragraph; +} + +sub parse_style { + my ( $self, $text ) = ( shift, shift ); + $text =~ s/^\[//; + $text =~ s/\]$//; + $text =~ m/^([^=,]+)/; + if ( defined($1) && $self->is_unsplitted_attributelist( $1, 'style' ) ) { + my $t = $self->translate( + $text, + $self->{ref}, + "Unsplitted AttributeList", + "comment" => join( "\n", @comments ), + "wrap" => 0 + ); + return "[$t]"; + } + my @attributes = $self->split_attributelist($text); + return "[" . join( ", ", $self->join_attributelist( "style", @attributes ) ) . "]"; +} + +sub parse_macro { + my ( $self, $macroname, $macrotype, $macrotarget, $macroparam ) = ( shift, shift, shift, shift, shift ); + if ( $self->is_unsplitted_attributelist( $macroname, 'macro' ) ) { + my $t = $self->translate( + "$macroname$macrotype$macrotarget\[$macroparam\]", + $self->{ref}, + "Unsplitted macro call", + "comment" => join( "\n", @comments ), + "wrap" => 0 + ); + return $t; + } + my @attributes = (); + @attributes = $self->split_attributelist($macroparam) unless $macroparam eq ""; + + unshift @attributes, $macroname; + my @translated_attributes = $self->join_attributelist( "macro", @attributes ); + shift @translated_attributes; + if ( $self->is_translated_target($macroname) ) { + my $target = unquote_space($macrotarget); + my $t = $self->translate( + $target, + $self->{ref}, + "Target for macro $macroname", + "comment" => join( "\n", @comments ), + "wrap" => 0 + ); + $macrotarget = quote_space($t); + } + return "$macroname$macrotype$macrotarget\[" . join( ", ", @translated_attributes ) . "]"; +} + +sub split_attributelist { + my ( $self, $text ) = ( shift, shift ); + + print STDERR "Splitting attributes in: $text\n" if $debug{split_attributelist}; + my @attributes = (); + while ( + $text =~ m/\G( + [^\W\d][-\w]*="(?:[^"\\]++|\\.)*+" # named attribute + | [^\W\d][-\w]*=None # undefined named attribute + | [^\W\d][-\w]*=\S+ # invalid, but accept it anyway + | "(?:[^"\\]++|\\.)*+" # quoted attribute + | (?:[^,\\]++|\\.)++ # unquoted attribute + | ^$ # Empty attribute list allowed + + )(?:,\s*+)?/gx + ) + { + print STDERR " -> $1\n" if $debug{split_attributelist}; + push @attributes, $1; + } + return @attributes; +} + +sub join_attributelist { + my ( $self, $type ) = ( shift, shift ); + my @attributes = @_; + my $command = shift(@attributes); + my $position; + if ( $type eq 'macro' ) { + $position = 0; # macroname is passed through the first attribute + } else { + $position = 1; + } + my @text = ($command); + if ( $command =~ m/=/ ) { + my $attr = $command; + $command =~ s/=.*//; + @text = (); + push @text, $self->translate_attributelist( $type, $command, $position, $attr ); + } + foreach my $attr (@attributes) { + $position++; + push @text, $self->translate_attributelist( $type, $command, $position, $attr ); + } + print STDERR "Joined attributes: " . join( ", ", @text ) . "\n" if $debug{join_attributelist}; + return @text; +} + +sub translate_attributelist { + my ( $self, $type, $command, $count, $attr ) = ( shift, shift, shift, shift, shift ); + return $attr unless defined $self->{translate}->{$type}->{$command}; + if ( $attr =~ m/^([^\W\d][-\w]*)=(.*)/ ) { + my $attrname = $1; + my $attrvalue = $2; + if ( $self->{translate}->{$type}->{$command} =~ m/,$attrname,/ ) { + my $value = unquote($attrvalue); + my $t = $self->translate( + $value, + $self->{ref}, + "Named '$attrname' AttributeList argument for $type '$command'", + "comment" => join( "\n", @comments ), + "wrap" => 0 + ); + if ( $attrvalue eq 'None' && $t eq 'None' ) { + $attr = $attrname . "=None"; + } else { + $attr = $attrname . "=" . quote($t); + } + } + } else { + if ( $self->{translate}->{$type}->{$command} =~ m/,$count,/ ) { + my $attrvalue = unquote($attr); + my $t = $self->translate( + $attrvalue, + $self->{ref}, + "Positional (\$$count) AttributeList argument for $type '$command'", + "comment" => join( "\n", @comments ), + "wrap" => 0 + ); + $attr = quote($t); + } + } + return $attr; +} + +sub unquote { + my ($text) = shift; + return $text unless $text =~ s/^"(.*)"$/$1/; + $text =~ s/\\"/"/g; + return $text; +} + +sub quote { + my $text = shift; + $text =~ s/"/\\"/g; + return '"' . $text . '"'; +} + +sub quote_space { + my $text = shift; + $text =~ s/ /%20/g; + return $text; +} + +sub unquote_space { + my $text = shift; + $text =~ s/%20/ /g; + return $text; +} + +=head1 STATUS OF THIS MODULE + +Tested successfully on simple AsciiDoc files. + +=head1 AUTHORS + + Nicolas François <nicolas.francois@centraliens.net> + Denis Barbier <barbier@linuxfr.org> + +=head1 COPYRIGHT AND LICENSE + + Copyright © 2005-2008 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>. + Copyright © 2012 Denis BARBIER <barbier@linuxfr.org>. + Copyright © 2017 Martin Quinson <mquinson#debian.org>. + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut + +1; + +__END__ + +# LocalWords: Charset charset AsciiDoc tablecells po UTF gettext msgid nostrip diff --git a/lib/Locale/Po4a/BibTeX.pm b/lib/Locale/Po4a/BibTeX.pm new file mode 100644 index 0000000..d4e73be --- /dev/null +++ b/lib/Locale/Po4a/BibTeX.pm @@ -0,0 +1,146 @@ +#!/usr/bin/perl -w + +# Po4a::BibTeX.pm +# +# extract and translate translatable strings from BibTeX documents +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +######################################################################## + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::BibTeX - convert BibTeX documents from/to PO files + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::BibTeX is a module to help the translation of +bibliographies in the BibTeX format into other [human] languages. + +Fields values are extracted and proposed for translation. + +=head1 OPTIONS ACCEPTED BY THIS MODULE + +NONE. + +=head1 STATUS OF THIS MODULE + +It is a very simple module, but still young. + +=cut + +package Locale::Po4a::BibTeX; + +use 5.16.0; +use strict; +use warnings; + +require Exporter; +use vars qw(@ISA @EXPORT); +@ISA = qw(Locale::Po4a::TransTractor); +@EXPORT = qw(); + +use Locale::Po4a::TransTractor; +use Locale::Po4a::Common; + +sub initialize { } + +sub parse { + my $self = shift; + my ( $line, $ref ); + my $paragraph = ""; + my $field = ""; + my $id = ""; + my $wrapped_mode = 1; + ( $line, $ref ) = $self->shiftline(); + while ( defined($line) ) { + chomp($line); + + #print "tutu: '$line'\n"; + $self->{ref} = "$ref"; + if ( $id eq "" + and $line =~ m/^\@.*?\s*\{\s*(.*),\s*$/ ) + { + $id = $1; + $self->pushline( $line . "\n" ); + } elsif ( $id ne "" + and $field eq "" + and $line =~ m/^((.*?)\s*=\s*)([^ "{].*?)(\s*,?\s*)$/ ) + { + my $end = ( defined $4 ) ? $4 : ""; + $self->pushline( $1 . $self->translate( $3, $self->{ref}, "$2 ($id)", "wrap" => 1 ) . $end . "\n" ); + $field = ""; + $paragraph = ""; + } elsif ( $id ne "" + and $field eq "" + and $line =~ m/^((.*?)\s*=\s*)(.*)$/ ) + { + $field = $2; + $paragraph = $3 . "\n"; + $self->pushline($1); + } elsif ( $field ne "" ) { + $paragraph .= "$line\n"; + } elsif ( $line =~ m/^\s*(\%.*)?$/ ) { + $self->pushline( $line . "\n" ); + } elsif ( $line =~ m/^\s*\}\s*$/ ) { + $self->pushline( $line . "\n" ); + $id = ""; + } else { + print "unsupported line: '$line'\n"; + } + if ( $paragraph =~ m/^(\s*\{)(.*)(\}\s*,?\s*)$/s + or $paragraph =~ m/^(\s*")(.*)("\s*,?\s*)$/s + or $paragraph =~ m/^(\s*)([^ "{].*)(\s*,?\s*)$/s ) + { + $self->pushline( $1 . $self->translate( $2, $self->{ref}, "$field ($id)", "wrap" => 1 ) . $3 ); + $field = ""; + $paragraph = ""; + } + ( $line, $ref ) = $self->shiftline(); + } + if ( $paragraph =~ m/^(\s*\{)(.*)(\}\s*,?\s*)$/s + or $paragraph =~ m/^(\s*")(.*)("\s*,?\s*)$/s + or $paragraph =~ m/^(\s*)(.*)(\s*,?\s*)$/s ) + { + $self->pushline( $self->translate( $1, $self->{ref}, "$field ($id)", "wrap" => 1 ) . $2 ); + $field = ""; + $paragraph = ""; + } +} + +sub do_paragraph { + my ( $self, $paragraph, $wrap ) = ( shift, shift, shift ); + $self->pushline( $self->translate( $paragraph, $self->{ref}, "Plain text", "wrap" => $wrap ) ); +} + +1; + +=head1 AUTHORS + + Nicolas François <nicolas.francois@centraliens.net> + +=head1 COPYRIGHT AND LICENSE + + Copyright © 2006 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>. + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). diff --git a/lib/Locale/Po4a/Chooser.pm b/lib/Locale/Po4a/Chooser.pm new file mode 100644 index 0000000..fa96084 --- /dev/null +++ b/lib/Locale/Po4a/Chooser.pm @@ -0,0 +1,178 @@ +# Locale::Po4a::Chooser -- Manage po4a modules +# +# This program is free software; you may redistribute it and/or modify it +# under the terms of GPL v2.0 or later (see COPYING). +# +# This module converts POD to PO file, so that it becomes possible to +# translate POD formatted documentation. See gettext documentation for +# more info about PO files. + +############################################################################ +# Modules and declarations +############################################################################ + +package Locale::Po4a::Chooser; + +use 5.16.0; +use strict; +use warnings; +use Locale::Po4a::Common; + +sub new { + my ($module) = shift; + my (%options) = @_; + + die wrap_mod( "po4a::chooser", gettext("Please provide a module name") ) + unless defined $module; + + my $modname; + if ( $module eq 'kernelhelp' ) { + $modname = 'KernelHelp'; + } elsif ( $module eq 'newsdebian' ) { + $modname = 'NewsDebian'; + } elsif ( $module eq 'latex' ) { + $modname = 'LaTeX'; + } elsif ( $module eq 'bibtex' ) { + $modname = 'BibTex'; + } elsif ( $module eq 'tex' ) { + $modname = 'TeX'; + } elsif ( $module eq 'asciidoc' ) { + $modname = 'AsciiDoc'; + } elsif ( $module eq 'Rd' || $module eq 'rubydoc' ) { + $modname = 'RubyDoc'; + } else { + $modname = ucfirst($module); + } + if ( !UNIVERSAL::can( "Locale::Po4a::$modname", 'new' ) ) { + eval qq{use Locale::Po4a::$modname}; + if ($@) { + my $error = $@; + warn wrap_msg( gettext("Unknown format type: %s."), $module ); + warn wrap_mod( "po4a::chooser", gettext("Module loading error: %s"), $error ) + unless defined $options{'quiet'}; + list(1); + } + } + return "Locale::Po4a::$modname"->new(%options); +} + +sub list { + warn wrap_msg( + gettext("List of valid formats:") + + # ."\n - ".gettext("bibtex: BibTex bibliography format.") + . "\n - " + . gettext("asciidoc: AsciiDoc format.") + . "\n - " + . gettext("dia: uncompressed Dia diagrams.") + . "\n - " + . gettext("docbook: DocBook XML.") + . "\n - " + . gettext("guide: Gentoo Linux's XML documentation format.") + . "\n - " + . gettext("ini: INI format.") + . "\n - " + . gettext("kernelhelp: Help messages of each kernel compilation option.") + . "\n - " + . gettext("latex: LaTeX format.") + . "\n - " + . gettext("man: Good old manual page format.") + . "\n - " + . gettext("pod: Perl Online Documentation format.") + . "\n - " + . gettext("rubydoc: Ruby Documentation (RD) format.") + . "\n - " + . gettext("sgml: either DebianDoc or DocBook DTD.") + . "\n - " + . gettext("texinfo: The info page format.") + . "\n - " + . gettext("tex: generic TeX documents (see also latex).") + . "\n - " + . gettext("text: simple text document.") + . "\n - " + . gettext("wml: WML documents.") + . "\n - " + . gettext("xhtml: XHTML documents.") + . "\n - " + . gettext("xml: generic XML documents (see also docbook).") + . "\n - " + . gettext("yaml: YAML documents.") + ); + exit shift; +} +############################################################################## +# Module return value and documentation +############################################################################## + +1; +__END__ + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::Chooser - manage po4a modules + +=head1 DESCRIPTION + +Locale::Po4a::Chooser is a module to manage po4a modules. Previously, all po4a +binaries used to know all po4a modules (pod, man, sgml, etc). This made the +addition of a new module boring, because you had to make sure that the documentation is synchronized +in all modules, and that each of them can access the new module. + +Now, you just have to call the Locale::Po4a::Chooser::new() function, +passing the name of module as argument. + +The function Locale::Po4a::Chooser::list() lists the available +formats, and exits with the value passed as argument. So, we call +Locale::Po4a::Chooser::list(0) when requested for the list of +formats, and Locale::Po4a::Chooser::list(1) when passed an invalid +format name. + +=head1 SEE ALSO + +=over 4 + +=item About po4a: + +L<Locale::Po4a::Po(3pm)>, +L<Locale::Po4a::TransTractor(3pm)>, +L<po4a(7)|po4a.7> + +=item About modules: + +L<Locale::Po4a::Dia(3pm)>, +L<Locale::Po4a::Docbook(3pm)>, +L<Locale::Po4a::Guide(3pm)>, +L<Locale::Po4a::Halibut(3pm)>, +L<Locale::Po4a::Ini(3pm)>, +L<Locale::Po4a::KernelHelp(3pm)>, +L<Locale::Po4a::LaTeX(3pm)>, +L<Locale::Po4a::Man(3pm)>, +L<Locale::Po4a::Pod(3pm)>, +L<Locale::Po4a::RubyDoc(3pm)>, +L<Locale::Po4a::Sgml(3pm)>, +L<Locale::Po4a::TeX(3pm)>, +L<Locale::Po4a::Texinfo(3pm)>, +L<Locale::Po4a::Text(3pm)>, +L<Locale::Po4a::Wml(3pm)>. +L<Locale::Po4a::Xhtml(3pm)>, +L<Locale::Po4a::Xml(3pm)>, +L<Locale::Po4a::Wml(3pm)>, +L<Locale::Po4a::Yaml(3pm)>. + +=back + +=head1 AUTHORS + + Denis Barbier <barbier@linuxfr.org> + Martin Quinson (mquinson#debian.org) + +=head1 COPYRIGHT AND LICENSE + +Copyright © 2002-2005, 2014, 2017 SPI, Inc. + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut diff --git a/lib/Locale/Po4a/Common.pm b/lib/Locale/Po4a/Common.pm new file mode 100644 index 0000000..1b09adc --- /dev/null +++ b/lib/Locale/Po4a/Common.pm @@ -0,0 +1,252 @@ +# Locale::Po4a::Common -- Common parts of the po4a scripts and utils +# +# Copyright © 2005 Jordi Vilalta <jvprat@gmail.com> +# +# This program is free software; you may redistribute it and/or modify it +# under the terms of GPL v2.0 or later (see COPYING). +# +# This module has common utilities for the various scripts of po4a + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::Common - common parts of the po4a scripts and utils + +=head1 DESCRIPTION + +Locale::Po4a::Common contains common parts of the po4a scripts and some useful +functions used along the other modules. + +If needed, you can disable the use of Text::WrapI18N as such: + + use Locale::Po4a::Common qw(nowrapi18n); + use Locale::Po4a::Text; + +instead of: + + use Locale::Po4a::Text; + +The ordering is important here: as most Locale::Po4a modules load themselves +Locale::Po4a::Common, the first time this module is loaded determines whether Text::WrapI18N is used. + +=cut + +package Locale::Po4a::Common; + +require Exporter; +use vars qw(@ISA @EXPORT); +@ISA = qw(Exporter); +@EXPORT = qw(wrap_msg wrap_mod wrap_ref_mod textdomain gettext dgettext); + +use 5.16.0; +use strict; +use warnings; + +sub import { + my $class = shift; + + my $wrapi18n = 1; + if ( exists $_[0] && defined $_[0] && $_[0] eq 'nowrapi18n' ) { + shift; + $wrapi18n = 0; + } + $class->export_to_level( 1, $class, @_ ); + + return if defined &wrapi18n; + + if ( $wrapi18n && -t STDERR && -t STDOUT && eval { require Text::WrapI18N } ) { + + # Don't bother determining the wrap column if we cannot wrap. + my $col = $ENV{COLUMNS}; + if ( !defined $col ) { + my @term = eval "use Term::ReadKey; Term::ReadKey::GetTerminalSize()"; + $col = $term[0] if ( !$@ ); + + # If GetTerminalSize() failed we will fallback to a safe default. + # This can happen if Term::ReadKey is not available + # or this is a terminal-less build or such strange condition. + } + $col = 76 if ( !defined $col ); + + eval ' use Text::WrapI18N qw($columns); + $columns = $col; + '; + + eval ' sub wrapi18n($$$) { Text::WrapI18N::wrap($_[0],$_[1],$_[2]) } '; + } else { + + # If we cannot wrap, well, that's too bad. Survive anyway. + eval ' sub wrapi18n($$$) { $_[0].$_[2] } '; + } +} + +sub min($$) { + return $_[0] < $_[1] ? $_[0] : $_[1]; +} + +=head1 FUNCTIONS + +=head2 Showing output messages + +=over + +=item + +show_version($) + +Shows the current version of the script, and a short copyright message. It +takes the name of the script as an argument. + +=cut + +sub show_version { + my $name = shift; + + print sprintf( + gettext( + "%s version %s.\n" + . "Written by Martin Quinson and Denis Barbier.\n\n" + . "Copyright © 2002-2022 Software in the Public Interest, Inc.\n" + . "This is free software; see source code for copying\n" + . "conditions. There is NO warranty; not even for\n" + . "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + ), + $name, + $Locale::Po4a::TransTractor::VERSION + ) . "\n"; +} + +=item + +wrap_msg($@) + +This function displays a message the same way as sprintf() does, but wraps +the result so that they look nice on the terminal. + +=cut + +sub wrap_msg($@) { + my $msg = shift; + my @args = @_; + + # print "'$msg' ; ".(scalar @args)." $args[0] $args[1]\n"; + return wrapi18n( "", "", sprintf( $msg, @args ) ) . "\n"; +} + +=item + +wrap_mod($$@) + +This function works like wrap_msg(), but it takes a module name as the first +argument, and leaves a space at the left of the message. + +=cut + +sub wrap_mod($$@) { + my ( $mod, $msg ) = ( shift, shift ); + my @args = @_; + + $mod .= ": "; + my $spaces = " " x min( length($mod), 15 ); + return wrapi18n( $mod, $spaces, sprintf( $msg, @args ) ) . "\n"; +} + +=item + +wrap_ref_mod($$$@) + +This function works like wrap_msg(), but it takes a file:line reference as the +first argument, a module name as the second one, and leaves a space at the left +of the message. + +=back + +=cut + +sub wrap_ref_mod($$$@) { + my ( $ref, $mod, $msg ) = ( shift, shift, shift ); + my @args = @_; + + if ( !$mod ) { + + # If we don't get a module name, show the message like wrap_mod does + return wrap_mod( $ref, $msg, @args ); + } else { + $ref .= ": "; + my $spaces = " " x min( length($ref), 15 ); + $msg = "$ref($mod)\n$msg"; + return wrapi18n( "", $spaces, sprintf( $msg, @args ) ) . "\n"; + } +} + +=head2 Wrappers for other modules + +=over + +=item + +Locale::Gettext + +When the Locale::Gettext module cannot be loaded, this module provide dummy +(empty) implementation of the following functions. In that case, po4a +messages won't get translated but the program will continue to work. + +If Locale::gettext is present, this wrapper also calls +setlocale(LC_MESSAGES, "") so callers don't depend on the POSIX module +either. + +=over + +=item + +bindtextdomain($$) + +=item + +textdomain($) + +=item + +gettext($) + +=item + +dgettext($$) + +=back + +=back + +=cut + +BEGIN { + if ( eval { require Locale::gettext } ) { + import Locale::gettext; + require POSIX; + POSIX::setlocale( &POSIX::LC_MESSAGES, '' ); + } else { + eval ' + sub bindtextdomain($$) { } + sub textdomain($) { } + sub gettext($) { shift } + sub dgettext($$) { return $_[1] } + ' + } +} + +1; +__END__ + +=head1 AUTHORS + + Jordi Vilalta <jvprat@gmail.com> + +=head1 COPYRIGHT AND LICENSE + +Copyright © 2005 SPI, Inc. + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut diff --git a/lib/Locale/Po4a/Dia.pm b/lib/Locale/Po4a/Dia.pm new file mode 100644 index 0000000..2652edc --- /dev/null +++ b/lib/Locale/Po4a/Dia.pm @@ -0,0 +1,114 @@ +#!/usr/bin/perl + +# Po4a::Dia.pm +# +# extract and translate translatable strings from Dia diagrams. +# +# This code extracts plain text from string tags on uncompressed Dia +# diagrams. +# +# Copyright © 2004 Jordi Vilalta <jvprat@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +######################################################################## + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::Dia - convert uncompressed Dia diagrams from/to PO files + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::Dia is a module to help the translation of diagrams in the +uncompressed Dia format into other [human] languages. + +You can get Dia (the graphical editor for these diagrams) from: + http://www.gnome.org/projects/dia/ + +=head1 TRANSLATING WITH PO4A::DIA + +This module only translates uncompressed Dia diagrams. You can save your +uncompressed diagrams with Dia itself, unchecking the "Compress diagram +files" at the "Save Diagram" dialog. + +Another way is to uncompress the dia files from command line with: + gunzip < original.dia > uncompressed.dia + +=head1 STATUS OF THIS MODULE + +This module is fully functional, as it relies in the L<Locale::Po4a::Xml> +module. This only defines the translatable tags (E<lt>dia:stringE<gt>), and +filters the internal strings (the content of the E<lt>dia:diagramdataE<gt> +tag), not interesting for translation. + +=head1 SEE ALSO + +L<Locale::Po4a::TransTractor(3pm)>, L<Locale::Po4a::Xml(3pm)>, L<po4a(7)|po4a.7> + +=head1 AUTHORS + + Jordi Vilalta <jvprat@gmail.com> + +=head1 COPYRIGHT AND LICENSE + +Copyright © 2004 Jordi Vilalta <jvprat@gmail.com> + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut + +package Locale::Po4a::Dia; + +use 5.16.0; +use strict; +use warnings; + +use Locale::Po4a::Common; +use Locale::Po4a::Xml; + +use vars qw(@ISA); +@ISA = qw(Locale::Po4a::Xml); + +sub initialize { + my $self = shift; + my %options = @_; + + $self->SUPER::initialize(%options); + $self->{options}{'nostrip'} = 1; + $self->{options}{'_default_translated'} .= ' <dia:string>'; + print "Call treat_options\n" if $self->{options}{'debug'}; + $self->treat_options; +} + +sub found_string { + my ( $self, $text, $ref, $options ) = @_; + return $text if $text =~ m/^\s*$/s; + + #We skip the paper type string + if ( $self->get_path() !~ /<dia:diagramdata>/ ) { + $text =~ /^#(.*)#$/s; + $text = "#" . $self->translate( $1, $ref, "String", 'wrap' => $self->{options}{'wrap'} ) . "#"; + } + + return $text; +} diff --git a/lib/Locale/Po4a/Docbook.pm b/lib/Locale/Po4a/Docbook.pm new file mode 100644 index 0000000..4b1dd4f --- /dev/null +++ b/lib/Locale/Po4a/Docbook.pm @@ -0,0 +1,2060 @@ +#!/usr/bin/perl +# aptitude: cmdsynopsis => missing removal of leading spaces + +# Po4a::Docbook.pm +# +# extract and translate translatable strings from DocBook XML documents. +# +# This code extracts plain text from tags and attributes on DocBook XML +# documents. +# +# Copyright © 2004 Jordi Vilalta <jvprat@gmail.com> +# Copyright © 2007-2009 Nicolas François <nicolas.francois@centraliens.net> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +######################################################################## + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::Docbook - convert DocBook XML documents from/to PO files + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::Docbook is a module to help the translation of DocBook XML +documents into other [human] languages. + +=head1 STATUS OF THIS MODULE + +This module is fully functional, as it relies in the L<Locale::Po4a::Xml> +module. This only defines the translatable tags and attributes. + +The only known issue is that it doesn't handle entities yet, and this includes +the file inclusion entities, but you can translate most of those files alone +(except the typical entities files), and it's usually better to maintain them +separated. + +=head1 OVERRIDE THE DEFAULT BEHAVIOR WITH COMMAND LINE OPTIONS + +The default behavior of system provided modules is set to be on the safe side. + +For example, the default of B<< <author> >> tag is aiming it to appear under +B<< <para> >>. But you may be using it only under B<< <bookinfo> >>. For this +case, you may want to translate it independently for each author. + +If you don't like the default behavior of the xml module and its derivative +modules, you can provide command line options to change their behavior. For +example, you can add the following to the po4a configuration file: + + opt:"-k 0 -o nodefault=\"<bookinfo> <author>\" \ + -o break=\"<bookinfo> <author>\" \ + -o untranslated=\"<bookinfo>\" \ + -o translated=\"<author>\"" + +This overrides the default behavior for B<< <bookinfo> >> and B<< <author> >>, +set B<< <bookinfo> >> and B<< <author> >> to break input data stream on these +tags, set B<< <bookinfo> >> not to translate its tagged content, and set B<< +<author> >> to translate its tagged content. + +=head1 SEE ALSO + +L<Locale::Po4a::TransTractor(3pm)>, L<Locale::Po4a::Xml(3pm)>, L<po4a(7)|po4a.7> + +=head1 AUTHORS + + Jordi Vilalta <jvprat@gmail.com> + +=head1 COPYRIGHT AND LICENSE + + Copyright © 2004 Jordi Vilalta <jvprat@gmail.com> + Copyright © 2007-2009 Nicolas François <nicolas.francois@centraliens.net> + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut + +package Locale::Po4a::Docbook; + +use 5.16.0; +use strict; +use warnings; + +use Locale::Po4a::Common; +use Locale::Po4a::Xml; + +use vars qw(@ISA); +@ISA = qw(Locale::Po4a::Xml); + +sub initialize { + my $self = shift; + my %options = @_; + + $self->SUPER::initialize(%options); + $self->{options}{'wrap'} = 1; + $self->{options}{'doctype'} = $self->{options}{'doctype'} || 'docbook xml'; + + # AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + + # abbrev; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <abbrev>"; + $self->{options}{'_default_inline'} .= " <abbrev>"; + + # abstract; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <abstract>"; + $self->{options}{'_default_break'} .= " <abstract>"; + + # accel; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <accel>"; + $self->{options}{'_default_inline'} .= " <accel>"; + + # ackno; does not contain text; Formatted as a displayed block + # Replaced by acknowledgements in DocBook v5.0 + $self->{options}{'_default_untranslated'} .= " <ackno>"; + $self->{options}{'_default_break'} .= " <ackno>"; + + # acknowledgements; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <acknowledgements>"; + $self->{options}{'_default_break'} .= " <acknowledgements>"; + + # acronym; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <acronym>"; + $self->{options}{'_default_inline'} .= " <acronym>"; + + # action; contains text; Formatted inline; v4, not in v5 + $self->{options}{'_default_translated'} .= " <action>"; + $self->{options}{'_default_inline'} .= " <action>"; + + # address; contains text; Formatted as a displayed block; verbatim + $self->{options}{'_default_translated'} .= " W<address>"; + $self->{options}{'_default_placeholder'} .= " <address>"; + + # affiliation; does not contain text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_untranslated'} .= " <affiliation>"; + $self->{options}{'_default_inline'} .= " <affiliation>"; + + # alt; contains text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_translated'} .= " <alt>"; + $self->{options}{'_default_inline'} .= " <alt>"; + + # anchor; does not contain text; Produces no output + $self->{options}{'_default_untranslated'} .= " <anchor>"; + $self->{options}{'_default_inline'} .= " <anchor>"; + + # annotation; does not contain text; + $self->{options}{'_default_untranslated'} .= " <annotation>"; + $self->{options}{'_default_placeholder'} .= " <annotation>"; + + # answer; does not contain text; + $self->{options}{'_default_untranslated'} .= " <answer>"; + $self->{options}{'_default_break'} .= " <answer>"; + + # appendix; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <appendix>"; + $self->{options}{'_default_break'} .= " <appendix>"; + + # appendixinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <appendixinfo>"; + $self->{options}{'_default_placeholder'} .= " <appendixinfo>"; + + # application; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <application>"; + $self->{options}{'_default_inline'} .= " <application>"; + + # arc; does not contain text; + $self->{options}{'_default_untranslated'} .= " <arc>"; + $self->{options}{'_default_inline'} .= " <arc>"; + + # area; does not contain text; + # NOTE: the area is not translatable as is, but the coords + # attribute might be. + $self->{options}{'_default_untranslated'} .= " <area>"; + $self->{options}{'_default_inline'} .= " <area>"; + + # areaset; does not contain text; + # NOTE: the areaset is not translatable as is. depending on the + # language there might be more or less area tags inside. + $self->{options}{'_default_untranslated'} .= " <areaset>"; + $self->{options}{'_default_inline'} .= " <areaset>"; + + # areaspec; does not contain text; + # NOTE: see area and areaset + $self->{options}{'_default_translated'} .= " <areaspec>"; + $self->{options}{'_default_break'} .= " <areaspec>"; + + # arg; contains text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_translated'} .= " <arg>"; + $self->{options}{'_default_inline'} .= " <arg>"; + + # artheader; does not contain text; renamed to articleinfo in v4.0 + $self->{options}{'_default_untranslated'} .= " <artheader>"; + $self->{options}{'_default_placeholder'} .= " <artheader>"; + + # article; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <article>"; + $self->{options}{'_default_break'} .= " <article>"; + + # articleinfo; does not contain text; v4 only + $self->{options}{'_default_untranslated'} .= " <articleinfo>"; + $self->{options}{'_default_placeholder'} .= " <articleinfo>"; + + # artpagenums; contains text; Formatted inline + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <artpagenums>"; + $self->{options}{'_default_inline'} .= " <artpagenums>"; + + # attribution; contains text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_translated'} .= " <attribution>"; + $self->{options}{'_default_inline'} .= " <attribution>"; + + # audiodata; does not contain text; + # NOTE: the attributes might be translated + $self->{options}{'_default_translated'} .= " <audiodata>"; + $self->{options}{'_default_placeholder'} .= " <audiodata>"; + $self->{options}{'_default_attributes'} .= ' <audiodata>fileref'; + + # audioobject; does not contain text; + # NOTE: might be contained in a inlinemediaobject + $self->{options}{'_default_translated'} .= " <audioobject>"; + $self->{options}{'_default_placeholder'} .= " <audioobject>"; + + # author; does not contain text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_untranslated'} .= " <author>"; + $self->{options}{'_default_inline'} .= " <author>"; + + # authorblurb; does not contain text; Formatted as a displayed block. + # v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <authorblurb>"; + $self->{options}{'_default_placeholder'} .= " <authorblurb>"; + + # authorgroup; does not contain text; Formatted inline or as a + # displayed block depending on context + # NOTE: given the possible parents, it is probably very rarely + # inlined + $self->{options}{'_default_untranslated'} .= " <authorgroup>"; + $self->{options}{'_default_break'} .= " <authorgroup>"; + + # authorinitials; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <authorinitials>"; + $self->{options}{'_default_inline'} .= " <authorinitials>"; + + # BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB + + # beginpage; does not contain text; v4, not in v5 + # beginpage and indexterm are ubiquitous elements, they + # can be inlined or block elements. It is better to + # make it inline, like indexterm. A placeholder may + # be used too. + $self->{options}{'_default_untranslated'} .= " <beginpage>"; + $self->{options}{'_default_inline'} .= " <beginpage>"; + + # bibliocoverage; contains text; Formatted inline + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <bibliocoverage>"; + $self->{options}{'_default_inline'} .= " <bibliocoverage>"; + + # bibliodiv; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <bibliodiv>"; + $self->{options}{'_default_break'} .= " <bibliodiv>"; + + # biblioentry; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <biblioentry>"; + $self->{options}{'_default_break'} .= " <biblioentry>"; + + # bibliography; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <bibliography>"; + $self->{options}{'_default_break'} .= " <bibliography>"; + + # bibliographyinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <bibliographyinfo>"; + $self->{options}{'_default_placeholder'} .= " <bibliographyinfo>"; + + # biblioid; contains text; Formatted inline + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <biblioid>"; + $self->{options}{'_default_inline'} .= " <biblioid>"; + + # bibliolist; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <bibliolist>"; + $self->{options}{'_default_break'} .= " <bibliolist>"; + + # bibliomisc; contains text; Formatted inline + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <bibliomisc>"; + $self->{options}{'_default_inline'} .= " <bibliomisc>"; + + # bibliomixed; contains text; Formatted as a displayed block + $self->{options}{'_default_translated'} .= " <bibliomixed>"; + $self->{options}{'_default_placeholder'} .= " <bibliomixed>"; + + # bibliomset; contains text; Formatted as a displayed block + # NOTE: content might need to be inlined, e.g. <bibliomset><title> + $self->{options}{'_default_translated'} .= " <bibliomset>"; + $self->{options}{'_default_placeholder'} .= " <bibliomset>"; + + # biblioref; does not contain text; Formatted inline + $self->{options}{'_default_untranslated'} .= " <biblioref>"; + $self->{options}{'_default_inline'} .= " <biblioref>"; + + # bibliorelation; does not contain text; Formatted inline + $self->{options}{'_default_translated'} .= " <bibliorelation>"; + $self->{options}{'_default_inline'} .= " <bibliorelation>"; + + # biblioset; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <biblioset>"; + $self->{options}{'_default_break'} .= " <biblioset>"; + + # bibliosource; contains text; Formatted inline + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <bibliosource>"; + $self->{options}{'_default_inline'} .= " <bibliosource>"; + + # blockinfo; does not contain text; v4.2, not in v5 + $self->{options}{'_default_untranslated'} .= " <blockinfo>"; + $self->{options}{'_default_placeholder'} .= " <blockinfo>"; + + # blockquote; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <blockquote>"; + $self->{options}{'_default_break'} .= " <blockquote>"; + + # book; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <book>"; + $self->{options}{'_default_break'} .= " <book>"; + + # bookbiblio; does not contain text; Formatted as a displayed block + # Removed in v4.0 + $self->{options}{'_default_untranslated'} .= " <bookbiblio>"; + $self->{options}{'_default_break'} .= " <bookbiblio>"; + + # bookinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <bookinfo>"; + $self->{options}{'_default_placeholder'} .= " <bookinfo>"; + + # bridgehead; contains text; Formatted as a displayed block + $self->{options}{'_default_translated'} .= " <bridgehead>"; + $self->{options}{'_default_break'} .= " <bridgehead>"; + + # CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC + + # callout; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <callout>"; + $self->{options}{'_default_break'} .= " <callout>"; + + # calloutlist; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <calloutlist>"; + $self->{options}{'_default_break'} .= " <calloutlist>"; + + # caption; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <caption>"; + $self->{options}{'_default_break'} .= " <caption>"; + + # caption (db.html.caption); contains text; Formatted as a displayed block + # TODO: Check if this works + $self->{options}{'_default_translated'} .= " <table><caption>"; + $self->{options}{'_default_break'} .= " <table><caption>"; + + # caution; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <caution>"; + $self->{options}{'_default_break'} .= " <caution>"; + + # chapter; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <chapter>"; + $self->{options}{'_default_break'} .= " <chapter>"; + + # chapterinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <chapterinfo>"; + $self->{options}{'_default_placeholder'} .= " <chapterinfo>"; + + # citation; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <citation>"; + $self->{options}{'_default_inline'} .= " <citation>"; + + # citebiblioid; contains text; Formatted inline + # NOTE: maybe untranslated? + $self->{options}{'_default_translated'} .= " <citebiblioid>"; + $self->{options}{'_default_inline'} .= " <citebiblioid>"; + + # citerefentry; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <citerefentry>"; + $self->{options}{'_default_inline'} .= " <citerefentry>"; + + # citetitle; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <citetitle>"; + $self->{options}{'_default_inline'} .= " <citetitle>"; + + # city; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <city>"; + $self->{options}{'_default_inline'} .= " <city>"; + + # classname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <classname>"; + $self->{options}{'_default_inline'} .= " <classname>"; + + # classsynopsis; does not contain text; may be in a para + # NOTE: It may contain a classsynopsisinfo, which should be + # verbatim + $self->{options}{'_default_untranslated'} .= " <classsynopsis>"; + $self->{options}{'_default_placeholder'} .= " <classsynopsis>"; + + # classsynopsisinfo; contains text; + # NOTE: see above + $self->{options}{'_default_translated'} .= " W<classsynopsisinfo>"; + $self->{options}{'_default_inline'} .= " <classsynopsisinfo>"; + + # cmdsynopsis; does not contain text; may be in a para + $self->{options}{'_default_untranslated'} .= " <cmdsynopsis>"; + $self->{options}{'_default_placeholder'} .= " <cmdsynopsis>"; + + # co; does not contain text; Formatted inline + # XXX: tranlsated or not? (label attribute) + $self->{options}{'_default_translated'} .= " <co>"; + $self->{options}{'_default_inline'} .= " <co>"; + + # code; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <code>"; + $self->{options}{'_default_inline'} .= " <code>"; + + # col; does not contain text; + # NOTE: could be translated to change the layout in a translation + # To be done on colgroup in that case. + $self->{options}{'_default_untranslated'} .= " <col>"; + $self->{options}{'_default_break'} .= " <col>"; + + # colgroup; does not contain text; + # NOTE: could be translated to change the layout in a translation + $self->{options}{'_default_untranslated'} .= " <colgroup>"; + $self->{options}{'_default_break'} .= " <colgroup>"; + + # collab; does not contain text; Formatted inline or as a + # displayed block depending on context + # NOTE: could be in the break class + $self->{options}{'_default_untranslated'} .= " <collab>"; + $self->{options}{'_default_inline'} .= " <collab>"; + + # collabname; contains text; Formatted inline or as a + # displayed block depending on context; v4, not in v5 + $self->{options}{'_default_translated'} .= " <collabname>"; + $self->{options}{'_default_inline'} .= " <collabname>"; + + # colophon; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <colophon>"; + $self->{options}{'_default_break'} .= " <colophon>"; + + # colspec; does not contain text; + # NOTE: could be translated to change the layout in a translation + $self->{options}{'_default_untranslated'} .= " <colspec>"; + $self->{options}{'_default_break'} .= " <colspec>"; + + # command; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <command>"; + $self->{options}{'_default_inline'} .= " <command>"; + + # comment; contains text; Formatted inline or as a displayed block + # Renamed to remark in v4.0 + $self->{options}{'_default_translated'} .= " <comment>"; + $self->{options}{'_default_inline'} .= " <comment>"; + + # computeroutput; contains text; Formatted inline + # NOTE: "is not a verbatim environment, but an inline." + $self->{options}{'_default_translated'} .= " <computeroutput>"; + $self->{options}{'_default_inline'} .= " <computeroutput>"; + + # confdates; contains text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_translated'} .= " <confdates>"; + $self->{options}{'_default_inline'} .= " <confdates>"; + + # confgroup; does not contain text; Formatted inline or as a + # displayed block depending on context + # NOTE: could be in the break class + $self->{options}{'_default_untranslated'} .= " <confgroup>"; + $self->{options}{'_default_inline'} .= " <confgroup>"; + + # confnum; contains text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_translated'} .= " <confnum>"; + $self->{options}{'_default_inline'} .= " <confnum>"; + + # confsponsor; contains text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_translated'} .= " <confsponsor>"; + $self->{options}{'_default_inline'} .= " <confsponsor>"; + + # conftitle; contains text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_translated'} .= " <conftitle>"; + $self->{options}{'_default_inline'} .= " <conftitle>"; + + # constant; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <constant>"; + $self->{options}{'_default_inline'} .= " <constant>"; + + # constraint; does not contain text; + # NOTE: it might be better to have the production as verbatim + # Keeping the constrainst inline to have it close to the + # lhs or rhs. + # The attribute is translatable + $self->{options}{'_default_untranslated'} .= " <constraint>"; + $self->{options}{'_default_break'} .= " <constraint>"; + + # constraintdef; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <constraintdef>"; + $self->{options}{'_default_break'} .= " <constraintdef>"; + + # constructorsynopsis; does not contain text; may be in a para + $self->{options}{'_default_untranslated'} .= " <constructorsynopsis>"; + $self->{options}{'_default_placeholder'} .= " <constructorsynopsis>"; + + # contractnum; contains text; Formatted inline or as a displayed block + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <contractnum>"; + $self->{options}{'_default_inline'} .= " <contractnum>"; + + # contractsponsor; contains text; Formatted inline or as a displayed block + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <contractsponsor>"; + $self->{options}{'_default_inline'} .= " <contractsponsor>"; + + # contrib; contains text; Formatted inline or as a displayed block + $self->{options}{'_default_translated'} .= " <contrib>"; + $self->{options}{'_default_inline'} .= " <contrib>"; + + # copyright; contains text; Formatted inline or as a displayed block + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <copyright>"; + $self->{options}{'_default_inline'} .= " <copyright>"; + + # coref; does not contain text; Formatted inline + # XXX: tranlsated or not? (label attribute) + $self->{options}{'_default_translated'} .= " <coref>"; + $self->{options}{'_default_inline'} .= " <coref>"; + + # corpauthor; contains text; Formatted inline or as a + # displayed block depending on context; v4, not in v5 + $self->{options}{'_default_translated'} .= " <corpauthor>"; + $self->{options}{'_default_inline'} .= " <corpauthor>"; + + # corpcredit; contains text; Formatted inline or as a + # displayed block depending on context; v4, not in v5 + $self->{options}{'_default_translated'} .= " <corpcredit>"; + $self->{options}{'_default_inline'} .= " <corpcredit>"; + + # corpname; contains text; Formatted inline or as a + # displayed block depending on context; v4, not in v5 + $self->{options}{'_default_translated'} .= " <corpname>"; + $self->{options}{'_default_inline'} .= " <corpname>"; + + # country; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <country>"; + $self->{options}{'_default_inline'} .= " <country>"; + + # cover; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <cover>"; + $self->{options}{'_default_break'} .= " <cover>"; + + # DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD + + # database; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <database>"; + $self->{options}{'_default_inline'} .= " <database>"; + + # date; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <date>"; + $self->{options}{'_default_inline'} .= " <date>"; + + # dedication; contains text; Formatted as a displayed block + $self->{options}{'_default_translated'} .= " <dedication>"; + $self->{options}{'_default_break'} .= " <dedication>"; + + # destructorsynopsis; does not contain text; may be in a para + $self->{options}{'_default_untranslated'} .= " <destructorsynopsis>"; + $self->{options}{'_default_placeholder'} .= " <destructorsynopsis>"; + + # docinfo; does not contain text; removed in v4.0 + $self->{options}{'_default_untranslated'} .= " <docinfo>"; + $self->{options}{'_default_placeholder'} .= " <docinfo>"; + + # EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE + + # edition; contains text; Formatted inline or as a displayed block + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <edition>"; + $self->{options}{'_default_inline'} .= " <edition>"; + + # editor; does not contain text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_untranslated'} .= " <editor>"; + $self->{options}{'_default_inline'} .= " <editor>"; + + # email; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <email>"; + $self->{options}{'_default_inline'} .= " <email>"; + + # emphasis; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <emphasis>"; + $self->{options}{'_default_inline'} .= " <emphasis>"; + + # entry; contains text; + $self->{options}{'_default_translated'} .= " <entry>"; + $self->{options}{'_default_break'} .= " <entry>"; + + # entrytbl; does not contain text; + $self->{options}{'_default_untranslated'} .= " <entrytbl>"; + $self->{options}{'_default_break'} .= " <entrytbl>"; + + # envar; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <envar>"; + $self->{options}{'_default_inline'} .= " <envar>"; + + # epigraph; contains text; Formatted as a displayed block. + # NOTE: maybe contained in a para + $self->{options}{'_default_translated'} .= " <epigraph>"; + $self->{options}{'_default_placeholder'} .= " <epigraph>"; + + # equation; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <equation>"; + $self->{options}{'_default_break'} .= " <equation>"; + + # errorcode; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <errorcode>"; + $self->{options}{'_default_inline'} .= " <errorcode>"; + + # errorname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <errorname>"; + $self->{options}{'_default_inline'} .= " <errorname>"; + + # errortext; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <errortext>"; + $self->{options}{'_default_inline'} .= " <errortext>"; + + # errortype; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <errortype>"; + $self->{options}{'_default_inline'} .= " <errortype>"; + + # example; does not contain text; Formatted as a displayed block. + # NOTE: maybe contained in a para + $self->{options}{'_default_untranslated'} .= " <example>"; + $self->{options}{'_default_placeholder'} .= " <example>"; + + # exceptionname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <exceptionname>"; + $self->{options}{'_default_inline'} .= " <exceptionname>"; + + # extendedlink; does not contain text; + $self->{options}{'_default_untranslated'} .= " <extendedlink>"; + $self->{options}{'_default_inline'} .= " <extendedlink>"; + + # FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF + + # fax; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <fax>"; + $self->{options}{'_default_inline'} .= " <fax>"; + + # fieldsynopsis; does not contain text; may be in a para + $self->{options}{'_default_untranslated'} .= " <fieldsynopsis>"; + $self->{options}{'_default_inline'} .= " <fieldsynopsis>"; + + # figure; does not contain text; Formatted as a displayed block. + # NOTE: maybe contained in a para + $self->{options}{'_default_untranslated'} .= " <figure>"; + $self->{options}{'_default_placeholder'} .= " <figure>"; + + # filename; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <filename>"; + $self->{options}{'_default_inline'} .= " <filename>"; + + # firstname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <firstname>"; + $self->{options}{'_default_inline'} .= " <firstname>"; + + # firstterm; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <firstterm>"; + $self->{options}{'_default_inline'} .= " <firstterm>"; + + # footnote; contains text; + $self->{options}{'_default_translated'} .= " <footnote>"; + $self->{options}{'_default_placeholder'} .= " <footnote>"; + + # footnoteref; contains text; + $self->{options}{'_default_translated'} .= " <footnoteref>"; + $self->{options}{'_default_inline'} .= " <footnoteref>"; + + # foreignphrase; contains text; + $self->{options}{'_default_translated'} .= " <foreignphrase>"; + $self->{options}{'_default_inline'} .= " <foreignphrase>"; + + # formalpara; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <formalpara>"; + $self->{options}{'_default_break'} .= " <formalpara>"; + + # funcdef; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <funcdef>"; + $self->{options}{'_default_inline'} .= " <funcdef>"; + + # funcparams; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <funcparams>"; + $self->{options}{'_default_inline'} .= " <funcparams>"; + + # funcprototype; does not contain text; + # NOTE: maybe contained in a funcsynopsis, contained in a para + $self->{options}{'_default_untranslated'} .= " <funcprototype>"; + $self->{options}{'_default_placeholder'} .= " <funcprototype>"; + + # funcsynopsis; does not contain text; + # NOTE: maybe contained in a para + $self->{options}{'_default_untranslated'} .= " <funcsynopsis>"; + $self->{options}{'_default_placeholder'} .= " <funcsynopsis>"; + + # funcsynopsisinfo; contains text; verbatim + # NOTE: maybe contained in a funcsynopsis, contained in a para + $self->{options}{'_default_translated'} .= " W<funcsynopsisinfo>"; + $self->{options}{'_default_placeholder'} .= " <funcsynopsisinfo>"; + + # function; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <function>"; + $self->{options}{'_default_inline'} .= " <function>"; + + # GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG + + # glossary; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <glossary>"; + $self->{options}{'_default_break'} .= " <glossary>"; + + # glossaryinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <glossaryinfo>"; + $self->{options}{'_default_placeholder'} .= " <glossaryinfo>"; + + # glossdef; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <glossdef>"; + $self->{options}{'_default_break'} .= " <glossdef>"; + + # glossdiv; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <glossdiv>"; + $self->{options}{'_default_break'} .= " <glossdiv>"; + + # glossentry; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <glossentry>"; + $self->{options}{'_default_break'} .= " <glossentry>"; + + # glosslist; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <glosslist>"; + $self->{options}{'_default_break'} .= " <glosslist>"; + + # glosssee; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <glosssee>"; + $self->{options}{'_default_break'} .= " <glosssee>"; + + # glossseealso; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <glossseealso>"; + $self->{options}{'_default_break'} .= " <glossseealso>"; + + # glossterm; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <glossterm>"; + $self->{options}{'_default_inline'} .= " <glossterm>"; + + # graphic; does not contain text; Formatted as a displayed block + # v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <graphic>"; + $self->{options}{'_default_inline'} .= " <graphic>"; + $self->{options}{'_default_attributes'} .= ' <graphic>fileref'; + + # graphicco; does not contain text; Formatted as a displayed block. + # v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <graphicco>"; + $self->{options}{'_default_placeholder'} .= " <graphicco>"; + + # group; does not contain text; Formatted inline + $self->{options}{'_default_untranslated'} .= " <group>"; + $self->{options}{'_default_inline'} .= " <group>"; + + # guibutton; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <guibutton>"; + $self->{options}{'_default_inline'} .= " <guibutton>"; + + # guiicon; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <guiicon>"; + $self->{options}{'_default_inline'} .= " <guiicon>"; + + # guilabel; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <guilabel>"; + $self->{options}{'_default_inline'} .= " <guilabel>"; + + # guimenu; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <guimenu>"; + $self->{options}{'_default_inline'} .= " <guimenu>"; + + # guimenuitem; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <guimenuitem>"; + $self->{options}{'_default_inline'} .= " <guimenuitem>"; + + # guisubmenu; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <guisubmenu>"; + $self->{options}{'_default_inline'} .= " <guisubmenu>"; + + # HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH + + # hardware; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <hardware>"; + $self->{options}{'_default_inline'} .= " <hardware>"; + + # highlights; does not contain text; Formatted inline + # v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <highlights>"; + $self->{options}{'_default_break'} .= " <highlights>"; + + # holder; contains text; + # NOTE: may depend on the copyright container + $self->{options}{'_default_translated'} .= " <holder>"; + $self->{options}{'_default_inline'} .= " <holder>"; + + # honorific; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <honorific>"; + $self->{options}{'_default_inline'} .= " <honorific>"; + + # html:button; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <html:button>"; + $self->{options}{'_default_inline'} .= " <html:button>"; + + # html:fieldset; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <html:fieldset>"; + $self->{options}{'_default_inline'} .= " <html:fieldset>"; + + # html:form; does not contain text; + $self->{options}{'_default_translated'} .= " <html:form>"; + $self->{options}{'_default_inline'} .= " <html:form>"; + + # html:input; does not contain text; Formatted inline + # NOTE: attributes are translatable + $self->{options}{'_default_translated'} .= " <html:input>"; + $self->{options}{'_default_inline'} .= " <html:input>"; + + # html:label; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <html:label>"; + $self->{options}{'_default_inline'} .= " <html:label>"; + + # html:legend; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <html:legend>"; + $self->{options}{'_default_inline'} .= " <html:legend>"; + + # html:option; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <html:option>"; + $self->{options}{'_default_inline'} .= " <html:option>"; + + # html:select; does not contain text; Formatted inline + $self->{options}{'_default_translated'} .= " <html:select>"; + $self->{options}{'_default_inline'} .= " <html:select>"; + + # html:textarea; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <html:textarea>"; + $self->{options}{'_default_placeholder'} .= " <html:textarea>"; + + # imagedata; does not contain text; May be formatted inline or + # as a displayed block, depending on context + $self->{options}{'_default_translated'} .= " <imagedata>"; + $self->{options}{'_default_inline'} .= " <imagedata>"; + $self->{options}{'_default_attributes'} .= ' <imagedata>fileref'; + + # imageobject; does not contain text; May be formatted inline or + # as a displayed block, depending on context + $self->{options}{'_default_untranslated'} .= " <imageobject>"; + $self->{options}{'_default_inline'} .= " <imageobject>"; + + # imageobjectco; does not contain text; Formatted as a displayed block + # NOTE: may be in a inlinemediaobject + # TODO: check if this works when the inlinemediaobject is defined + # as inline + $self->{options}{'_default_untranslated'} .= " <imageobjectco>"; + $self->{options}{'_default_break'} .= " <imageobjectco>"; + + # important; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <important>"; + $self->{options}{'_default_break'} .= " <important>"; + + # index; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <index>"; + $self->{options}{'_default_break'} .= " <index>"; + + # indexdiv; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <indexdiv>"; + $self->{options}{'_default_break'} .= " <indexdiv>"; + + # indexentry; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <indexentry>"; + $self->{options}{'_default_break'} .= " <indexentry>"; + + # indexinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <indexinfo>"; + $self->{options}{'_default_placeholder'} .= " <indexinfo>"; + + # indexterm; does not contain text; + $self->{options}{'_default_untranslated'} .= " <indexterm>"; + $self->{options}{'_default_placeholder'} .= " <indexterm>"; + + # info; does not contain text; + $self->{options}{'_default_untranslated'} .= " <info>"; + $self->{options}{'_default_placeholder'} .= " <info>"; + + # informalequation; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <informalequation>"; + $self->{options}{'_default_placeholder'} .= " <informalequation>"; + + # informalexample; does not contain text; Formatted as a displayed block. + # NOTE: can be in a para + $self->{options}{'_default_untranslated'} .= " <informalexample>"; + $self->{options}{'_default_break'} .= " <informalexample>"; + + # informalfigure; does not contain text; Formatted as a displayed block. + # NOTE: can be in a para + $self->{options}{'_default_untranslated'} .= " <informalfigure>"; + $self->{options}{'_default_break'} .= " <informalfigure>"; + + # informaltable; does not contain text; Formatted as a displayed block. + # NOTE: can be in a para + $self->{options}{'_default_untranslated'} .= " <informaltable>"; + $self->{options}{'_default_break'} .= " <informaltable>"; + + # initializer; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <initializer>"; + $self->{options}{'_default_inline'} .= " <initializer>"; + + # inlineequation; does not contain text; Formatted inline + $self->{options}{'_default_translated'} .= " W<inlineequation>"; + $self->{options}{'_default_placeholder'} .= " <inlineequation>"; + + # inlinegraphic; does not contain text; Formatted inline + # empty; v4, not in v5 + $self->{options}{'_default_translated'} .= " W<inlinegraphic>"; + $self->{options}{'_default_inline'} .= " <inlinegraphic>"; + + # inlinemediaobject; does not contain text; Formatted inline + $self->{options}{'_default_translated'} .= " <inlinemediaobject>"; + $self->{options}{'_default_placeholder'} .= " <inlinemediaobject>"; + + # interface; contains text; Formatted inline; v4, not in v5 + $self->{options}{'_default_translated'} .= " <interface>"; + $self->{options}{'_default_inline'} .= " <interface>"; + + # interfacedefinition; contains text; Formatted inline + # Removed in v4.0 + $self->{options}{'_default_translated'} .= " <interfacedefinition>"; + $self->{options}{'_default_inline'} .= " <interfacedefinition>"; + + # interfacename; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <interfacename>"; + $self->{options}{'_default_inline'} .= " <interfacename>"; + + # invpartnumber; contains text; Formatted inline; v4, not in v5 + $self->{options}{'_default_translated'} .= " <invpartnumber>"; + $self->{options}{'_default_inline'} .= " <invpartnumber>"; + + # isbn; contains text; Formatted inline; v4, not in v5 + $self->{options}{'_default_translated'} .= " <isbn>"; + $self->{options}{'_default_inline'} .= " <isbn>"; + + # issn; contains text; Formatted inline; v4, not in v5 + $self->{options}{'_default_translated'} .= " <issn>"; + $self->{options}{'_default_inline'} .= " <issn>"; + + # issuenum; contains text; Formatted inline or as a displayed block + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <issuenum>"; + $self->{options}{'_default_inline'} .= " <issuenum>"; + + # itemizedlist; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <itemizedlist>"; + $self->{options}{'_default_break'} .= " <itemizedlist>"; + + # itermset; does not contain text; + # FIXME + $self->{options}{'_default_untranslated'} .= " <itermset>"; + $self->{options}{'_default_inline'} .= " <itermset>"; + + # JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ + + # jobtitle; contains text; Formatted inline or as a displayed block + # NOTE: can be in a para + $self->{options}{'_default_translated'} .= " <jobtitle>"; + $self->{options}{'_default_inline'} .= " <jobtitle>"; + + # KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK + + # keycap; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <keycap>"; + $self->{options}{'_default_inline'} .= " <keycap>"; + + # keycode; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <keycode>"; + $self->{options}{'_default_inline'} .= " <keycode>"; + + # keycombo; does not contain text; Formatted inline + $self->{options}{'_default_translated'} .= " <keycombo>"; + $self->{options}{'_default_inline'} .= " <keycombo>"; + + # keysym; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <keysym>"; + $self->{options}{'_default_inline'} .= " <keysym>"; + + # keyword; contains text; + # NOTE: could be inline + $self->{options}{'_default_translated'} .= " <keyword>"; + $self->{options}{'_default_break'} .= " <keyword>"; + + # keywordset; contains text; Formatted inline or as a displayed block + # NOTE: could be placeholder/break + $self->{options}{'_default_translated'} .= " <keywordset>"; + $self->{options}{'_default_break'} .= " <keywordset>"; + + # LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL + + # label; contains text; Formatted as a displayed block + $self->{options}{'_default_translated'} .= " <label>"; + $self->{options}{'_default_break'} .= " <label>"; + + # legalnotice; contains text; Formatted as a displayed block + $self->{options}{'_default_translated'} .= " <legalnotice>"; + $self->{options}{'_default_break'} .= " <legalnotice>"; + + # lhs; contains text; Formatted as a displayed block. + # NOTE: it might be better to have the production as verbatim + # Keeping the constrainst inline to have it close to the + # lhs or rhs. + $self->{options}{'_default_translated'} .= " <lhs>"; + $self->{options}{'_default_break'} .= " <lhs>"; + + # lineage; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <lineage>"; + $self->{options}{'_default_inline'} .= " <lineage>"; + + # lineannotation; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <lineannotation>"; + $self->{options}{'_default_inline'} .= " <lineannotation>"; + + # link; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <link>"; + $self->{options}{'_default_inline'} .= " <link>"; + + # listitem; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <listitem>"; + $self->{options}{'_default_break'} .= " <listitem>"; + + # literal; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <literal>"; + $self->{options}{'_default_inline'} .= " <literal>"; + + # literallayout; contains text; verbatim + $self->{options}{'_default_translated'} .= " W<literallayout>"; + $self->{options}{'_default_placeholder'} .= " <literallayout>"; + + # locator; does not contain text; + $self->{options}{'_default_untranslated'} .= " <locator>"; + $self->{options}{'_default_inline'} .= " <locator>"; + + # lot; does not contain text; Formatted as a displayed block. + # v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <lot>"; + $self->{options}{'_default_break'} .= " <lot>"; + + # lotentry; contains text; Formatted as a displayed block. + # v4, not in v5 + $self->{options}{'_default_translated'} .= " <lotentry>"; + $self->{options}{'_default_break'} .= " <lotentry>"; + + # MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM + + # manvolnum; contains text; + $self->{options}{'_default_translated'} .= " <manvolnum>"; + $self->{options}{'_default_inline'} .= " <manvolnum>"; + + # markup; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <markup>"; + $self->{options}{'_default_inline'} .= " <markup>"; + + # mathphrase; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <mathphrase>"; + $self->{options}{'_default_inline'} .= " <mathphrase>"; + + # medialabel; contains text; Formatted inline + # v4, not in v5 + $self->{options}{'_default_translated'} .= " <medialabel>"; + $self->{options}{'_default_inline'} .= " <medialabel>"; + + # mediaobject; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <mediaobject>"; + $self->{options}{'_default_placeholder'} .= " <mediaobject>"; + + # mediaobjectco; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <mediaobjectco>"; + $self->{options}{'_default_placeholder'} .= " <mediaobjectco>"; + + # member; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <member>"; + $self->{options}{'_default_inline'} .= " <member>"; + + # menuchoice; does not contain text; Formatted inline + $self->{options}{'_default_translated'} .= " <menuchoice>"; + $self->{options}{'_default_inline'} .= " <menuchoice>"; + + # methodname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <methodname>"; + $self->{options}{'_default_inline'} .= " <methodname>"; + + # methodparam; does not contain text; Formatted inline + $self->{options}{'_default_translated'} .= " <methodparam>"; + $self->{options}{'_default_inline'} .= " <methodparam>"; + + # methodsynopsis; does not contain text; Formatted inline + $self->{options}{'_default_translated'} .= " <methodsynopsis>"; + $self->{options}{'_default_inline'} .= " <methodsynopsis>"; + + # modifier; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <modifier>"; + $self->{options}{'_default_inline'} .= " <modifier>"; + + # mousebutton; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <mousebutton>"; + $self->{options}{'_default_inline'} .= " <mousebutton>"; + + # msg; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <msg>"; + $self->{options}{'_default_break'} .= " <msg>"; + + # msgaud; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <msgaud>"; + $self->{options}{'_default_break'} .= " <msgaud>"; + + # msgentry; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <msgentry>"; + $self->{options}{'_default_break'} .= " <msgentry>"; + + # msgexplan; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <msgexplan>"; + $self->{options}{'_default_break'} .= " <msgexplan>"; + + # msginfo; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <msginfo>"; + $self->{options}{'_default_break'} .= " <msginfo>"; + + # msglevel; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <msglevel>"; + $self->{options}{'_default_break'} .= " <msglevel>"; + + # msgmain; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <msgmain>"; + $self->{options}{'_default_break'} .= " <msgmain>"; + + # msgorig; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <msgorig>"; + $self->{options}{'_default_break'} .= " <msgorig>"; + + # msgrel; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <msgrel>"; + $self->{options}{'_default_break'} .= " <msgrel>"; + + # msgset; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <msgset>"; + $self->{options}{'_default_placeholder'} .= " <msgset>"; + + # msgsub; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <msgsub>"; + $self->{options}{'_default_break'} .= " <msgsub>"; + + # msgtext; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <msgtext>"; + $self->{options}{'_default_break'} .= " <msgtext>"; + + # NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN + + # nonterminal; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <nonterminal>"; + $self->{options}{'_default_inline'} .= " <nonterminal>"; + + # note; does not contain text; Formatted inline + # NOTE: can be in a para + $self->{options}{'_default_untranslated'} .= " <note>"; + $self->{options}{'_default_inline'} .= " <note>"; + + # OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO + + # objectinfo; does not contain text; v3.1 -> v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <objectinfo>"; + $self->{options}{'_default_placeholder'} .= " <objectinfo>"; + + # olink; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <olink>"; + $self->{options}{'_default_inline'} .= " <olink>"; + + # ooclass; does not contain text; Formatted inline + $self->{options}{'_default_translated'} .= " <ooclass>"; + $self->{options}{'_default_inline'} .= " <ooclass>"; + + # ooexception; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <ooexception>"; + $self->{options}{'_default_inline'} .= " <ooexception>"; + + # oointerface; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <oointerface>"; + $self->{options}{'_default_inline'} .= " <oointerface>"; + + # option; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <option>"; + $self->{options}{'_default_inline'} .= " <option>"; + + # optional; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <optional>"; + $self->{options}{'_default_inline'} .= " <optional>"; + + # orderedlist; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <orderedlist>"; + $self->{options}{'_default_placeholder'} .= " <orderedlist>"; + + # org; does not contain text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_untranslated'} .= " <org>"; + $self->{options}{'_default_inline'} .= " <org>"; + + # orgdiv; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <orgdiv>"; + $self->{options}{'_default_inline'} .= " <orgdiv>"; + + # orgname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <orgname>"; + $self->{options}{'_default_inline'} .= " <orgname>"; + + # otheraddr; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <otheraddr>"; + $self->{options}{'_default_inline'} .= " <otheraddr>"; + + # othercredit; does not contain text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_untranslated'} .= " <othercredit>"; + $self->{options}{'_default_inline'} .= " <othercredit>"; + + # othername; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <othername>"; + $self->{options}{'_default_inline'} .= " <othername>"; + + # PPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP + + # package; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <package>"; + $self->{options}{'_default_inline'} .= " <package>"; + + # pagenums; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <pagenums>"; + $self->{options}{'_default_inline'} .= " <pagenums>"; + + # para; contains text; Formatted as a displayed block + $self->{options}{'_default_translated'} .= " <para>"; + $self->{options}{'_default_break'} .= " <para>"; + + # paramdef; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <paramdef>"; + $self->{options}{'_default_inline'} .= " <paramdef>"; + + # parameter; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <parameter>"; + $self->{options}{'_default_inline'} .= " <parameter>"; + + # part; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <part>"; + $self->{options}{'_default_break'} .= " <part>"; + + # partinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <partinfo>"; + $self->{options}{'_default_placeholder'} .= " <partinfo>"; + + # partintro; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <partintro>"; + $self->{options}{'_default_break'} .= " <partintro>"; + + # person; does not contain text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_untranslated'} .= " <person>"; + $self->{options}{'_default_inline'} .= " <person>"; + + # personblurb; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <personblurb>"; + $self->{options}{'_default_placeholder'} .= " <personblurb>"; + + # personname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <personname>"; + $self->{options}{'_default_inline'} .= " <personname>"; + + # phone; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <phone>"; + $self->{options}{'_default_inline'} .= " <phone>"; + + # phrase; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <phrase>"; + $self->{options}{'_default_inline'} .= " <phrase>"; + + # pob; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <pob>"; + $self->{options}{'_default_inline'} .= " <pob>"; + + # postcode; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <postcode>"; + $self->{options}{'_default_inline'} .= " <postcode>"; + + # preface; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <preface>"; + $self->{options}{'_default_break'} .= " <preface>"; + + # prefaceinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <prefaceinfo>"; + $self->{options}{'_default_placeholder'} .= " <prefaceinfo>"; + + # primary; contains text; + $self->{options}{'_default_translated'} .= " <primary>"; + $self->{options}{'_default_break'} .= " <primary>"; + + # primaryie; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <primaryie>"; + $self->{options}{'_default_break'} .= " <primaryie>"; + + # printhistory; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <printhistory>"; + $self->{options}{'_default_break'} .= " <printhistory>"; + + # procedure; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <procedure>"; + $self->{options}{'_default_placeholder'} .= " <procedure>"; + + # production; doesnot contain text; + # NOTE: it might be better to have the production as verbatim + # Keeping the constrainst inline to have it close to the + # lhs or rhs. + $self->{options}{'_default_untranslated'} .= " <production>"; + $self->{options}{'_default_break'} .= " <production>"; + + # productionrecap; does not contain text; like production + $self->{options}{'_default_untranslated'} .= " <productionrecap>"; + $self->{options}{'_default_break'} .= " <productionrecap>"; + + # productionset; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <productionset>"; + $self->{options}{'_default_placeholder'} .= " <productionset>"; + + # productname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <productname>"; + $self->{options}{'_default_inline'} .= " <productname>"; + + # productnumber; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <productnumber>"; + $self->{options}{'_default_inline'} .= " <productnumber>"; + + # programlisting; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " W<programlisting>"; + $self->{options}{'_default_placeholder'} .= " <programlisting>"; + + # programlistingco; contains text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <programlistingco>"; + $self->{options}{'_default_placeholder'} .= " <programlistingco>"; + + # prompt; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <prompt>"; + $self->{options}{'_default_inline'} .= " <prompt>"; + + # property; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <property>"; + $self->{options}{'_default_inline'} .= " <property>"; + + # pubdate; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <pubdate>"; + $self->{options}{'_default_inline'} .= " <pubdate>"; + + # publisher; does not contain text; Formatted inline or as a displayed block + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <publisher>"; + $self->{options}{'_default_inline'} .= " <publisher>"; + + # publishername; contains text; Formatted inline or as a displayed block + $self->{options}{'_default_translated'} .= " <publishername>"; + $self->{options}{'_default_inline'} .= " <publishername>"; + + # QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ + + # qandadiv; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <qandadiv>"; + $self->{options}{'_default_break'} .= " <qandadiv>"; + + # qandaentry; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <qandaentry>"; + $self->{options}{'_default_break'} .= " <qandaentry>"; + + # qandaset; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <qandaset>"; + $self->{options}{'_default_break'} .= " <qandaset>"; + + # question; does not contain text; + $self->{options}{'_default_untranslated'} .= " <question>"; + $self->{options}{'_default_break'} .= " <question>"; + + # quote; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <quote>"; + $self->{options}{'_default_inline'} .= " <quote>"; + + # RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR + + # refclass; contains text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_translated'} .= " <refclass>"; + $self->{options}{'_default_break'} .= " <refclass>"; + + # refdescriptor; contains text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_translated'} .= " <refdescriptor>"; + $self->{options}{'_default_break'} .= " <refdescriptor>"; + + # refentry; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <refentry>"; + $self->{options}{'_default_break'} .= " <refentry>"; + + # refentryinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <refentryinfo>"; + $self->{options}{'_default_placeholder'} .= " <refentryinfo>"; + + # refentrytitle; contains text; Formatted as a displayed block + # FIXME: do not seems to be a block + $self->{options}{'_default_translated'} .= " <refentrytitle>"; + $self->{options}{'_default_inline'} .= " <refentrytitle>"; + + # reference; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <reference>"; + $self->{options}{'_default_break'} .= " <reference>"; + + # referenceinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <referenceinfo>"; + $self->{options}{'_default_placeholder'} .= " <referenceinfo>"; + + # refmeta; does not contains text; + # NOTE: could be in the inline class + $self->{options}{'_default_untranslated'} .= " <refmeta>"; + $self->{options}{'_default_break'} .= " <refmeta>"; + + # refmiscinfo; contains text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_translated'} .= " <refmiscinfo>"; + $self->{options}{'_default_break'} .= " <refmiscinfo>"; + + # refname; contains text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_translated'} .= " <refname>"; + $self->{options}{'_default_break'} .= " <refname>"; + + # refnamediv; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <refnamediv>"; + $self->{options}{'_default_break'} .= " <refnamediv>"; + + # refpurpose; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <refpurpose>"; + $self->{options}{'_default_inline'} .= " <refpurpose>"; + + # refsect1; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <refsect1>"; + $self->{options}{'_default_break'} .= " <refsect1>"; + + # refsect1info; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <refsect1info>"; + $self->{options}{'_default_placeholder'} .= " <refsect1info>"; + + # refsect2; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <refsect2>"; + $self->{options}{'_default_break'} .= " <refsect2>"; + + # refsect2info; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <refsect2info>"; + $self->{options}{'_default_placeholder'} .= " <refsect2info>"; + + # refsect3; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <refsect3>"; + $self->{options}{'_default_break'} .= " <refsect3>"; + + # refsect3info; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <refsect3info>"; + $self->{options}{'_default_placeholder'} .= " <refsect3info>"; + + # refsection; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <refsection>"; + $self->{options}{'_default_break'} .= " <refsection>"; + + # refsectioninfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <refsectioninfo>"; + $self->{options}{'_default_placeholder'} .= " <refsectioninfo>"; + + # refsynopsisdiv; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <refsynopsisdiv>"; + $self->{options}{'_default_break'} .= " <refsynopsisdiv>"; + + # refsynopsisdivinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <refsynopsisdivinfo>"; + $self->{options}{'_default_placeholder'} .= " <refsynopsisdivinfo>"; + + # releaseinfo; contains text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_translated'} .= " <releaseinfo>"; + $self->{options}{'_default_break'} .= " <releaseinfo>"; + + # remark; contains text; Formatted inline or as a displayed block + $self->{options}{'_default_translated'} .= " <remark>"; + $self->{options}{'_default_inline'} .= " <remark>"; + + # replaceable; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <replaceable>"; + $self->{options}{'_default_inline'} .= " <replaceable>"; + + # returnvalue; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <returnvalue>"; + $self->{options}{'_default_inline'} .= " <returnvalue>"; + + # revdescription; contains text; Formatted inline or as a displayed block + $self->{options}{'_default_translated'} .= " <revdescription>"; + $self->{options}{'_default_break'} .= " <revdescription>"; + + # revhistory; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <revhistory>"; + $self->{options}{'_default_break'} .= " <revhistory>"; + + # revision; does not contain text; + $self->{options}{'_default_untranslated'} .= " <revision>"; + $self->{options}{'_default_break'} .= " <revision>"; + + # revnumber; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <revnumber>"; + $self->{options}{'_default_inline'} .= " <revnumber>"; + + # revremark; contains text; Formatted inline or as a displayed block + $self->{options}{'_default_translated'} .= " <revremark>"; + $self->{options}{'_default_break'} .= " <revremark>"; + + # rhs; contains text; Formatted as a displayed block. + # NOTE: it might be better to have the production as verbatim + # Keeping the constrainst inline to have it close to the + # lhs or rhs. + $self->{options}{'_default_translated'} .= " <rhs>"; + $self->{options}{'_default_break'} .= " <rhs>"; + + # row; does not contain text; + $self->{options}{'_default_untranslated'} .= " <row>"; + $self->{options}{'_default_break'} .= " <row>"; + + # SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS + + # sbr; does not contain text; line break + $self->{options}{'_default_untranslated'} .= " <sbr>"; + $self->{options}{'_default_break'} .= " <sbr>"; + + # screen; contains text; verbatim + $self->{options}{'_default_translated'} .= " W<screen>"; + $self->{options}{'_default_placeholder'} .= " <screen>"; + + # screenco; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <screenco>"; + $self->{options}{'_default_placeholder'} .= " <screenco>"; + + # screeninfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <screeninfo>"; + $self->{options}{'_default_placeholder'} .= " <screeninfo>"; + + # screenshot; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <screenshot>"; + $self->{options}{'_default_placeholder'} .= " <screenshot>"; + + # secondary; contains text; + $self->{options}{'_default_translated'} .= " <secondary>"; + $self->{options}{'_default_break'} .= " <secondary>"; + + # secondaryie; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <secondaryie>"; + $self->{options}{'_default_break'} .= " <secondaryie>"; + + # sect1; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <sect1>"; + $self->{options}{'_default_break'} .= " <sect1>"; + + # sect1info; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <sect1info>"; + $self->{options}{'_default_placeholder'} .= " <sect1info>"; + + # sect2; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <sect2>"; + $self->{options}{'_default_break'} .= " <sect2>"; + + # sect2info; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <sect2info>"; + $self->{options}{'_default_placeholder'} .= " <sect2info>"; + + # sect3; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <sect3>"; + $self->{options}{'_default_break'} .= " <sect3>"; + + # sect3info; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <sect3info>"; + $self->{options}{'_default_placeholder'} .= " <sect3info>"; + + # sect4; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <sect4>"; + $self->{options}{'_default_break'} .= " <sect4>"; + + # sect4info; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <sect4info>"; + $self->{options}{'_default_placeholder'} .= " <sect4info>"; + + # sect5; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <sect5>"; + $self->{options}{'_default_break'} .= " <sect5>"; + + # sect5info; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <sect5info>"; + $self->{options}{'_default_placeholder'} .= " <sect5info>"; + + # section; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <section>"; + $self->{options}{'_default_break'} .= " <section>"; + + # sectioninfo; does not contain text; v3.1 -> v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <sectioninfo>"; + $self->{options}{'_default_placeholder'} .= " <sectioninfo>"; + + # see; contains text; + $self->{options}{'_default_translated'} .= " <see>"; + $self->{options}{'_default_break'} .= " <see>"; + + # seealso; contains text; + $self->{options}{'_default_translated'} .= " <seealso>"; + $self->{options}{'_default_break'} .= " <seealso>"; + + # seealsoie; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <seealsoie>"; + $self->{options}{'_default_break'} .= " <seealsoie>"; + + # seeie; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <seeie>"; + $self->{options}{'_default_break'} .= " <seeie>"; + + # seg; contains text; + $self->{options}{'_default_translated'} .= " <seg>"; + $self->{options}{'_default_break'} .= " <seg>"; + + # seglistitem; does not contain text; + $self->{options}{'_default_untranslated'} .= " <seglistitem>"; + $self->{options}{'_default_break'} .= " <seglistitem>"; + + # segmentedlist; does not contain text; + $self->{options}{'_default_untranslated'} .= " <segmentedlist>"; + $self->{options}{'_default_break'} .= " <segmentedlist>"; + + # segtitle; contains text; + $self->{options}{'_default_translated'} .= " <segtitle>"; + $self->{options}{'_default_break'} .= " <segtitle>"; + + # seriesinfo; does not contain text; + # Removed in v4.0 + $self->{options}{'_default_untranslated'} .= " <seriesinfo>"; + $self->{options}{'_default_placeholder'} .= " <seriesinfo>"; + + # seriesvolnums; contains text; Formatted inline + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <seriesvolnums>"; + $self->{options}{'_default_inline'} .= " <seriesvolnums>"; + + # set; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <set>"; + $self->{options}{'_default_break'} .= " <set>"; + + # setindex; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <setindex>"; + $self->{options}{'_default_break'} .= " <setindex>"; + + # setindexinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <setindexinfo>"; + $self->{options}{'_default_placeholder'} .= " <setindexinfo>"; + + # setinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <setinfo>"; + $self->{options}{'_default_placeholder'} .= " <setinfo>"; + + # sgmltag; contains text; Formatted inline; v4, not in v5 + $self->{options}{'_default_translated'} .= " <sgmltag>"; + $self->{options}{'_default_inline'} .= " <sgmltag>"; + + # shortaffil; contains text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_translated'} .= " <shortaffil>"; + $self->{options}{'_default_inline'} .= " <shortaffil>"; + + # shortcut; does not contain text; Formatted inline + $self->{options}{'_default_untranslated'} .= " <shortcut>"; + $self->{options}{'_default_inline'} .= " <shortcut>"; + + # sidebar; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <sidebar>"; + $self->{options}{'_default_break'} .= " <sidebar>"; + + # sidebarinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <sidebarinfo>"; + $self->{options}{'_default_placeholder'} .= " <sidebarinfo>"; + + # simpara; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <simpara>"; + $self->{options}{'_default_break'} .= " <simpara>"; + + # simplelist; does not contain text; + $self->{options}{'_default_untranslated'} .= " <simplelist>"; + $self->{options}{'_default_inline'} .= " <simplelist>"; + + # simplemsgentry; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <simplemsgentry>"; + $self->{options}{'_default_break'} .= " <simplemsgentry>"; + + # simplesect; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <simplesect>"; + $self->{options}{'_default_break'} .= " <simplesect>"; + + # spanspec; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <spanspec>"; + $self->{options}{'_default_break'} .= " <spanspec>"; + + # state; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <state>"; + $self->{options}{'_default_inline'} .= " <state>"; + + # step; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <step>"; + $self->{options}{'_default_break'} .= " <step>"; + + # stepalternatives; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <stepalternatives>"; + $self->{options}{'_default_break'} .= " <stepalternatives>"; + + # street; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <street>"; + $self->{options}{'_default_inline'} .= " <street>"; + + # structfield; contains text; Formatted inline; v4, not in v5 + $self->{options}{'_default_translated'} .= " <structfield>"; + $self->{options}{'_default_inline'} .= " <structfield>"; + + # structname; contains text; Formatted inline; v4, not in v5 + $self->{options}{'_default_translated'} .= " <structname>"; + $self->{options}{'_default_inline'} .= " <structname>"; + + # subject; does not contain text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_untranslated'} .= " <subject>"; + $self->{options}{'_default_break'} .= " <subject>"; + + # subjectset; does not contain text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_untranslated'} .= " <subjectset>"; + $self->{options}{'_default_break'} .= " <subjectset>"; + + # subjectterm; contains text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_translated'} .= " <subjectterm>"; + $self->{options}{'_default_break'} .= " <subjectterm>"; + + # subscript; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <subscript>"; + $self->{options}{'_default_inline'} .= " <subscript>"; + + # substeps; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <substeps>"; + $self->{options}{'_default_break'} .= " <substeps>"; + + # subtitle; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <subtitle>"; + $self->{options}{'_default_break'} .= " <subtitle>"; + + # superscript; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <superscript>"; + $self->{options}{'_default_inline'} .= " <superscript>"; + + # surname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <surname>"; + $self->{options}{'_default_inline'} .= " <surname>"; + + #svg:svg + + # symbol; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <symbol>"; + $self->{options}{'_default_inline'} .= " <symbol>"; + + # synopfragment; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <synopfragment>"; + $self->{options}{'_default_placeholder'} .= " <synopfragment>"; + + # synopfragmentref; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <synopfragmentref>"; + $self->{options}{'_default_inline'} .= " <synopfragmentref>"; + + # synopsis; contains text; verbatim + $self->{options}{'_default_translated'} .= " W<synopsis>"; + $self->{options}{'_default_placeholder'} .= " <synopsis>"; + + # systemitem; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <systemitem>"; + $self->{options}{'_default_inline'} .= " <systemitem>"; + + # TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT + + # table; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <table>"; + $self->{options}{'_default_placeholder'} .= " <table>"; + + # tag; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <tag>"; + $self->{options}{'_default_inline'} .= " <tag>"; + + # task; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <task>"; + $self->{options}{'_default_placeholder'} .= " <task>"; + + # taskprerequisites; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <taskprerequisites>"; + $self->{options}{'_default_break'} .= " <taskprerequisites>"; + + # taskrelated; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <taskrelated>"; + $self->{options}{'_default_break'} .= " <taskrelated>"; + + # tasksummary; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <tasksummary>"; + $self->{options}{'_default_break'} .= " <tasksummary>"; + + # tbody; does not contain text; + $self->{options}{'_default_untranslated'} .= " <tbody>"; + $self->{options}{'_default_break'} .= " <tbody>"; + + # td; contains text; + $self->{options}{'_default_translated'} .= " <td>"; + $self->{options}{'_default_break'} .= " <td>"; + + # term; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <term>"; + $self->{options}{'_default_break'} .= " <term>"; + + # termdef; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <termdef>"; + $self->{options}{'_default_inline'} .= " <termdef>"; + + # tertiary; contains text; Suppressed + $self->{options}{'_default_translated'} .= " <tertiary>"; + $self->{options}{'_default_placeholder'} .= " <tertiary>"; + + # tertiaryie; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <tertiaryie>"; + $self->{options}{'_default_break'} .= " <tertiaryie>"; + + # textdata; does not contain text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_untranslated'} .= " <textdata>"; + $self->{options}{'_default_break'} .= " <textdata>"; + $self->{options}{'_default_attributes'} .= ' <textdata>fileref'; + + # textobject; does not contain text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_untranslated'} .= " <textobject>"; + $self->{options}{'_default_break'} .= " <textobject>"; + + # tfoot; does not contain text; + $self->{options}{'_default_untranslated'} .= " <tfoot>"; + $self->{options}{'_default_break'} .= " <tfoot>"; + + # tgroup; does not contain text; + $self->{options}{'_default_untranslated'} .= " <tgroup>"; + $self->{options}{'_default_break'} .= " <tgroup>"; + + # th; contains text; + $self->{options}{'_default_translated'} .= " <th>"; + $self->{options}{'_default_break'} .= " <th>"; + + # thead; does not contain text; + $self->{options}{'_default_untranslated'} .= " <thead>"; + $self->{options}{'_default_break'} .= " <thead>"; + + # tip; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <tip>"; + $self->{options}{'_default_break'} .= " <tip>"; + + # title; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <title>"; + $self->{options}{'_default_break'} .= " <title>"; + + # titleabbrev; contains text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_translated'} .= " <titleabbrev>"; + $self->{options}{'_default_break'} .= " <titleabbrev>"; + + # toc; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <toc>"; + $self->{options}{'_default_break'} .= " <toc>"; + + # tocback; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <tocback>"; + $self->{options}{'_default_break'} .= " <tocback>"; + + # tocchap; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <tocchap>"; + $self->{options}{'_default_break'} .= " <tocchap>"; + + # tocdiv; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <tocdiv>"; + $self->{options}{'_default_break'} .= " <tocdiv>"; + + # tocentry; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <tocentry>"; + $self->{options}{'_default_break'} .= " <tocentry>"; + + # tocfront; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <tocfront>"; + $self->{options}{'_default_break'} .= " <tocfront>"; + + # toclevel1; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <toclevel1>"; + $self->{options}{'_default_break'} .= " <toclevel1>"; + + # toclevel2; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <toclevel2>"; + $self->{options}{'_default_break'} .= " <toclevel2>"; + + # toclevel3; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <toclevel3>"; + $self->{options}{'_default_break'} .= " <toclevel3>"; + + # toclevel4; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <toclevel4>"; + $self->{options}{'_default_break'} .= " <toclevel4>"; + + # toclevel5; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <toclevel5>"; + $self->{options}{'_default_break'} .= " <toclevel5>"; + + # tocpart; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <tocpart>"; + $self->{options}{'_default_break'} .= " <tocpart>"; + + # token; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <token>"; + $self->{options}{'_default_inline'} .= " <token>"; + + # tr; does not contain text; + $self->{options}{'_default_untranslated'} .= " <tr>"; + $self->{options}{'_default_break'} .= " <tr>"; + + # trademark; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <trademark>"; + $self->{options}{'_default_inline'} .= " <trademark>"; + + # type; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <type>"; + $self->{options}{'_default_inline'} .= " <type>"; + + # UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU + + # ulink; contains text; Formatted inline; v4, not in v5 + $self->{options}{'_default_translated'} .= " <ulink>"; + $self->{options}{'_default_inline'} .= " <ulink>"; + + # uri; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <uri>"; + $self->{options}{'_default_inline'} .= " <uri>"; + + # userinput; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <userinput>"; + $self->{options}{'_default_inline'} .= " <userinput>"; + + # VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV + + # varargs; empty element; + $self->{options}{'_default_untranslated'} .= " <varargs>"; + $self->{options}{'_default_inline'} .= " <varargs>"; + + # variablelist; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <variablelist>"; + $self->{options}{'_default_placeholder'} .= " <variablelist>"; + + # varlistentry; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <varlistentry>"; + $self->{options}{'_default_break'} .= " <varlistentry>"; + + # varname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <varname>"; + $self->{options}{'_default_inline'} .= " <varname>"; + + # videodata; contains text; Formatted inline or as a displayed block + $self->{options}{'_default_untranslated'} .= " <videodata>"; + $self->{options}{'_default_break'} .= " <videodata>"; + $self->{options}{'_default_attributes'} .= ' <videodata>fileref'; + + # videoobject; contains text; Formatted inline or as a displayed block + $self->{options}{'_default_untranslated'} .= " <videoobject>"; + $self->{options}{'_default_break'} .= " <videoobject>"; + + # void; empty element; + $self->{options}{'_default_untranslated'} .= " <void>"; + $self->{options}{'_default_inline'} .= " <void>"; + + # volumenum; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <volumenum>"; + $self->{options}{'_default_inline'} .= " <volumenum>"; + + # WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW + + # warning; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <warning>"; + $self->{options}{'_default_break'} .= " <warning>"; + + # wordasword; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <wordasword>"; + $self->{options}{'_default_inline'} .= " <wordasword>"; + + # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + + # xref; empty element; + $self->{options}{'_default_untranslated'} .= " <xref>"; + $self->{options}{'_default_inline'} .= " <xref>"; + + # YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + + # year; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <year>"; + $self->{options}{'_default_inline'} .= " <year>"; + + # xreflabel is the label to use when referencing an element that has no title + $self->{options}{'_default_attributes'} .= " xreflabel"; + + # ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ + + $self->{options}{'_default_attributes'} .= ' + lang + xml:lang'; + + print "Call treat_options\n" + if $self->{options}{'debug'}; + $self->treat_options; +} diff --git a/lib/Locale/Po4a/Gemtext.pm b/lib/Locale/Po4a/Gemtext.pm new file mode 100644 index 0000000..7be810f --- /dev/null +++ b/lib/Locale/Po4a/Gemtext.pm @@ -0,0 +1,193 @@ +#!/usr/bin/env perl -w + +# Po4a::Gemtext.pm +# +# extract and translate translatable strings from a Gemtext documents +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +######################################################################## + +require Exporter; + +package Locale::Po4a::Gemtext; + +use 5.006; +use strict; +use warnings; + +use vars qw(@ISA @EXPORT @AUTOLOAD); +@ISA = qw(Locale::Po4a::TransTractor); +@EXPORT = qw(); + +use Locale::Po4a::TransTractor qw(process new); +use Locale::Po4a::Common; + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::Gemtext - convert Gettext documents from/to PO files. + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::Gemtext is a module to help the translation of Gemtext documents into +other [human] languages. + +=cut + +sub initialize { } + +sub parse { + my $self = shift; + + my ( $line, $ref ) = $self->shiftline(); + + while ( defined($line) ) { + chomp($line); + + $self->parse_heading( $line, $ref ) + or $self->parse_preformatted_text( $line, $ref ) + or $self->parse_list( $line, $ref ) + or $self->parse_quote( $line, $ref ) + or $self->parse_link( $line, $ref ) + or $self->pushline( $self->translate( $line, $ref, "paragraph" ) . "\n" ); + + ( $line, $ref ) = $self->shiftline(); + } +} + +sub parse_heading() { + my $self = shift; + my $line = shift; + my $ref = shift; + + $line =~ m/^(#{1,3}) *(.+)/ or return; + my $level = $1; + my $content = $2; + + $self->pushline( "$level " . $self->translate( $content, $ref, "heading $level" ) . "\n" ); + + return 1; +} + +sub parse_preformatted_text() { + my $self = shift; + my $line = shift; + my $ref = shift; + + $line =~ m/^(``` *)(.*)/ or return; + my $prefix = $1; + my $content = $2; + + my $toggle_line = $prefix; + $toggle_line .= $self->translate( $content, $ref, "alt text" ) if $content; + $self->pushline("$toggle_line\n"); + + my $paragraph; + ( $line, $ref ) = $self->shiftline(); + + while ( defined($line) ) { + chomp($line); + + if ( $line =~ m/^```/ ) { + $self->pushline( $self->translate( $paragraph, $ref, "preformatted text" ) . "\n" ); + $self->pushline("$line\n"); + + return 1; + } + + if ($paragraph) { + $paragraph .= "\n$line"; + } else { + $paragraph = $line; + } + + ( $line, $ref ) = $self->shiftline(); + } +} + +sub parse_list() { + my $self = shift; + my $line = shift; + my $ref = shift; + + $line =~ m/^\* (.+)/ or return; + my $content = $1; + + $self->pushline( "* " . $self->translate( $content, $ref, "list" ) . "\n" ); + + return 1; +} + +sub parse_quote() { + my $self = shift; + my $line = shift; + my $ref = shift; + + $line =~ m/^(> *)(.+)/ or return; + my $prefix = $1; + my $content = $2; + + $self->pushline( $prefix . $self->translate( $content, $ref, "quote" ) . "\n" ); + + return 1; +} + +sub parse_link() { + my $self = shift; + my $line = shift; + my $ref = shift; + + $line =~ m/^(=>[ \t]+[^ \t]+)(?:([ \t]+)(.*))?/ or return; + my $prefix = $1; + my $separator = $2; + my $content = $3; + + my $result = $prefix; + $result .= $separator . $self->translate( $content, $ref, "link" ) + if $content; + $self->pushline( $result . "\n" ); + + return 1; +} + +1; + +=head1 STATUS OF THIS MODULE + +Tested successfully on simple Gemtext files, such as the official Gemtext documentation. + +=head1 SEE ALSO + +L<Locale::Po4a::TransTractor(3pm)>, L<po4a(7)|po4a.7> + +=head1 AUTHORS + + gemmaro <gemmaro.dev@gmail.com> + +=head1 COPYRIGHT AND LICENSE + + Copyright © 2024 gemmaro <gemmaro.dev@gmail.com>. + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut diff --git a/lib/Locale/Po4a/Guide.pm b/lib/Locale/Po4a/Guide.pm new file mode 100644 index 0000000..c3b3a83 --- /dev/null +++ b/lib/Locale/Po4a/Guide.pm @@ -0,0 +1,153 @@ +#!/usr/bin/perl + +# Po4a::Guide.pm +# +# extract and translate translatable strings from Guide XML documents. +# +# This code extracts plain text from tags and attributes on Guide XML +# documents. +# +# Copyright © 2004 Jordi Vilalta <jvprat@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +######################################################################## + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::Guide - convert Guide XML documents from/to PO files + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::Guide is a module to help in the translation of the Gentoo +Linux documentation in the Guide XML format into other [human] languages. + +This format is documented here: http://www.gentoo.org/doc/en/xml-guide.xml + +=head1 STATUS OF THIS MODULE + +This module is fully functional, as it relies in the L<Locale::Po4a::Xml> +module. This only defines the translatable tags and attributes. + +The only known issue is that it doesn't include files with the <include +href="..."> tag, but you can translate all those files alone, and it's usually +better to have them separated. + +=head1 SEE ALSO + +L<Locale::Po4a::TransTractor(3pm)>, L<Locale::Po4a::Xml(3pm)>, L<po4a(7)|po4a.7> + +=head1 AUTHORS + + Jordi Vilalta <jvprat@gmail.com> + +=head1 COPYRIGHT AND LICENSE + +Copyright © 2004 Jordi Vilalta <jvprat@gmail.com> + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut + +package Locale::Po4a::Guide; + +use 5.16.0; +use strict; +use warnings; + +use Locale::Po4a::Common; +use Locale::Po4a::Xml; + +use vars qw(@ISA); +@ISA = qw(Locale::Po4a::Xml); + +sub initialize { + my $self = shift; + my %options = @_; + + #TODO: <include href="..."> includes a file + $self->SUPER::initialize(%options); + $self->{options}{'_default_translated'} .= ' + w<abstract> + <author> + <b> + <brite> + <c> + <codenote> + <comment> + <const> + <date> + w<dd> + w<dt> + <e> + <i> + <ident> + w<impo> + <keyword> + w<li> + <mail> + w<note> + w<p> + <path> + W<pre> + <stmt> + <sub> + w<subtitle> + w<summary> + <sup> + w<th> + w<ti> + w<title> + <uri> + <var> + <version> + w<warn>'; + $self->{options}{'_default_attributes'} .= ' + <author>title + <figure>caption + <figure>link + <figure>short + <guide>lang + <guide>link + <p>by + <pre>caption'; + $self->{options}{'_default_inline'} .= ' + <b> + <brite> + <c> + <const> + <e> + <i> + <ident> + <img> + <keyword> + <mail> + <path> + <stmt> + <sub> + <sup> + <uri> + <var>'; + print "Call treat_options\n" if $self->{options}{'debug'}; + $self->treat_options; +} diff --git a/lib/Locale/Po4a/Halibut.pm b/lib/Locale/Po4a/Halibut.pm new file mode 100644 index 0000000..0e6926e --- /dev/null +++ b/lib/Locale/Po4a/Halibut.pm @@ -0,0 +1,448 @@ +#!/usr/bin/perl -w + +# Copyright © 2004-2008 Nicolas FRANÇOIS <nicolas.francois@centraliens.net> +# +# This file is part of po4a. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with po4a; if not, write to the Free Software +# Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +######################################################################## + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::Halibut - convert Halibut documents and derivates from/to PO files + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::Halibut is a module to help the translation of Halibut documents into +other [human] languages. + +This module contains the definitions of common Halibut commands and +environments. + +=head1 STATUS OF THIS MODULE + +This module is still beta. +Please send feedback and feature requests. + +=head1 CAVEAT + +Some constructs are badly supported. The known ones are documented below. + +=head2 Verbatim blocks + + \c foo + \c bar + +The verbatim block is not considered as a whole. Each line will be +translated separately. + +=head1 SEE ALSO + +L<Locale::Po4a::TeX(3pm)|Locale::Po4a::TeX>, +L<Locale::Po4a::TransTractor(3pm)|Locale::Po4a::TransTractor>, +L<po4a(7)|po4a.7> + +=head1 AUTHORS + + Nicolas François <nicolas.francois@centraliens.net> + +=head1 COPYRIGHT AND LICENSE + +Copyright © 2004-2008 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>. + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut + +package Locale::Po4a::Halibut; + +use 5.16.0; +use strict; +use warnings; + +require Exporter; +use vars qw($VERSION @ISA @EXPORT); +$VERSION = $Locale::Po4a::TeX::VERSION; +@ISA = qw(Locale::Po4a::TeX); +@EXPORT = qw(); + +use Locale::Po4a::Common; +use Locale::Po4a::TeX; +use subs qw(&parse_definition_file + ®ister_generic_command &is_closed &translate_buffer + ®ister_verbatim_environment + &generic_command + &in_verbatim + &get_leading_command); +*parse_definition_file = \&Locale::Po4a::TeX::parse_definition_file; +*get_leading_command = \&Locale::Po4a::TeX::get_leading_command; +*register_generic_command = \&Locale::Po4a::TeX::register_generic_command; +*register_verbatim_environment = \&Locale::Po4a::TeX::register_verbatim_environment; +*generic_command = \&Locale::Po4a::TeX::generic_command; +*is_closed = \&Locale::Po4a::TeX::is_closed; +*in_verbatim = \&Locale::Po4a::TeX::in_verbatim; +*translate_buffer = \&Locale::Po4a::TeX::translate_buffer; +use vars qw($RE_ESCAPE $ESCAPE + $RE_VERBATIM + $RE_COMMENT $RE_PRE_COMMENT + $no_wrap_environments $separated_commands + %commands %environments + %command_categories %separated + %env_separators %debug + %translate_buffer_env + @exclude_include @comments); +*RE_ESCAPE = \$Locale::Po4a::TeX::RE_ESCAPE; +*ESCAPE = \$Locale::Po4a::TeX::ESCAPE; +*RE_VERBATIM = \$Locale::Po4a::TeX::RE_VERBATIM; +*RE_COMMENT = \$Locale::Po4a::TeX::RE_COMMENT; +*RE_PRE_COMMENT = \$Locale::Po4a::TeX::RE_PRE_COMMENT; +*no_wrap_environments = \$Locale::Po4a::TeX::no_wrap_environments; +*separated_commands = \$Locale::Po4a::TeX::separated_commands; +*commands = \%Locale::Po4a::TeX::commands; +*environments = \%Locale::Po4a::TeX::environments; +*command_categories = \%Locale::Po4a::TeX::command_categories; +*separated = \%Locale::Po4a::TeX::separated; +*env_separators = \%Locale::Po4a::TeX::env_separators; +*debug = \%Locale::Po4a::TeX::debug; +*translate_buffer_env = \%Locale::Po4a::TeX::translate_buffer_env; +*exclude_include = \@Locale::Po4a::TeX::exclude_include; +*comments = \@Locale::Po4a::TeX::comments; + +#$ESCAPE = "\\"; +#$RE_ESCAPE = "\\\\"; +#$RE_VERBATIM = "\@example"; +$RE_VERBATIM = "PO4A_FAKE_VERBATIM"; + +#$RE_COMMENT = "\\\@(?:c|comment)\\b"; +$RE_COMMENT = "PO4A_FAKE_COMMENT"; + +sub docheader { + return "\\# This file was generated with po4a. Translate the source file.\n" . "\n"; +} + +my %break_line = (); + +# translate_line_command indicate if the arguments to the command handled +# by line_command() should be translated: +# undefined: arguments are not translated +# 0: there should be no arguments +# 1: arguments should be translated +my %translate_line_command = (); + +sub parse { + my $self = shift; + my ( $line, $ref ); + my $paragraph = ""; # Buffer where we put the paragraph while building + my @env = (); # environment stack + my $t = ""; + + # $docheader_pushed = 0; + + LINE: + undef $self->{type}; + ( $line, $ref ) = $self->shiftline(); + + while ( defined($line) ) { + chomp($line); + $self->{ref} = "$ref"; + + if ( $line =~ /^\s*\\\s*po4a\s*:/ ) { + parse_definition_line( $self, $line ); + goto LINE; + } + + my $t; + ( $paragraph, $t, @env ) = parse_line( $self, $line, $paragraph, \@env ); + $self->pushline($t); + + # Reinit the loop + ( $line, $ref ) = $self->shiftline(); + undef $self->{type}; + } + + if ( length($paragraph) ) { + ( $t, @env ) = translate_buffer( $self, $paragraph, undef, @env ); + $self->pushline($t); + $paragraph = ""; + } +} # end of parse + +sub parse_line { + my $self = shift; + my $line = shift; + my $paragraph = shift; + my $env = shift; + my @e = @$env; + my $translated = ""; + + my $closed = 1; + if ( !in_verbatim(@e) ) { + $closed = is_closed($paragraph); + } + + # if (not $closed) { + # print "not closed. line: '$line'\n para: '$paragraph'\n"; + # } + + #warn "closed'$closed'$line'$paragraph'\n"; + if ( $closed and $line =~ /^\s*$/ ) { + + # An empty line. This indicates the end of the current + # paragraph. + $paragraph .= $line . "\n"; + if ( length($paragraph) ) { + ( $translated, @e ) = translate_buffer( $self, $paragraph, undef, @e ); + $paragraph = ""; + } + } elsif ( $line =~ m/^\\input / ) { + if ( length($paragraph) ) { + ( $translated, @e ) = translate_buffer( $self, $paragraph, undef, @e ); + $paragraph = ""; + } + $translated .= $line . "\n"; + } elsif ( $line =~ m/^$RE_COMMENT/ ) { + $translated = $line . "\n"; + } elsif ( $closed + and ( is_closed($line) or $line =~ /^\\[ce] / ) + and ( $line =~ /^\\([^ ]*?)( +.*)?$/ ) ) + { + my ( $command, $variant, $args, $buffer ); + if ( $break_line{$1} ) { + my @a = (); + $variant = ""; + $args = \@a; + $command = $1; + $buffer = $2 || ""; + } else { + ( $command, $variant, $args, $buffer ) = get_leading_command( $self, $line ); + } + if ( + $break_line{$command} + and not( ( $command eq "c" or $command eq "e" ) + and defined $args->[0] ) + ) + { + # NOTE: This is just a workaround: "\c " is a verbatim line + # and \c{...} is just a verbatim block + my $t; + if ( length($paragraph) ) { + ( $t, @e ) = translate_buffer( $self, $paragraph, undef, @e ); + $translated .= $t; + $paragraph = ""; + } + ( $t, @e ) = generic_command( $self, $command, $variant, $args, \@e ); + $translated .= $t; + + my $arg = $buffer; + my @args = (); + if ( defined $arg and length $arg ) { + + # FIXME: keep the spaces ? + $arg =~ s/\s*$//s; + @args = ( " ", $arg ); + } + ( $t, @e ) = line_command( $self, $command, "", \@args, \@e, 1 ); + $translated .= $t . "\n"; + } else { + + # continue the same paragraph + $paragraph .= $line . "\n"; + } + } else { + + # continue the same paragraph + $paragraph .= $line . "\n"; + } + + return ( $paragraph, $translated, @e ); +} + +sub line_command { + my $self = shift; + my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift ); + my $no_wrap = shift; + print "line_command($command,$variant,@$args,@$env,$no_wrap)=" + if ( $debug{'commands'} ); + + my $translated = ""; # $ESCAPE.$command; + my $line = $args->[1]; + + #warn "line_command: '$line'\n"; + if ( defined $line and length $line ) { + if ( defined $translate_line_command{$command} + and $translate_line_command{$command} ) + { + # $no_wrap could be forced to 1, but it should already be set + $no_wrap = 1; + $line =~ s/^(\s*)//; + my $spaces = $1 || ""; + my ( $t, $e ) = $self->translate_buffer( $line, $no_wrap, @$env, $command ); + + #warn "line_command: '$t'\n"; + $translated .= $spaces . $t; + } else { + $translated .= $line; + } + } + print "($translated,@$env)\n" + if ( $debug{'commands'} ); + return ( $translated, @$env ); +} + +# 3.2 Simple inline formatting commands +# 3.2.1 `\e': Emphasising text +# inline. extract only if alone +register_generic_command("-e,{_}"); +$translate_line_command{e} = 1; +$break_line{e} = 1; + +# 3.2.2 `\c' and `\cw': Displaying computer code inline +# inline. extract only if alone +# NOTE: \c and \c{...} differs. +# \c is marked as a break_line command, but this is reversed in +# parse_line when the \c{...} form is used. +register_generic_command("-c,{_}"); +$translate_line_command{c} = 1; +$break_line{c} = 1; +register_generic_command("-cw,{_}"); + +# 3.2.3 `\q': Quotation marks +# inline. extract only if alone +register_generic_command("-q,{_}"); + +# 3.2.4 `\-' and `\_': Non-breaking hyphens and spaces +# inline. + +# 3.2.5 `\date': Automatic date generation +# inline. + +# 3.2.6 `\W': WWW hyperlinks +# inline. extract only if alone +register_generic_command("-W,{_}"); + +# 3.2.7 `\u': Specifying arbitrary Unicode characters +# inline. + +# 3.2.8 `\k' and `\K': Cross-references to other sections +# inline. They should not be translated. extract only if alone +# FIXME: it will expand to "Section ..." or "section ..." +# Section and section should be translated. +register_generic_command("-k,{}"); +register_generic_command("-K,{}"); + +# 3.2.9 `\#': Inline comments +# inline. But can be removed from the head or tail. +register_generic_command("-#,{}"); +$translate_line_command{"#"} = 0; +$break_line{"#"} = 1; + +# 3.3 Paragraph-level commands +# 3.3.1 `\c': Displaying whole paragraphs of computer code +# see above +# 3.3.2 `\b', `\n', `\dt', `\dd', `\lcont': Lists +register_generic_command("*b,"); +register_generic_command("*n,"); # FIXME: \n{this-one} not supported? +register_generic_command("*dd,"); +register_generic_command("*dt,"); + +# 3.3.2.4 Continuing list items into further paragraphs +register_generic_command("*lcont,{_}"); # registered, but redefined +$commands{lcont} = sub { + my $self = shift; + my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift ); + my $no_wrap = shift; + my ( $t, @e ) = ( "", @$env ); + my $translated = $ESCAPE . $command . $variant . "{"; + my $text = $args->[1]; + my $paragraph = ""; + while ($text =~ s/^(.*?)\n(.*)$/$2/s + or $text =~ s/^([^\n]+)$//s ) + { + ( $paragraph, $t, @e ) = parse_line( $self, $1, $paragraph, \@e ); + $translated .= $t; + } + ( $t, @e ) = translate_buffer( $self, $paragraph, $no_wrap, @e ); + $translated .= $t; + $translated .= "}"; + + return ( $translated, @$env ); +}; + +# 3.3.3 `\rule': Horizontal rules +register_generic_command("rule,"); # TODO: TBC does it break paragraphs + +# 3.3.4 `\quote': Indenting multiple paragraphs as a long quotation +register_generic_command("*quote,{_}"); # TODO: TBC + +# 3.3.5 `\C', `\H', `\S', `\A', `\U': Chapter and section headings +# FIXME: What happens if the line is rewrapped? +# NOTE: The name of the section is not translated. +register_generic_command("*C,{}"); +register_generic_command("*S0,{}"); # Synonym for \H +register_generic_command("*H,{}"); +register_generic_command("*S,{}"); +register_generic_command("*S1,{}"); # Synonym for \S +register_generic_command("*S2,{}"); +register_generic_command("*S3,{}"); # FIXME: and so on + +# FIXME: \S{question-about-fish}{Question} +register_generic_command("*A,{}"); +register_generic_command("*U,{}"); + +# 3.3.6 `\copyright', `\title', `\versionid': Miscellaneous blurb commands +register_generic_command("*title,"); +register_generic_command("*copyright,"); +register_generic_command("*versionid,"); + +# 3.4 Creating a bibliography +# nocite +register_generic_command("*nocite,{}"); + +# B +register_generic_command("*B,{}"); + +# BR +register_generic_command("*BR,{}"); # FIXME: \BR{freds-book} [Fred1993] + +# 3.5 Creating an index +# 3.5.1 Simple indexing +# \i: inline \i{index} or \i\x{grep} +# \ii +register_generic_command("-ii,{_}"); + +# \IM: inline. Variable number of arguments +register_generic_command("*IM,{_}"); +$translate_line_command{IM} = 1; +$break_line{IM} = 1; + +# 3.6 Configuring Halibut +# \cfg +register_generic_command("+cfg,{}{_}"); # NOTE: the new command is not registered + +# 3.7 Defining macros +register_generic_command("*define,{}"); # FIXME: line +$translate_line_command{define} = 1; +$break_line{define} = 1; + +1; diff --git a/lib/Locale/Po4a/InProgress/Debconf.pm b/lib/Locale/Po4a/InProgress/Debconf.pm new file mode 100644 index 0000000..ab5d7c2 --- /dev/null +++ b/lib/Locale/Po4a/InProgress/Debconf.pm @@ -0,0 +1,226 @@ +#!/usr/bin/perl -w + +# Po4a::Debconf.pm +# +# extract and translate translatable strings from debconf templates +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +######################################################################## + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::Debconf - convert debconf templates from/to PO files + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::Debconf is a module to help the translation of the debconf +templates into other [human] languages. + +=head1 OPTIONS ACCEPTED BY THIS MODULE + +NONE. + +=head1 STATUS OF THIS MODULE + +Not tested. + + +DO NOT USE THIS MODULE TO PRODUCE TEMPLATES. It's only good to extract data. + + +=cut + +# Note that the following works. It may help to write a multi-translate + +# sub toto { +# do shift; +# } +# toto({print "ok"}); + +package Locale::Po4a::Debconf; + +use 5.16.0; +use strict; +use warnings; + +require Exporter; +use vars qw(@ISA @EXPORT); +@ISA = qw(Locale::Po4a::TransTractor); +@EXPORT = qw(); + +use Locale::Po4a::TransTractor; +use Locale::Po4a::Common; + +sub initialize { } + +sub parse { + my $self = shift; + + my ( $line, $lref ); + + my ( $field, $value, $extended, $ref, $type ) = ( '', '', '', '', '' ); + my $verb = 0; # whether we are in verbatim mode + + my $escape = sub { + my $str = shift; + $str =~ s/"/\\"/g; + return $str; + }; + + # function in charge of pushing the accumulated material to output + my $handle_field = sub { + my $field = shift; + my $value = shift; + my $extended = shift; + my $ref = shift; + my $type = shift; + + $field =~ s/^(_*)(.*)$/$2/; + my $undercount = length($1) || 0; # number of _ leading the field name + + # Only one leading _: regular translated field + if ( $undercount == 1 ) { + + # the untranslated field + $self->pushline("$field: $value"); + map { $self->pushline( ' ' . ( $_ || '.' ) ) } split( /\n/, $extended ); + + my $eval = '$self->pushline("' . $field . '[FIXME:LANGCODE.ENCODING]: "'; # what to multi-eval + $eval .= + '.$self->translate("' . $escape->($value) . "\",\"$ref\",\"$type/$field\",wrap=>1)" . '."\n".' . "\n"; + + my $count = 0; + foreach my $para ( split( /\n\n/, $extended ) ) { + my $wrap = 1; + if ( $para =~ /(^|\n)\s/m ) { + $wrap = 0; + } + $eval .= ( $count ? '.' : '' ); + $count++; + $eval .= + '$self->translate("' + . $escape->($para) + . "\",\"$ref\",\"$type/$field\[$count\]\",wrap=>$wrap)" . "\n"; + } + + $eval .= ")\n"; + print STDERR $eval if $self->{options}{'debug'}; + eval $eval; + print STDERR "XXXXXXXXXXXXXXXXX\n" if $self->{options}{'debug'}; + + # two leading _: split on coma and multi-translate each part. No extended value. + } elsif ( $undercount == 2 ) { + $self->pushline("$field: $value"); # the untranslated field + + my $eval = '$self->pushline("' . $field . 'FIXME[LANGCODE]: "'; # what to multi-eval + + my $first = 1; + for my $part ( split( /(?<!\\), */, $value, 0 ) ) { + $part =~ s/\\,/,/g; + $eval .= + ( $first ? '' : '.", "' ) + . '.$self->translate("' + . $escape->($part) + . "\",\"$ref\",\"$type/$field chunk\",wrap=>1)"; + $first = 0; + } + $eval .= ")\n"; + + print $eval if $self->{options}{'debug'}; + eval $eval; + + # no leading _: don't touch it + } else { + $self->pushline("$field: $value"); + map { $self->pushline( ' ' . ( $_ || '.' ) ) } split( /\n/, $extended ); + } + }; + + # main loop + ( $line, $lref ) = $self->shiftline(); + + while ( defined($line) ) { + + # a new field (within a stanza) + if ( $line =~ /^([-_.A-Za-z0-9]*):\s?(.*)/ ) { + + $handle_field->( $field, $value, $extended, $ref, $type ); # deal with previously accumulated + ( $field, $value, $extended, $verb ) = ( '', '', '', 0 ); + + $field = $1; + $value = $2; + $value =~ s/\s*$//; + $extended = ''; + $ref = $lref; + + $type = $value if $field eq 'Type'; + + die wrap_mod( "po4a::debconf", dgettext( "po4a", "Translated field in master document: %s" ), $field ) + if $field =~ m/-/; + + # paragraph separator within extended value + } elsif ( $line =~ /^\s\.$/ ) { + $extended .= "\n\n"; + + # continuation of extended value + } elsif ( $line =~ /^\s(.*)/ ) { + + my $bit = $1; + $verb = 1 if ( $bit =~ m/^\s/ ); + + $bit =~ s/\s*$//; + + $extended .= ( $verb ? "\n" : ' ' ) if length $extended && $extended !~ /[\n ]$/; + $extended .= $bit . ( $verb ? "\n" : "" ); + + # this may be an empty line closing the stanza, a comment or even a parse error (if file not DebConf-clean). + } else { + + $handle_field->( $field, $value, $extended, $ref, $type ); + ( $field, $value, $extended, $verb ) = ( '', '', '', 0 ); + + $self->pushline($line); + + } + + ( $line, $lref ) = $self->shiftline(); + } + + $handle_field->( $field, $value, $extended, $ref, $type ); +} + +1; + +=head1 AUTHORS + +This module is loosely inspired from both po-debconf and debconf code. The +adaptation for po4a was done by: + + Martin Quinson (mquinson#debian.org) + +=head1 COPYRIGHT AND LICENSE + + Copyright © 2005 SPI, Inc. + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). diff --git a/lib/Locale/Po4a/InProgress/NewsDebian.pm b/lib/Locale/Po4a/InProgress/NewsDebian.pm new file mode 100644 index 0000000..167aa1b --- /dev/null +++ b/lib/Locale/Po4a/InProgress/NewsDebian.pm @@ -0,0 +1,155 @@ +#!/usr/bin/perl -w + +# Po4a::NewsDebian.pm +# +# extract and translate translatable strings from a NEWS.Debian documents +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +######################################################################## + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::NewsDebian - convert NEWS.Debian documents from/to PO files + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::NewsDebian is a module to help the translation of the +NEWS.Debian files into other [human] languages. Those files are where +maintainer are supposed to write the important news about their package. + +=head1 OPTIONS ACCEPTED BY THIS MODULE + +NONE. + +=head1 STATUS OF THIS MODULE + +Not tested. + +A finer split of the entries may be preferable (search for /^ */, for +example), but this version is more robust and NEWS.Debian entries are not +supposed to change that often. + +=cut + +package Locale::Po4a::NewsDebian; + +use 5.16.0; +use strict; +use warnings; + +require Exporter; +use vars qw(@ISA @EXPORT); +@ISA = qw(Locale::Po4a::TransTractor); +@EXPORT = qw(); + +use Locale::Po4a::TransTractor; +use Locale::Po4a::Common; + +sub initialize { } + +sub parse { + my $self = shift; + + my ($blanklines) = (""); # We want to preserve the blank lines inside the entry, and strip the extrem ones + + my ($body) = ""; # the accumulated paragraph + my ($bodyref) = ""; + my ($bodytype) = ""; + + my ( $line, $lref ); + + # main loop + ( $line, $lref ) = $self->shiftline(); + print "seen >>$line<<\n" if $self->{options}{'debug'}; + while ( defined($line) ) { + + # Begining of an entry + if ( $line =~ m/^(\w[-+0-9a-z.]*) \(([^\(\) \t]+)\)((\s+[-0-9a-z]+)+)\;/i ) { + + die wrap_ref_mod( $lref, "po4a::newsdebian", + dgettext( "po4a", "Begin of a new entry before the end of previous one" ) ) + if ( length($body) ); + + $self->pushline( $line . "\n" ); + + # Signature of this entry + $bodyref = $lref; + $bodytype = $line; + + # eat all leading empty lines + ( $line, $lref ) = $self->shiftline(); + while ( defined($line) && $line =~ m/^\s*$/ ) { + print "Eat >>$line<<\n" if $self->{options}{'debug'}; + ( $line, $lref ) = $self->shiftline(); + } + + # ups, ate one line too much. Put it back. + $self->unshiftline( $line, $lref ); + + # get ready to read the entry (cleanups) + $blanklines = ""; + + # End of current entry + } elsif ( $line =~ m/^ \-\- (.*) <(.*)> .*$/ ) + { #((\w+\,\s*)?\d{1,2}\s+\w+\s+\d{4}\s+\d{1,2}:\d\d:\d\d\s+[-+]\d{4}(\s+\([^\\\(\)]\))?) *$/) { + + $self->translate( $body, $bodyref, $bodytype, wrap => 0 ); + $body = ""; + + # non-specific line + } else { + + if ( $line =~ /^\s*$/ ) { + $blanklines .= "$line"; + } else { + $body .= $blanklines . $line; + $blanklines = ""; + } + } + + ( $line, $lref ) = $self->shiftline(); + print "seen >>" . ( $line || '' ) . "<<\n" if $self->{options}{'debug'}; + } +} + +1; + +=head1 AUTHORS + +This module is loosely inspired from /usr/lib/dpkg/parsechangelog/debian, which is: + + Copyright © 1996 Ian Jackson. This is free software; see the GNU + General Public License version 2 or later for copying conditions. There + is NO warranty. + +The adaptation for po4a was done by: + + Martin Quinson (mquinson#debian.org) + +=head1 COPYRIGHT AND LICENSE + + Copyright © 1996 Ian Jackson. + Copyright © 2005 SPI, Inc. + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). diff --git a/lib/Locale/Po4a/Ini.pm b/lib/Locale/Po4a/Ini.pm new file mode 100644 index 0000000..f29cd11 --- /dev/null +++ b/lib/Locale/Po4a/Ini.pm @@ -0,0 +1,120 @@ +# Locale::Po4a::Ini -- Convert ini files to PO file, for translation. +# +# This program is free software; you may redistribute it and/or modify it +# under the terms of GPL v2.0 or later (see COPYING). +# + +############################################################################ +# Modules and declarations +############################################################################ + +use Locale::Po4a::TransTractor qw(process new); +use Locale::Po4a::Common; + +package Locale::Po4a::Ini; + +use 5.16.0; +use strict; +use warnings; + +require Exporter; + +use vars qw(@ISA @EXPORT $AUTOLOAD); +@ISA = qw(Locale::Po4a::TransTractor); +@EXPORT = qw(); + +my $debug = 0; + +sub initialize { } + +sub parse { + my $self = shift; + my ( $line, $ref ); + my $par; + + LINE: + ( $line, $ref ) = $self->shiftline(); + + while ( defined($line) ) { + chomp($line); + print STDERR "begin\n" if $debug; + + if ( $line =~ /\"/ ) { + print STDERR "Start of line containing \".\n" if $debug; + + # Text before the first quote + $line =~ m/(^[^"\r\n]*)"/; + my $pre_text = $1; + print STDERR " PreText=" . $pre_text . "\n" if $debug; + + # The text for translation + $line =~ m/"([^\r\n]*)"/; + my $quoted_text = $1; + print STDERR " QuotedText=" . $quoted_text . "\n" if $debug; + + # Text after last quote + $line =~ m/"([^"\n]*$)/; + my $post_text = $1; + print STDERR " PostText=" . $post_text . "\n" if $debug; + + # Translate the string it + $par = $self->translate( $quoted_text, $ref, $pre_text ); + + # Escape the \n characters + $par =~ s/\n/\\n/g; + + # Now push the result + $self->pushline( $pre_text . '"' . $par . '"' . $post_text . "\n" ); + print STDERR "End of line containing \".\n" if $debug; + } else { + print STDERR "Other stuff\n" if $debug; + $self->pushline("$line\n"); + } + + # Reinit the loop + ( $line, $ref ) = $self->shiftline(); + } +} + +############################################################################## +# Module return value and documentation +############################################################################## + +1; +__END__ + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::Ini - convert INI files from/to PO files + +=head1 DESCRIPTION + +Locale::Po4a::Ini is a module to help the translation of INI files into other +[human] languages. + +The module searches for lines of the following format and extracts the quoted +text: + +identificator="text than can be translated" + +NOTE: If the text is not quoted, it will be ignored. + +=head1 SEE ALSO + +L<Locale::Po4a::TransTractor(3pm)>, L<po4a(7)|po4a.7> + +=head1 AUTHORS + + Razvan Rusu <rrusu@bitdefender.com> + Costin Stroie <cstroie@bitdefender.com> + +=head1 COPYRIGHT AND LICENSE + +Copyright © 2006 BitDefender + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut diff --git a/lib/Locale/Po4a/KernelHelp.pm b/lib/Locale/Po4a/KernelHelp.pm new file mode 100644 index 0000000..96cd21f --- /dev/null +++ b/lib/Locale/Po4a/KernelHelp.pm @@ -0,0 +1,170 @@ +# Locale::Po4a::KernelHelp -- Convert kernel configuration help from/to PO files +# +# This program is free software; you may redistribute it and/or modify it +# under the terms of GPL v2.0 or later (see COPYING). +# +# See gettext documentation for more info about PO files. + +############################################################################ +# Modules and declarations +############################################################################ + +use Pod::Parser; +use Locale::Po4a::TransTractor qw(process new); +use Locale::Po4a::Common; + +package Locale::Po4a::KernelHelp; + +use 5.16.0; +use strict; +use warnings; + +require Exporter; + +use vars qw(@ISA @EXPORT $AUTOLOAD); +@ISA = qw(Locale::Po4a::TransTractor); +@EXPORT = qw(); # new process write read writepo readpo); + +my $debug = 0; + +sub initialize { } + +sub parse { + my $self = shift; + my ( $line, $ref ); + my $paragraph = ""; # Buffer where we put the paragraph while building + my ($status) = 0; # Syntax of KH is: + # description<nl>variable<nl>help text<nl><nl> + # Status will be: + # 0 1 2 3 0 + + my ( $desc, $variable ); + + LINE: + ( $line, $ref ) = $self->shiftline(); + + while ( defined($line) ) { + chomp($line); + print STDERR "status=$status;Seen >>$line<<:" if $debug; + + if ( $line =~ /^\#/ ) { + print STDERR "comment.\n" if $debug; + $self->pushline("$line\n"); + } elsif ( $status == 0 ) { + if ( $line =~ /\S/ ) { + print STDERR "short desc.\n" if $debug; + $desc = $line; + $status++; + } else { + print STDERR "empty line.\n" if $debug; + $self->pushline("$line\n"); + } + } elsif ( $status == 1 ) { + print STDERR "var name.\n" if $debug; + $variable = $line; + $status++; + + $self->pushline( $self->translate( $desc, $ref, "desc_$variable" ) . "\n$variable\n" ); + + } elsif ( $status == 2 ) { + $line =~ s/^ //; + if ( $line =~ /\S/ ) { + print STDERR "paragraph line.\n" if $debug; + $paragraph .= $line . "\n"; + } else { + print STDERR "end of paragraph.\n" if $debug; + $status++; + $paragraph = $self->translate( $paragraph, $ref, "helptxt_$variable" ); + $paragraph =~ s/^/ /gm; + $self->pushline("$paragraph\n"); + $paragraph = ""; + } + } elsif ( $status == 3 ) { + if ( $line =~ s/^ // ) { + if ( $line =~ /\S/ ) { + print "begin of paragraph.\n" if $debug; + $paragraph = $line . "\n"; + $status--; + } else { + print "end of config option.\n" if $debug; + $status = 0; + $self->pushline("\n"); + } + } else { + $self->unshiftline( $line, $ref ); + $status = 0; + } + } else { + die wrap_ref_mod( $ref, "po4a::kernelhelp", gettext("Syntax error") ); + } + + # Reinit the loop + ( $line, $ref ) = $self->shiftline(); + } +} + +sub docheader { + return <<EOT; +# +# ***************************************************** +# * GENERATED FILE, DO NOT EDIT * +# * THIS IS NO SOURCE FILE, BUT RESULT OF COMPILATION * +# ***************************************************** +# +# This file was generated by po4a(7). Do not store it (in VCS, for example), +# but store the PO file used as source file by pod-translate. +# +# In fact, consider this as a binary, and the PO file as a regular .c file: +# If the PO get lost, keeping this translation up-to-date will be harder. +# +EOT +} +1; + +############################################################################## +# Module return value and documentation +############################################################################## + +1; +__END__ + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::KernelHelp - convert kernel configuration help from/to PO files + +=head1 DESCRIPTION + +Locale::Po4a::KernelHelp is a module to help the translation of +documentation for the Linux kernel configuration options into other [human] +languages. + +=head1 STATUS OF THIS MODULE + +This module is just written, and needs more tests. Most of the needed work +will concern the tools used to parse this file (and configure the kernel), +so that they accept to read the documentation from another (translated) +file. + +=head1 SEE ALSO + +L<Pod::Parser>, +L<Locale::Po4a::Man(3pm)>, +L<Locale::Po4a::Pod(3pm)>, +L<Locale::Po4a::TransTractor(3pm)>, +L<po4a(7)|po4a.7> + +=head1 AUTHORS + + Denis Barbier <barbier@linuxfr.org> + Martin Quinson (mquinson#debian.org) + +=head1 COPYRIGHT AND LICENSE + +Copyright © 2002 SPI, Inc. + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut diff --git a/lib/Locale/Po4a/LaTeX.pm b/lib/Locale/Po4a/LaTeX.pm new file mode 100644 index 0000000..c175110 --- /dev/null +++ b/lib/Locale/Po4a/LaTeX.pm @@ -0,0 +1,397 @@ +#!/usr/bin/perl -w + +# Copyright © 2004, 2005 Nicolas FRANÇOIS <nicolas.francois@centraliens.net> +# +# This file is part of po4a. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with po4a; if not, write to the Free Software +# Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +######################################################################## + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::LaTeX - convert LaTeX documents and derivates from/to PO files + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::LaTeX is a module to help the translation of LaTeX documents into +other [human] languages. It can also be used as a base to build modules for +LaTeX-based documents. + +This module contains the definitions of common LaTeX commands and +environments. + +See the L<Locale::Po4a::TeX(3pm)|Locale::Po4a::TeX> manpage for the list +of recognized options. + +=head1 SEE ALSO + +L<Locale::Po4a::TeX(3pm)|Locale::Po4a::TeX>, +L<Locale::Po4a::TransTractor(3pm)|Locale::Po4a::TransTractor>, +L<po4a(7)|po4a.7> + +=head1 AUTHORS + + Nicolas François <nicolas.francois@centraliens.net> + +=head1 COPYRIGHT AND LICENSE + +Copyright © 2004, 2005 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>. + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut + +package Locale::Po4a::LaTeX; + +use 5.16.0; +use strict; +use warnings; + +require Exporter; +use vars qw($VERSION @ISA @EXPORT); +$VERSION = $Locale::Po4a::TeX::VERSION; +@ISA = qw(Locale::Po4a::TeX); +@EXPORT = qw(); + +use Locale::Po4a::TeX; +use subs qw(&generic_command + &parse_definition_file + ®ister_generic_command + ®ister_generic_environment); +*parse_definition_file = \&Locale::Po4a::TeX::parse_definition_file; +*generic_command = \&Locale::Po4a::TeX::generic_command; +*register_generic_command = \&Locale::Po4a::TeX::register_generic_command; +*register_generic_environment = \&Locale::Po4a::TeX::register_generic_environment; +use vars qw($RE_ESCAPE $ESCAPE + $no_wrap_environments + %commands %environments + %separated_command %separated_environment + %command_parameters %environment_parameters + %env_separators + @exclude_include); +*RE_ESCAPE = \$Locale::Po4a::TeX::RE_ESCAPE; +*ESCAPE = \$Locale::Po4a::TeX::ESCAPE; +*no_wrap_environments = \$Locale::Po4a::TeX::no_wrap_environments; +*commands = \%Locale::Po4a::TeX::commands; +*environments = \%Locale::Po4a::TeX::environments; +*separated_command = \%Locale::Po4a::TeX::separated_command; +*separated_environment = \%Locale::Po4a::TeX::separated_environment; +*env_separators = \%Locale::Po4a::TeX::env_separators; +*exclude_include = \@Locale::Po4a::TeX::exclude_include; +*command_parameters = \%Locale::Po4a::TeX::command_parameters; +*environment_parameters = \%Locale::Po4a::TeX::environment_parameters; + +# documentclass: +# Only read the documentclass in order to find some po4a directives. +# FIXME: The documentclass could contain translatable strings. +# Maybe it should be implemented as \include{}. +register_generic_command("*documentclass,[]{}"); + +# We use register_generic_command to define the number and types of +# parameters. The function is then overwritten: +$commands{'documentclass'} = sub { + my $self = shift; + my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift ); + my $no_wrap = shift; + + # Only try to parse the file. We don't want to fail or parse this file + # if it is a standard documentclass. + my $name = ( $args->[0] eq '[' ) ? $args->[3] : $args->[1]; + parse_definition_file( $self, $name . ".cls", 1 ); + + my ( $t, @e ) = generic_command( $self, $command, $variant, $args, $env, $no_wrap ); + + return ( $t, @$env ); +}; + +# LaTeX 2 +# I chose not to translate files, counters, lengths +register_generic_command("*addcontentsline,{}{}{_}"); +register_generic_command("address,{_}"); # lines are seperated by \\ +register_generic_command("*addtocontents,{}{_}"); +register_generic_command("*addtocounter,{}{}"); +register_generic_command("*addtolength,{}{}"); +register_generic_command("*addvspace,{}"); +register_generic_command("alph,{}"); # another language may not want this alphabet +register_generic_command("arabic,{}"); # another language may not want an arabic numbering +register_generic_command("*author,{_}"); # authors are separated by \and +register_generic_command("bibitem,[]{}"); +register_generic_command("*bibliographystyle,{}"); # BibTeX +register_generic_command("*bibliography,{}"); # BibTeX +register_generic_command("*centerline,{_}"); +register_generic_command("*caption,[]{_}"); +register_generic_command("cc,{_}"); +register_generic_command("circle,[]{}"); +register_generic_command("cite,[_]{}"); +register_generic_command("cline,{}"); +register_generic_command("closing,{_}"); +register_generic_command("dashbox,{}"); # followed by a (w,h) argument +register_generic_command("date,{_}"); +register_generic_command("*enlargethispage,{}"); +register_generic_command("ensuremath,{_}"); +register_generic_command("*fbox,{_}"); +register_generic_command("fnsymbol,{}"); +register_generic_command("*footnote,[]{_}"); +register_generic_command("*footnotemark,[]"); +register_generic_command("*footnotetext,[]{_}"); +register_generic_command("frac,{_}{_}"); +register_generic_command("*frame,{_}"); +register_generic_command("*framebox,[][]{_}"); # There is another form in picture environment +register_generic_command("*hbox,{}"); +register_generic_command("*hspace,[]{}"); +register_generic_command("*hyphenation,{_}"); # Translators may wish to add/remove words +register_generic_command("*include,{}"); + +#register_generic_command("includeonly,{}"); # should not be supported for now +register_generic_command("*index,{_}"); +register_generic_command("*input,{}"); +register_generic_command("*item,[_]"); +register_generic_command("*label,{}"); +register_generic_command("lefteqn,{_}"); +register_generic_command("line,"); # The first argument is (x,y) +register_generic_command("*linebreak,[]"); +register_generic_command("linethickness,{}"); +register_generic_command("location,{_}"); +register_generic_command("makebox,[][]{_}"); # There's another form in picture environment +register_generic_command("makelabels,{}"); +register_generic_command("*markboth,[]{_}{_}"); +register_generic_command("*markright,{_}"); +register_generic_command("mathcal,{_}"); +register_generic_command("mathop,{_}"); +register_generic_command("mbox,{_}"); +register_generic_command("multicolumn,{}{}{_}"); +register_generic_command("multiput,"); # The first arguments are (x,y)(dx,dy) +register_generic_command("name,{_}"); +register_generic_command("*newcommand,{}[][]{_}"); +register_generic_command("*newcounter,{}[]"); +register_generic_command("*newenvironment,{}[]{_}{_}"); +register_generic_command("*newfont,{}{}"); +register_generic_command("*newlength,{}"); +register_generic_command("*newsavebox,{}"); +register_generic_command("*newtheorem,{}[]{_}[]"); # Two forms, {}[]{_} or {}{_}[] +register_generic_command("nocite,{}"); +register_generic_command("nolinebreak,[]"); +register_generic_command("*nopagebreak,[]"); +register_generic_command("opening,{_}"); +register_generic_command("oval,"); # The first argument is (w,h) +register_generic_command("overbrace,{_}"); +register_generic_command("overline,{_}"); +register_generic_command("*pagebreak,[]"); +register_generic_command("*pagenumbering,{_}"); +register_generic_command("pageref,{}"); +register_generic_command("*pagestyle,{}"); +register_generic_command("*parbox,[][][]{}{_}"); +register_generic_command("providecommand,{}[][]{_}"); +register_generic_command("put,"); # The first argument is (x,y) +register_generic_command("raisebox,{}[][]{_}"); +register_generic_command("ref,{}"); +register_generic_command("*refstepcounter,{}"); +register_generic_command("*renewcommand,{}[][]{_}"); +register_generic_command("*renewenvironment,{}[]{_}{_}"); +register_generic_command("roman,{}"); # another language may not want a roman numbering +register_generic_command("rule,[]{}{}"); +register_generic_command("savebox,{}"); # Optional arguments in 2nd & 3rd position +register_generic_command("sbox,{}{_}"); +register_generic_command("*setcounter,{}{}"); +register_generic_command("*setlength,{}{}"); +register_generic_command("*settodepth,{}{_}"); +register_generic_command("*settoheight,{}{_}"); +register_generic_command("*settowidth,{}{_}"); +register_generic_command("shortstack,[]{_}"); +register_generic_command("signature,{_}"); +register_generic_command("sqrt,[_]{_}"); +register_generic_command("stackrel,{_}{_}"); +register_generic_command("stepcounter,{}"); +register_generic_command("*subfigure,[_]{_}"); +register_generic_command("symbol,{_}"); +register_generic_command("telephone,{_}"); +register_generic_command("thanks,{_}"); +register_generic_command("*thispagestyle,{}"); +register_generic_command("*title,{_}"); +register_generic_command("typeout,{_}"); +register_generic_command("typein,[]{_}"); +register_generic_command("twocolumn,[_]"); +register_generic_command("underbrace,{_}"); +register_generic_command("underline,{_}"); +register_generic_command("*usebox,{}"); +register_generic_command("usecounter,{}"); +register_generic_command("*usepackage,[]{}"); +register_generic_command("value,{}"); +register_generic_command("vector,"); # The first argument is (x,y) +register_generic_command("vphantom,{_}"); +register_generic_command("*vspace,[]{}"); +register_generic_command("*vbox,{}"); +register_generic_command("*vcenter,{}"); + +register_generic_command("*part,[_]{_}"); +register_generic_command("*chapter,[_]{_}"); +register_generic_command("*section,[_]{_}"); +register_generic_command("*subsection,[_]{_}"); +register_generic_command("*subsubsection,[_]{_}"); +register_generic_command("*paragraph,[_]{_}"); +register_generic_command("*subparagraph,[_]{_}"); + +register_generic_command("textrm,{_}"); +register_generic_command("textit,{_}"); +register_generic_command("emph,{_}"); +register_generic_command("textmd,{_}"); +register_generic_command("textbf,{_}"); +register_generic_command("textup,{_}"); +register_generic_command("textsl,{_}"); +register_generic_command("textsf,{_}"); +register_generic_command("textsc,{_}"); +register_generic_command("texttt,{_}"); +register_generic_command("textnormal,{_}"); +register_generic_command("mathrm,{_}"); +register_generic_command("mathsf,{_}"); +register_generic_command("mathtt,{_}"); +register_generic_command("mathit,{_}"); +register_generic_command("mathnormal,{_}"); +register_generic_command("mathversion,{}"); + +register_generic_command("*contentspage,"); +register_generic_command("*tablelistpage,"); +register_generic_command("*figurepage,"); + +register_generic_command("*PassOptionsToPackage,{}{}"); + +register_generic_command("*ifthenelse,{}{_}{_}"); + +# graphics +register_generic_command("*includegraphics,[]{}"); +register_generic_command("*graphicspath,{}"); +register_generic_command("*resizebox,{}{}{_}"); +register_generic_command("*scalebox,{}{_}"); +register_generic_command("*rotatebox,{}{_}"); + +# url +register_generic_command("UrlFont,{}"); +register_generic_command("*urlstyle,{}"); + +# hyperref +register_generic_command("href,{}{_}"); # 1:URL +register_generic_command("url,{}"); # URL +register_generic_command("nolinkurl,{}"); # URL +register_generic_command("hyperbaseurl,{}"); # URL +register_generic_command("hyperimage,{}"); # URL +register_generic_command("hyperdef,{}{}{_}"); # 1:category, 2:name +register_generic_command("hyperref,{}{}{}{_}"); # 1:URL, 2:category, 3:name +register_generic_command("hyperlink,{}{_}"); # 1:name +register_generic_command("*hypersetup,{_}"); +register_generic_command("hypertarget,{}{_}"); # 1:name +register_generic_command("autoref,{}"); # 1:label + +register_generic_command("*selectlanguage,{}"); + +# color +register_generic_command("*definecolor,{}{}{}"); +register_generic_command("*textcolor,{}{_}"); +register_generic_command("*colorbox,{}{_}"); +register_generic_command("*fcolorbox,{}{}{_}"); +register_generic_command("*pagecolor,{_}"); +register_generic_command("*color,{}"); + +# equations/theorems +register_generic_command("*qedhere,"); +register_generic_command("*qedsymbol,"); +register_generic_command("*theoremstyle,{}"); +register_generic_command("*proclaim,{_}"); +register_generic_command("*endproclaim,"); +register_generic_command("*shoveleft,{_}"); +register_generic_command("*shoveright,{_}"); + +# commands without arguments. This is better than untranslated or +# translate_joined because the number of arguments will be checked. +foreach ( + qw(a *appendix *backmatter backslash *baselineskip *baselinestretch bf + *bigskip boldmath cal cdots *centering *cleardoublepage *clearpage + ddots dotfill em flushbottom *footnotesize frenchspacing + *frontmatter *glossary *hfill *hline hrulefill huge Huge indent it + kill large Large LARGE ldots left linewidth listoffigures + listoftables *mainmatter *makeatletter *makeglossary *makeindex + *maketitle *medskip *newline *newpage noindent nonumber *normalsize + not *null *onecolumn *par parindent *parskip *printindex protect ps + pushtabs *qquad *quad raggedbottom raggedleft raggedright right rm + sc scriptsize sf sl small *smallskip *startbreaks *stopbreaks + *tableofcontents textwidth textheight tiny today tt unitlength + vdots verb *vfill *vline *fussy *sloppy + + aleph hbar imath jmath ell wp Re Im prime nabla surd angle forall + exists partial infty triangle Box Diamond flat natural sharp + clubsuit diamondsuit heartsuit spadesuit dag ddag S P copyright + pounds Delta ASCII + + rmfamily itshape mdseries bfseries upshape slshape sffamily scshape + ttfamily *normalfont width height depth totalheight + + *fboxsep *fboxrule + *itemi *itemii *itemiii *itemiv + *theitemi *theitemii *theitemiii *theitemiv) + ) +{ + register_generic_command("$_,"); +} + +# standard environments. +# FIXME: All these definitions should be re-checked +foreach ( + qw(abstract align align* cases center description displaymath document enumerate + eqnarray eqnarray* equation equation* flushleft flushright footnotesize itemize + letter lrbox multline multline* proof quotation quote + sloppypar tabbing theorem titlepage + trivlist verbatim verbatim* verse wrapfigure) + ) +{ + register_generic_environment("$_,"); +} +register_generic_environment("tabular,[]{}"); +register_generic_environment("tabular*,{}{}"); +register_generic_environment("tabularx,{}{}"); +register_generic_environment("multicols,{}"); +register_generic_environment("list,{_}{}"); +register_generic_environment("array,[]{}"); +register_generic_environment("figure,[]"); +register_generic_environment("minipage,[]{}"); +register_generic_environment("picture,{}{}"); +register_generic_environment("table,[]"); +register_generic_environment("thebibliography,{_}"); + +# Commands and environments with separators. + +# & is the cell separator, \\ is the line separator +# '\' is escaped twice +$env_separators{'array'} = $env_separators{'tabular'} = $env_separators{'tabularx'} = "(?:&|\\\\\\\\|\\\\hline)"; + +$env_separators{'trivlist'} = $env_separators{'list'} = $env_separators{'description'} = $env_separators{'enumerate'} = + $env_separators{'itemize'} = "\\\\item"; + +$env_separators{'thebibliography'} = "\\\\bibitem"; + +$env_separators{'displaymath'} = $env_separators{'eqnarray'} = $env_separators{'eqnarray*'} = + $env_separators{'flushleft'} = $env_separators{'flushright'} = $env_separators{'center'} = + $env_separators{'author{#1}'} = $env_separators{'title{#1}'} = "\\\\\\\\"; + +# tabbing + +1; diff --git a/lib/Locale/Po4a/Man.pm b/lib/Locale/Po4a/Man.pm new file mode 100644 index 0000000..ff4157d --- /dev/null +++ b/lib/Locale/Po4a/Man.pm @@ -0,0 +1,2904 @@ +#!/usr/bin/perl -w + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::Man - convert manual pages from/to PO files + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::Man is a module to help the translation of documentation in +the nroff format (the language of manual pages) into other [human] +languages. + +=head1 TRANSLATING WITH PO4A::MAN + +This module tries pretty hard to make translator's life easier. For that, +the text presented to translators isn't a verbatim copy of the text found +in the man page. Indeed, the cruder parts of the nroff format are hidden, so +that translators can't mess up with them. + +=head2 Text wrapping + +Unindented paragraphs are automatically rewrapped for the translator. This +can lead to some minor difference in the generated output, since the +rewrapping rules used by groff aren't very clear. For example, two spaces +after a parenthesis are sometimes preserved. + +Anyway, the difference will only be about the position of the extra spaces +in wrapped paragraph, and I think it's worth. + +=head2 Font specification + +The first change is about font change specifications. In nroff, there are +several ways to specify if a given word should be written in small, bold or +italics. In the text to translate, there is only one way, borrowed from the +POD (Perl online documentation) format: + +=over + +=item IE<lt>textE<gt> -- italic text + +equivalent to \fItext\fP or ".I text" + +=item BE<lt>textE<gt> -- bold text + +equivalent to \fBtext\fP or ".B text" + +=item RE<lt>textE<gt> -- roman text + +equivalent to \fRtext\fP + +=item CWE<lt>textE<gt> -- constant width text + +equivalent to \f(CWtext\fP or ".CW text" + +=back + +Remark: The CW face is not available for all groff devices. It is not +recommended to use it. It is provided for your convenience. + +=head2 Automatic characters transliteration + +Po4a automatically transliterate some characters to ease the translation +or the review of the translation. +Here is the list of the transliterations: + +=over + +=item hyphens + +Hyphens (-) and minus signs (\-) in man pages are all transliterated +as simple dashes (-) in the PO file. Then all dash are transliterated into +roff minus signs (\-) when the translation is inserted into the output +document. + +Translators can force an hyphen by using the roff glyph '\[hy]' in their +translations. + +=item non-breaking spaces + +Translators can use non-breaking spaces in their translations. These +non-breaking spaces (0xA0 in latin1) will be transliterated into a roff +non-breaking space ('\ '). + +=item quotes transliterations + +`` and '' are respectively tranliterated into \*(lq and \*(rq. + +To avoid these transliterations, translators can insert a zero width roff +character (i.e., using `\&` or '\&' respectively). + +=back + +=head2 Putting 'E<lt>' and 'E<gt>' in translations + +Since these chars are used to delimit parts under font modification, you +can't use them verbatim. Use EE<lt>ltE<gt> and EE<lt>gtE<gt> instead (as in +POD, one more time). + +=head1 OPTIONS ACCEPTED BY THIS MODULE + +These are this module's particular options: + +=over + +=item B<debug> + +Activate debugging for some internal mechanisms of this module. +Use the source to see which parts can be debugged. + +=item B<verbose> + +Increase verbosity. + +=item B<groff_code> + +This option controls the behavior of the module when it encounter +a .de, .ie or .if section. It can take the following values: + +=over + +=item I<fail> + +This is the default value. +The module will fail when a .de, .ie or .if section is encountered. + +=item I<verbatim> + +Indicates that the .de, .ie or .if sections must be copied as is +from the original to the translated document. + +=item I<translate> + +Indicates that the .de, .ie or .if sections will be proposed for the +translation. +You should only use this option if a translatable string is +contained in one of these section. Otherwise, I<verbatim> +should be preferred. + +=back + +=item B<generated> + +This option specifies that the file was generated, and that po4a should not +try to detect if the man pages was generated from another format. +This option is mandatory to use po4a on generated man pages. +Note that translating generated pages instead of sources ones is often more fragile, and thus a bad idea. + +=item B<mdoc> + +This option is only useful for mdoc pages. + +It selects a stricter support of the mdoc format by telling po4a not to +translate the 'NAME' section. +mdoc pages whose 'NAME' section is translated won't generate any header or +footer. + +According to the groff_mdoc page, the NAME, SYNOPSIS and DESCRIPTION +sections are mandatory. +There are no known issues with translated SYNOPSIS or DESCRIPTION section, +but you can also specify these sections this way: + -o mdoc=NAME,SYNOPSIS,DESCRIPTION + +This mdoc issue can also be solved with an addendum like this one: + PO4A-HEADER:mode=before;position=^.Dd + .TH DOCUMENT_TITLE 1 "Month day, year" OS "Section Name" + + +=back + +The following options specify the behavior of a user-defined macro +(with a .de request), or of a classical macro that is not supported by po4a. +They take as argument a comma-separated list of macros. +For example: + + -o noarg=FO,OB,AR -o translate_joined=BA,ZQ,UX + +Note: if a macro is not supported by po4a and if you consider that it is a +standard roff macro, you should submit it to the po4a development team. + +=over + +=item B<untranslated> + +B<untranslated> indicates that this macro (at its arguments) don't have to +be translated. + +=item B<noarg> + +B<noarg> is like B<untranslated>, except that po4a will verify that no +argument is added to this macro. + +=item B<translate_joined> + +B<translate_joined> indicates that po4a must propose to translate the +arguments of the macro. + +=item B<translate_each> + +With B<translate_each>, the arguments will also be proposed for the +translation, except that each one will be translated separately. + +=item B<no_wrap> + +This option takes as argument a list of comma-separated couples +I<begin>:I<end>, where I<begin> and I<end> are commands that delimit +the begin and end of a section that should not be rewrapped. + +Note: no test is done to ensure that an I<end> command matches its +I<begin> command; any ending command stop the no_wrap mode. +If you have a I<begin> (respectively I<end>) macro that has no I<end> +(respectively I<begin>), you can specify an existing I<end> (like fi) or +I<begin> (like nf) as a counterpart. +These macros (and their arguments) won't be translated. + +=item B<inline> + +This option specifies a list of comma-separated macros that must +not split the current paragraph. The string to translate will then contain +I<foo E<lt>.bar baz quxE<gt> quux>, where I<bar> is the command that +should be inlined, and I<baz qux> its arguments. + +=item B<unknown_macros> + +This option indicates how po4a should behave when an unknown macro is found. +By default, po4a fails with a warning. +It can take the following values: B<failed> (the default value), +B<untranslated>, B<noarg>, B<translate_joined>, or B<translate_each> (see above +for an explanation of these values). + +=back + +=head1 AUTHORING MAN PAGES COMPLIANT WITH PO4A::MAN + +This module is still very limited, and will always be, because it's not a +real nroff interpreter. It would be possible to do a real nroff +interpreter, to allow authors to use all the existing macros, or even to +define new ones in their pages, but we didn't want to. It would be too +difficult, and we thought it wasn't necessary. We do think that if +manpages' authors want to see their productions translated, they may have to +adapt to ease the work of translators. + +So, the man parser implemented in po4a have some known limitations we are +not really inclined to correct, and which will constitute some pitfalls +you'll have to avoid if you want to see translators taking care of your +documentation. + +=head2 Don't program in nroff + +nroff is a complete programming language, with macro definition, +conditionals and so on. Since this parser isn't a fully featured nroff +interpreter, it will fail on pages using these facilities (There are about +200 such pages on my box). + +=head2 Use the plain macro set + +There are still some macros which are not supported by po4a::man. This is +only because I failed to find any documentation about them. Here is the +list of unsupported macros used on my box. Note that this list isn't +exhaustive since the program fails on the first encountered unsupported +macro. If you have any information about some of these macros, I'll +happily add support for them. Because of these macros, about 250 pages on +my box are inaccessible to po4a::man. + + .. ." .AT .b .bank + .BE ..br .Bu .BUGS .BY + .ce .dbmmanage .do .En + .EP .EX .Fi .hw .i + .Id .l .LO .mf + .N .na .NF .nh .nl + .Nm .ns .NXR .OPTIONS .PB + .pp .PR .PRE .PU .REq + .RH .rn .S< .sh .SI + .splitfont .Sx .T .TF .The + .TT .UC .ul .Vb .zZ + +=head2 Hiding text from po4a + +Sometimes, the author knows that some parts are not translatable, and +should not be extracted by po4a. For example, an option may accept an +I<other> argument, and I<other> may also appear as the last item of a +list. In the first case, I<other> should be not be translatable. And in +the second case, I<other> should be translated. + +In such case, the author can avoid po4a to extract some strings, using +some special groff constructs: + + .if !'po4a'hide' .B other + +(this will require the B<-o groff_code=verbatim> option) + +A new macro can also be defined to automate this: + .de IR_untranslated + . IR \\$@ + .. + + .IR_untranslated \-q ", " \-\-quiet + +(this will require the options B<-o groff_code=verbatim> and +B<-o untranslated=IR_untranslated>; with this construct, the B<.if +!'po4a'hide'> conditional is not strictly needed since po4a will not parse +the internal of the macro definition) + +or using an alias: + .als IR_untranslated IR + + .IR_untranslated \-q ", " \-\-quiet + +This will require the B<-o untranslated=als,IR_untranslated> option. + +=head2 Conclusion + +To summarise this section, keep simple, and don't try to be clever while +authoring your man pages. A lot of things are possible in nroff, and not +supported by this parser. For example, don't try to mess with \c to +interrupt the text processing (like 40 pages on my box do). Or, be sure to +put the macro arguments on the same line that the macro itself. I know that +it's valid in nroff, but would complicate too much the parser to be +handled. + +Of course, another possibility is to use another format, more translator +friendly (like POD using po4a::pod, or one of the XML family like SGML), +but thanks to po4a::man it isn't needed anymore. That being said, if the +source format of your documentation is POD, or XML, it may be clever to +translate the source format and not this generated one. In most cases, +po4a::man will detect generated pages and issue a warning. It will even +refuse to process POD generated pages, because those pages are perfectly +handled by po4a::pod, and because their nroff counterpart defines a lot of +new macros I didn't want to write support for. On my box, 1432 of the 4323 +pages are generated from POD and will be ignored by po4a::man. + +In most cases, po4a::man will detect the problem and refuse to process the +page, issuing an adapted message. In some rare cases, the program will +complete without warning, but the output will be wrong. Such cases are +called "bugs" ;) If you encounter such case, be sure to report this, along +with a fix when possible… + +=head1 STATUS OF THIS MODULE + +This module can be used for most of the existing man pages. + +Some tests are regularly run on Linux boxes: + +=over 4 + +=item * + +one third of the pages are refused because they were generated from +another format supported by po4a (e.g. POD or SGML). + +=item * + +10% of the remaining pages are rejected with an error (e.g. a +groff macro is not supported). + +=item * + +Then, less than 1% of the pages are accepted silently by po4a, but with +significant issues (i.e. missing words, or new words inserted) + +=item * + +The other pages are usually handled without differences more important +than spacing differences or line rewrapped (font issues in less than 10% of +the processed pages). + +=back + +=head1 SEE ALSO + +L<Locale::Po4a::Pod(3pm)>, +L<Locale::Po4a::TransTractor(3pm)>, +L<po4a(7)|po4a.7> + +=head1 AUTHORS + + Denis Barbier <barbier@linuxfr.org> + Nicolas François <nicolas.francois@centraliens.net> + Martin Quinson (mquinson#debian.org) + +=head1 COPYRIGHT AND LICENSE + +Copyright © 2002-2008 SPI, Inc. + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut + +package Locale::Po4a::Man; +use DynaLoader; + +use 5.16.0; +use strict; +use warnings; + +require Exporter; +use vars qw(@ISA @EXPORT); +@ISA = qw(Locale::Po4a::TransTractor DynaLoader); +@EXPORT = qw(); # new initialize); + +# Try to use a C extension if present. +eval('bootstrap Locale::Po4a::Man "0.30"'); + +use Locale::Po4a::TransTractor; +use Locale::Po4a::Common; + +use File::Spec; +use Getopt::Std; + +my %macro; # hash of known macro, with parsing sub. See end of this file +my %default_macro; # The default known macros, when no options are used. + +# A font start by \f and is followed either by +# [.*] - a font name within brackets (e.g. [P], [A_USER_FONT]) +# (.. - a parenthesis followed by two char (e.g. "(CW") +# . - a single char (e.g. B, I, R, P, 1, 2, 3, 4, etc.) +my $FONT_RE = "\\\\f(?:\\[[^\\]]*\\]|\\(..|[^\\(\\[])"; + +# Variable used to identify non breaking spaces. +# These non breaking spaces are used to ease the parsing, and a +# translator can use them in their translation (and they will be translated +# into the groff non-breaking space). +my $nbs; + +# Indicate if the page uses the mdoc macros +my $mdoc_mode = 0; + +my $unknown_macros = undef; + +######################### +#### DEBUGGING STUFF #### +######################### +my %debug; + +# The following debug options can be set with '-o debug=...': +# * splitargs see how macro args are separated +# * pretrans see pre-conditioning of translation +# * postrans see post-conditioning of translation +# * fonts see font modifier handling + +######## CONFIG ######### +# This variable indicates the behavior of the module when a .de, .if or +# .ie is encountered. +my $groff_code; + +# %no_wrap_begin and %no_wrap_end are lists of macros that respectively +# begins and ends a no_wrap paragraph. +# Any ending macro will end the no_wrap paragraph started by any beginning +# macro. +my %no_wrap_begin; +my %no_wrap_end; + +# List of macros that should be inlined (with E<.xx ...>) +my %inline; + +# The default list of inlined macros (when no options are used) +my %default_inline; + +# This variable indicates whether po4a should try to detect the generated +# files. +my $allow_generated; + +# This hash indicates section name that should not be translated in mdoc +# mode. +# The groff's mdoc processor requires the NAME section, otherwise headers +# and footers of the pages are not generated. +# The mdoc_groff man page indicates that NAME, SYNOPSIS and DESCRIPTION +# are mandatory. +my %mdoc; + +sub initialize { + my $self = shift; + my %options = @_; + + $self->{options}{'debug'} = ''; + $self->{options}{'verbose'} = ''; + $self->{options}{'groff_code'} = ''; + $self->{options}{'untranslated'} = ''; + $self->{options}{'noarg'} = ''; + $self->{options}{'translate_joined'} = ''; + $self->{options}{'translate_each'} = ''; + $self->{options}{'no_wrap'} = ''; + $self->{options}{'inline'} = ''; + $self->{options}{'generated'} = ''; + $self->{options}{'mdoc'} = ''; + $self->{options}{'unknown_macros'} = ''; + + foreach my $opt ( keys %options ) { + if ( defined $options{$opt} ) { + die wrap_mod( "po4a::man", dgettext( "po4a", "Unknown option: %s" ), $opt ) + unless exists $self->{options}{$opt}; + $self->{options}{$opt} = $options{$opt}; + } + } + + %debug = (); + if ( defined $options{'debug'} ) { + foreach ( $options{'debug'} ) { + $debug{$_} = 1; + } + } + + $groff_code = "fail"; + if ( defined $options{'groff_code'} ) { + unless ( $options{'groff_code'} =~ m/fail|verbatim|translate/ ) { + die wrap_mod( "po4a::man", + dgettext( "po4a", "Invalid 'groff_code' value. Must be one of 'fail', 'verbatim', 'translate'." ) ); + } + $groff_code = $options{'groff_code'}; + } + + if (%default_macro) { + %macro = %default_macro; + } else { + %default_macro = %macro; + } + if ( defined $options{'untranslated'} ) { + foreach ( split( /,/, $options{'untranslated'} ) ) { + $macro{$_} = \&untranslated; + } + } + if ( defined $options{'noarg'} ) { + foreach ( split( /,/, $options{'noarg'} ) ) { + $macro{$_} = \&noarg; + } + } + if ( defined $options{'translate_joined'} ) { + foreach ( split( /,/, $options{'translate_joined'} ) ) { + $macro{$_} = \&translate_joined; + } + } + if ( defined $options{'translate_each'} ) { + foreach ( split( /,/, $options{'translate_each'} ) ) { + $macro{$_} = \&translate_each; + } + } + + %no_wrap_begin = ( + 'nf' => 1, + 'EX' => 1, + 'EQ' => 1 + ); + %no_wrap_end = ( + 'fi' => 1, + 'EE' => 1, + 'EN' => 1 + ); + if ( defined $options{'no_wrap'} ) { + foreach ( split( /,/, $options{'no_wrap'} ) ) { + if ( $_ =~ m/^(.*):(.*)$/ ) { + $no_wrap_begin{$1} = 1; + $no_wrap_end{$2} = 1; + } else { + die wrap_mod( "po4a::man", + dgettext( "po4a", "The no_wrap parameters must be a set of comma-separated begin:end couples.\n" ) + ); + } + } + } + + if (%default_inline) { + %inline = %default_inline; + } else { + %default_inline = %inline; + } + if ( defined $options{'inline'} ) { + foreach ( split( /,/, $options{'inline'} ) ) { + $inline{$_} = 1; + } + } + + $allow_generated = 0; + if ( defined $options{'generated'} ) { + $allow_generated = 1; + } + + %mdoc = (); + if ( defined $options{'mdoc'} ) { + if ( $options{'mdoc'} eq 1 ) { + $mdoc{"NAME"} = 1; + } else { + foreach ( split( /,/, $options{'mdoc'} ) ) { + $mdoc{$_} = 1; + } + } + } + + $unknown_macros = undef; + if ( defined $options{'unknown_macros'} ) { + if ( $options{'unknown_macros'} eq "failed" ) { + $unknown_macros = undef; + } elsif ( $options{'unknown_macros'} eq "untranslated" ) { + $unknown_macros = \&untranslated; + } elsif ( $options{'unknown_macros'} eq "noarg" ) { + $unknown_macros = \&noarg; + } elsif ( $options{'unknown_macros'} eq "translate_joined" ) { + $unknown_macros = \&translate_joined; + } elsif ( $options{'unknown_macros'} eq "translate_each" ) { + $unknown_macros = \&translate_each; + } else { + die wrap_mod( "po4a::man", + dgettext( "po4a", "Invalid 'unknown_macros' value. Must be one of:\n" ) + . "failed untranslated noarg translate_joined translate_each\n" ); + } + } +} + +my @comments = (); +my @next_comments = (); + +# This function returns the next line of the document being parsed +# (and its reference). +# It overload the Transtractor shiftline to handle: +# - font requests (.B, .I, .BR, .BI, ...) +# because these requests can be present in a paragraph (handled +# in the parse subroutine), or in argument (on the next line) +# of some other request (for example .TP) +# - font size requests (.SM,.SB) (not done yet) +# - input escape (\ at the end of a line) +sub shiftline { + my $self = shift; + + # call Transtractor's shiftline + NEW_LINE: + my ( $line, $ref ) = $self->SUPER::shiftline(); + + if ( !defined $line ) { + + # end of file + return ( $line, $ref ); + } + + # Do as few treatments as possible with the .de, .ie and .if sections + if ( $line =~ /^\.\s*(if|ie|de)/ ) { + chomp $line; + return ( $line, $ref ); + } + + # Handle some escapes + # * reduce the number of \ in macros + if ( $line =~ /^\\?[.']/ ) { + + # The first backslash is consumed while the macro is read. + $line =~ s/\\\\/\\/g; + } + + # * \\ is equivalent to \e, which is less error prone for the rest + # of the module (e.g. when searching for a font : \f, whe don't + # want to match \\f) + $line =~ s/\\\\/\\e/g; + + # * \. is just a dot (this can even be used to introduce a macro) + $line =~ s/\\\././g; + + chomp $line; + if ( $line =~ m/^(.*?)(?:(?<!\\)\\(["#])(.*))$/ ) { + my ( $l, $t, $c ) = ( $1, $2, $3 ); + $line = $l; + unless ($allow_generated) { + + # Check for comments indicating that the file was generated. + if ( $c =~ /Pod::Man/ ) { + warn wrap_ref_mod( + $ref, + "po4a::man", + dgettext( + "po4a", + "This file was generated with Pod::Man. Translate the POD file with the pod module of po4a." + ) + ); + exit 254; + } elsif ( $c =~ /generated by help2man/ ) { + warn wrap_ref_mod( + $ref, + "po4a::man", + dgettext( + "po4a", + "This file was generated with help2man. Translate the source file with the regular gettext." + ) + ); + } elsif ( $c =~ /with docbook-to-man/ ) { + warn wrap_ref_mod( + $ref, + "po4a::man", + dgettext( + "po4a", + "This file was generated with docbook-to-man. Translate the source file with the sgml module of po4a." + ) + ); + exit 254; + } elsif ( $c =~ /generated by docbook2man/ ) { + warn wrap_ref_mod( + $ref, + "po4a::man", + dgettext( + "po4a", + "This file was generated with docbook2man. Translate the source file with the sgml module of po4a." + ) + ); + exit 254; + } elsif ( $c =~ /created with latex2man/ ) { + warn wrap_ref_mod( + $ref, + "po4a::man", + dgettext( + "po4a", + "This file was generated with %s. " + . "You should translate the source file, but continuing anyway." + ), + "latex2man" + ); + } elsif ( $c =~ /Generated by db2man.xsl/ ) { + warn wrap_ref_mod( + $ref, + "po4a::man", + dgettext( + "po4a", + "This file was generated with db2man.xsl. Translate the source file with the xml module of po4a." + ) + ); + exit 254; + } elsif ( $c =~ /generated automatically by mtex2man/ ) { + warn wrap_ref_mod( + $ref, + "po4a::man", + dgettext( + "po4a", + "This file was generated with %s. " + . "You should translate the source file, but continuing anyway." + ), + "mtex2man" + ); + } elsif ( $c =~ /THIS FILE HAS BEEN AUTOMATICALLY GENERATED. DO NOT EDIT./ + || $c =~ /DO NOT EDIT/i + || $c =~ /generated/i ) + { + warn wrap_ref_mod( + $ref, + "po4a::man", + dgettext( + "po4a", + "This file contains the line '%s'. " + . "You should translate the source file, but continuing anyway." + ), + $l . "\\\"" . $c + ); + } + } + + if ( $line =~ m/^[.']*$/ ) { + if ( $c !~ m/^\s+$/ ) { + + # This commented line may be comment for the next paragraph + push @next_comments, [ $line ? substr( $line, 0, 1 ) : '.', $c ]; + } + if ( $line =~ m/^[.']+$/ ) { + + # those lines are ignored + # (empty lines are a little bit different) + goto NEW_LINE; + } + if ( $line =~ m/^\s*$/ and $t eq "#" ) { + + # Groff comments + goto NEW_LINE; + } + } else { + push @comments, [ '.', $c ]; + } + } else { + + # finally, we did not reach the end of the paragraph. The comments + # belong to the current paragraph. + push @comments, @next_comments; + @next_comments = (); + } + + # A .I or .B request change the current font + # and on exit, switch the font to Roman + # When one of these request doesn't have its argument on its line + # (and when we support this usage), we must keep this font request to + # insert it later. + # It is a stack of fonts to be inserted (in case a .I is followed by + # a .B and then followed bysome text; note that in this case, + # only one \fR must be inserted at the end of the text) + my $insert_font = ""; + while ( $line =~ /\\$/ || $line =~ /^(\.[BI])\s*$/ || $line =~ /^\.[BI][\t ].*?\\c$/ ) { + my ( $l2, $r2 ) = $self->SUPER::shiftline(); + chomp($l2); + if ( $line =~ /^(\.[BI])\s*$/ ) { + if ( $l2 =~ /^[.'][\t ]*([BI]|BI|BR|IB|IR|RB|RI)(?:[\t ]|\s*$)/ ) { + my $font = $line; + $font =~ s/^\.([BI])\s*$/$1/; + $insert_font = "\\f$font$insert_font"; + $line = $l2; + $ref = $r2; + } elsif ( $l2 =~ /^[.'][\t ]*(SH|TP|TQ|P|PP|LP)(?:[\t ]|\s*$)/ ) { + $line =~ s/^\.([BI])\s*$/$insert_font\\f$1/; + $self->SUPER::unshiftline( $l2, $r2 ); + } elsif ( $l2 =~ /^([.'][\t ]*(?:IP)[\t ]+"?)(.*)$/ ) { + + # Install the font modifier into the next line + # after a possible quote (") + my $macro = $1; + my $arg = $2; + $line =~ /^\.([BI])\s*$/; + $line = $macro . "$insert_font\\f$1" . $arg; + $ref = $r2; + } elsif ( $l2 =~ /^[.']/ ) { + warn wrap_ref_mod( + $ref, + "po4a::man", + dgettext( + "po4a", + "Font modifiers followed by a command may disturb " + . "po4a. You should either remove the font modifier " + . "'%s', or integrate a \\f font modifier in the " + . "following command ('%s'), but continuing anyway." + ), + $line, $l2 + ); + $line = "PO4A-INLINE:$line:PO4A-INLINE"; + $self->SUPER::unshiftline( $l2, $r2 ); + } else { + + # convert " to the groff's double quote glyph; it will be + # converted back to " in pre_trans. It is needed because + # otherwise, these quotes will be taken as arguments + # delimiters. + $l2 =~ s/"/\\(dq/g; + + # append this line to the macro, with surrounding quotes, so + # that the line appear as an uniq argument. + $line .= ' "' . $l2 . '"'; + } + } else { + $line =~ s/\\$//; + $line =~ s/\\c$//; + $line .= $l2; + } + } + + # Detect non-wrapped paragraphs + # This must be done before handling the .B, .RI ... font requests + $line =~ s/^($FONT_RE)(\s+)/$2$1/; + + $line .= "\n"; + + # Handle font requests here + if ( $line =~ /^[.'][\t ]*([BI]|BI|BR|IB|IR|RB|RI)(?:(?: +|\t)(.*)|)$/ ) { + my $macro = $1; + my $arguments = $2; + my @args = splitargs( $ref, $arguments ); + if ( $macro eq 'B' || $macro eq 'I' ) { + + # To keep the space(s), we must introduce some \& + @args = map { $_ =~ s/^(\s*)$/\\&$1\\&/s; $_ } @args; + my $arg = join( " ", @args ); + $arg =~ s/^ +//; + this_macro_needs_args( $macro, $ref, $arg ); + $line = "$insert_font\\f$macro" . $arg . "\\fR\n"; + $insert_font = ""; + } + + # .BI bold alternating with italic + # .BR bold/roman + # .IB italic/bold + # .IR italic/roman + # .RB roman/bold + # .RI roman/italic + if ( $macro eq 'BI' + || $macro eq 'BR' + || $macro eq 'IB' + || $macro eq 'IR' + || $macro eq 'RB' + || $macro eq 'RI' ) + { + # num of seen args, first letter of macro name, second one + my ( $i, $a, $b ) = ( 0, substr( $macro, 0, 1 ), substr( $macro, 1 ) ); + $line = join( "", map { $i++ % 2 ? "\\f$b$_" : "\\f$a$_" } @args ) . "\\fR\n"; + if ( $i eq 0 ) { + + # If a .BI is used without argument, we must insert a + # \fI\fR. The \fR was inserted previously. + $line = "\\f$b$line"; + } + } + + if ( length $insert_font ) { + $line =~ s/\n$//; + $line = "$insert_font$line\\fR\n"; + } + + if ( $line =~ /^(.*)\\c(\\f.)?\s*\\fR\n/ ) { + my $begin = $1; + + my ( $l2, $r2 ) = $self->SUPER::shiftline(); + if ( $l2 =~ /^[.']/ ) { + $self->SUPER::unshiftline( $l2, $r2 ); + } else { + $l2 =~ s/\s*$//s; + $line = "$begin\\fR$l2\n"; + } + } + } + + return ( $line, $ref ); +} + +# Overload Transtractor's pushline. +# This pushline first push comments (if there are comments for the +# current line, and the line is not empty), and then push the line. +sub pushline { + my ( $self, $line ) = ( shift, shift ); + if ( $line !~ m/^\s*$/ ) { + + # add comments + foreach my $c (@comments) { + + # comments are pushed (maybe at the wrong place). + $self->SUPER::pushline( $self->r("$$c[0]\\\"$$c[1]\n") ); + } + @comments = (); + } + + $self->SUPER::pushline($line); +} + +# The default unshiftline from Transtractor may fail because shiftline +# is overloaded +sub unshiftline { + die wrap_mod( + "po4a::man", + dgettext( + "po4a", + "The unshiftline is not supported for the man module. " + . "Please send a bug report with the groff page that generated " + . "this error." + ) + ); +} + +############################################### +#### FUNCTION TO TRANSLATE OR NOT THE TEXT #### +############################################### +sub pushmacro { + my $self = shift; + if ( scalar @_ ) { + + # Do quote the arguments containing spaces, as it should. + + # but do not do so if they already contain quotes and escaped spaces + # For example, cdrdao(1) uses: + # .IP CATALOG\ "ddddddddddddd" (Here, the quote have to be displayed) + # Adding extra quotes as in: + # .IP "CATALOG\ "ddddddddddddd"" + # results in two args: 'CATALOG\ ' and 'ddddddddddddd""' + $self->pushline( + join( + " ", + map { + # Replace double quotes by \(dq (double quotes could be + # taken as an argument delimiter). + # Only quotes not preceded by \ are taken into account + # (\" introduces a comment). + s/(?<!\\)"/\\\(dq/g if ( defined $_ ); + + defined $_ + ? ( + length($_) + ? ( m/([^\\] |^ )/ ? "\"$_\"" : "$_" ) + + # Quote arguments that contain a space. + # (not needed for non breaknig spaces, i.e. + # spaces preceded by '\') + : '""' # empty argument + ) + : '' # no argument + } @_ + ) + . "\n" + ); + } else { + $self->pushline("\n"); + } +} + +sub this_macro_needs_args { + my ( $macroname, $ref, $args ) = @_; + unless ( length($args) ) { + die wrap_ref_mod( + $ref, + "po4a::man", + dgettext( + "po4a", + "macro %s called without arguments. " + . "Even if placing the macro arguments on the next line is authorized " + . "by man(7), handling this would make the po4a parser too complicate. " + . "Please simply put the macro args on the same line." + ), + $macroname + ); + } +} + +sub pre_trans { + my ( $self, $str, $ref, $type ) = @_; + + # Preformatting, so that translators don't see + # strange chars + my $origstr = $str; + print STDERR "pre_trans($str)=" + if ( $debug{'pretrans'} ); + + # Do as few treatments as possible with the .de, .ie and .if sections + if ( defined $self->{type} && $self->{type} =~ m/^(ie|if|de)$/ ) { + return $str; + } + + # Note: if you want to implement \c support, the groff man page is your playground + if ( not defined $self->{type} ) { + $str =~ s/(\G|^(?:.*?)\n|^) # Last position, or begin of a line + ([ \t]*[^.'][^\n]*(?<!\\)(?:\\\\)*) # the new line, which + \\c[ \t]*\n # ends by \c and followed by a line + (?![ \t]*[.'])/$1$2/sgx; # not followed by a command (.') + } + die wrap_ref_mod( $ref, "po4a::man", + dgettext( "po4a", "Escape sequence \\c encountered. This is not completely handled yet. Faulty input: %s" ), + $str ) + if ( $str =~ /\\c/ ); + + $str =~ s/>/E<gt>/sg; + $str =~ s/</E<lt>/sg; + $str =~ s/EE<lt>gt>/E<gt>/g; # could be done in a smarter way? + + while ( $str =~ m/^(.*)PO4A-INLINE:(.*?):PO4A-INLINE(.*)$/s ) { + my ( $t1, $t2, $t3 ) = ( $1, $2, $3 ); + $str = "$1E<$2>"; + if ($mdoc_mode) { + + # When a punctuation sign must be joined to an argument, mdoc allows such a construct: + # .Ar file1 , file2 , file3 ) . + # Here, we move the punctuation out of the E<...> tag. + # This is reverted in post_trans. + # FIXME: To be checked with the French punctuation + while ( $str =~ m/(?<!\\) +([.,;:\)\]]) *>/s ) { + $str =~ s/(?<!\\) +([.,;:\)\]]) *>/>$1/s; + } + } + if ( defined $t3 and length $t3 ) { + $t3 =~ s/^\n//s; + $str .= "\n$t3"; + } + } + + # simplify the fonts for the translators + if ( defined $self->{type} && $self->{type} =~ m/^(SH|SS)$/ ) { + set_regular("B"); + } + $str = do_fonts( $str, $ref ); + if ( defined $self->{type} && $self->{type} =~ m/^(SH|SS)$/ ) { + set_regular("R"); + } + + # After the simplification, the first char can be a \n. + # Simply push these newlines before the translation, but make sure the + # resulting string is not empty (or an additional line will be + # added). + if ( $str =~ /^(\n+)(.+)$/s ) { + $self->pushline($1); + $str = $2; + } + + unless ($mdoc_mode) { + + # Kill minus sign/hyphen difference. + # Aestetic of printed man pages may suffer, but: + # * they are translator-unfriendly + # * they break when using utf8 (for obscure reasons) + # * they forbid the searches, since keybords don't have hyphen key + # * they forbid copy/paste, since options need minus sign, not hyphen + $str =~ s|\\-|-|sg; + + # Groff bestiary + $str =~ s/\\\*\(lq/``/sg; + $str =~ s/\\\*\(rq/''/sg; + $str =~ s/\\\(dq/"/sg; + } + + # non-breaking spaces + # some non-breaking spaces may have been added during the parsing + $str =~ s/\Q$nbs/\\ /sg; + $str =~ s/\\~/\\ /sg; + + print STDERR "$str\n" if ( $debug{'pretrans'} ); + return $str; +} + +sub post_trans { + my ( $self, $str, $ref, $type, $wrap ) = @_; + my $transstr = $str; + + print STDERR "post_trans($str)=" + if ( $debug{'postrans'} ); + + # Do as few treatments as possible with the .de, .ie and .if sections + if ( defined $self->{type} && $self->{type} =~ m/^(ie|if|de)$/ ) { + return $str; + } + + unless ($mdoc_mode) { + + # Post formatting, so that groff see the strange chars + $str =~ s|\\-|-|sg; # in case the translator added some of them manually + # change hyphens to minus signs + # (this shouldn't be done for \s-<number> font size modifiers) + # nor on .so/.mso args + unless ( defined $self->{type} && $self->{type} =~ m/^m?so$/ ) { + my $tmp = ""; + while ( $str =~ m/^(.*?)-(.*)$/s ) { + my $begin = $1; + $str = $2; + my $tmp2 = $tmp . $begin; + if ( ( $begin =~ m/(?<!\\)(\\\\)*\\s$/s ) + or ( $begin =~ m/(?<!\\)(\\\\)*\\\((.|E<[gl]t>)?$/s ) + or ( $tmp2 =~ m/(?<!\\)(\\\\)*\\[ZHhCv]'([^']|(?<!\\)(\\\\)*\\')*$/ ) + or ( $tmp2 =~ m/(?<!\\)(\\\\)*\\(\*)?\[([^\]]|(?<!\\)(\\\\)*\\\[)*$/ ) + or ( $tmp2 =~ m/(?<!\\)(\\\\)*\\\*\(.?$/ ) ) + { + # Do not change - to \- for + # * \s-n (reduce font size) + # * \(.. (e.g. '<-', '-D') + # * inside a \h'...' + # * inside a \C'...' + # * inside a \[...] + # * inside a \*(.. + # * inside a \*[...] + # * inside a \v'...' + # * inside a \H'...' + # * inside a \Z'...' + $tmp = $tmp2 . "-"; + } else { + $tmp = $tmp2 . "\\-"; + } + } + $str = $tmp . $str; + } + } + + # There must not be an end of line inside an inline macro + $str =~ s/(E<\.[^>]*)\n([^>]*>)/$1 $2/gs; + + # No . or ' on first char, or nroff will think it's a macro + # * at the beginning of a paragraph, add \& (zero width space) at + # the beginning of the line + if ( not defined $self->{type} ) { + + # Only do it on regular text, because + # his doesn't work after a TS (this macros shift + # lines, which may contain macros) + # or for the .ta arguments (e.g. .ta .5i 3i) + $str =~ s/^((?: + (?:CW|[RBI])< + |$FONT_RE + )? + [.'] + )/\\&$1/mgx; + } elsif ( $self->{type} =~ m/^(TP|TQ)$/ ) { + + # But it is also needed for some type (e.g. TP, if followed by a + # font macro) + # This regular expression is the same as above + $str =~ s/^((?:(?:CW|[RBI])<|$FONT_RE)?[.'])/\\&$1/mg; + } + + # * degraded mode, doesn't work for the first line of a paragraph + $str =~ s/\n([.'])/ $1/mg; + + # Change ascii non-breaking space to groff one + my $nbs_out = get_out_nbs( $self->get_out_charset ); + $str =~ s/\Q$nbs_out/\\ /sg if defined $nbs_out; + + # No nbsp (said "\ " in groff on the last pos of the line, or groff adds + # an extra space + $str =~ s/\\ \n(?=.)/\\ /sg; + + # Make sure we compute internal sequences right. + # think about: B<AZE E<lt> EZA E<gt>> + while ( $str =~ m/^(.*)(CW|[RBI])<(.*)$/s ) { + my ( $done, $rest ) = ( $1 . "\\f$2", $3 ); + $done =~ s/CW$/\(CW/; + my $lvl = 1; + while ( length $rest && $lvl > 0 ) { + my $first = substr( $rest, 0, 1 ); + if ( $first eq '<' ) { + $lvl++; + } elsif ( $first eq '>' ) { + $lvl--; + } + if ( $first eq "\n" ) { + + # Don't accept \n within B<> parameters as troff seems to reset font on new line (Debian's #1016753) + $done .= ' ' if ( $lvl > 0 ); + } else { + $done .= $first if ( $lvl > 0 ); + } + $rest = substr( $rest, 1 ); + } + die wrap_ref_mod( $ref || $self->{ref}, + "po4a::man", dgettext( "po4a", "Unbalanced '<' and '>' in font modifier. Faulty message: %s" ), $str ) + if ( $lvl > 0 ); + + # Return to the regular font + $done .= "\\fP$rest"; + $str = $done; + } + + while ( $str =~ m/^(.*?)E<([.'][\t ]*.*?(?<!E<[gl]t))>(.*)$/s ) { + my ( $t1, $t2, $t3 ) = ( $1, $2, $3 ); + $t1 =~ s/ +$//s; + $t2 =~ s/\n/ /gs; + if ($mdoc_mode) { + + # restore the punctuation inside the line (see pre_trans) + if ( $t3 =~ s/^([.,;:\)\]]+)//s ) { + my $punctuation = $1; + $punctuation =~ s/([.,;:\)\]])/$1 /; + $t2 .= " $punctuation"; + } + } + $t3 =~ s/^ +//s; + if ($wrap) { + + # The no-wrap case should be checked + $t1 =~ s/\n$//s; + } + $str = $t1; + if ( length $t1 ) { + $t1 =~ s/\n$//s; + $str = "$t1\n"; + } + $str .= $t2; + if ( defined $t3 and length $t3 ) { + $t3 =~ s/^\n//s; + $str .= "\n$t3"; + } + } + my $str2 = $str; + $str2 =~ s/E<[gl]t>//g; + die wrap_ref_mod( $ref || $self->{ref}, + "po4a::man", dgettext( "po4a", "Unknown '<' or '>' sequence. " . "Faulty message: %s" ), $str ) + if $str2 =~ /[<>]/; + $str =~ s/E<gt>/>/mg; + $str =~ s/E<lt>/</mg; + + # Don't do that, because we'll go into trouble if previous line was .TP + # $str =~ s/^\\f([BI])(.*?)\\f[RP]$/\.$1 $2/mg; + + unless ($mdoc_mode) { + my $tmp = ""; + while ( $str =~ m/^(.*?)(``|'')(.*)$/s ) { + $tmp .= $1; + my $q = $2; + $str = $3; + + # There are probably many more exceptions, here are those I could + # detect in my manpages. + # \*(.' \*(.` + # \*' \*` + # \N'xxx' + if ( $tmp =~ m/(?<!\\)(?:\\\\)*\\\*\($/s ) { + $tmp .= $q; + } elsif ( + $tmp =~ m/(?<!\\)(?:\\\\)*\\\*\(.$/s + or $tmp =~ m/(?<!\\)(?:\\\\)*\\\*$/s + or ( $tmp =~ m/(?<!\\)(?:\\\\)*\\N'[0-9]*$/s + and $q eq "''" ) + ) + { + $q =~ m/(.)(.)/; + $tmp .= $1; + $str = $2 . $str; + } else { + $q =~ s/``/\\\*\(lq/; + $q =~ s/''/\\\*\(rq/; + $tmp .= $q; + } + } + $str = $tmp . $str; + } + if ( not defined $self->{type} ) { + $str =~ s/(?<!\\) $//mg; + } + + print STDERR "$str\n" if ( $debug{'postrans'} ); + return $str; +} + +sub translate { + my ( $self, $str, $ref, $type ) = ( shift, shift, shift, shift ); + my (%options) = @_; + my $origstr = $str; + + return $str unless ( defined $str ) && length($str); + return $str if ( $str eq "\n" ); + + # Do not translate the strings that only consist of fonts, spaces and + # \&. This is useful because we introduced \& in shiftline. + if ( $str =~ m/^($FONT_RE|\s|\\&)*$/s ) { + do_fonts( $str, $ref || $self->{ref} ); + return $str; + } + + # If a string is quoted, only translate the argument between the + # quotes. + if ( $options{'wrap'} or $str !~ m/\n/s ) { + if ( $str =~ m/^\"(.*)\"$/s and $1 !~ m/(?<!\\)\"/ ) { + $str = '"' . $self->translate( $1, $ref, $type, %options ) . '"'; + $str =~ s/\n"$/"\n/s; + return $str; + } + } + + $str = pre_trans( $self, $str, $ref || $self->{ref}, $type ); + $options{'comment'} .= join( '\n', map { $$_[1] } @comments ); + + # Translate this + $str = $self->SUPER::translate( $str, $ref || $self->{ref}, $type || $self->{type}, %options ); + if ( $options{'wrap'} ) { + my (@paragraph); + @paragraph = split( /\n/, $str ); + if ( defined( $paragraph[0] ) && $paragraph[0] eq '' ) { + shift @paragraph; + } + $str = join( "\n", @paragraph ) . "\n"; + } + $str = post_trans( $self, $str, $ref || $self->{ref}, $type, $options{'wrap'} ); + return $str; +} + +# shortcut +sub t { + return $_[0]->translate( $_[1] ); +} + +# shortcut. +# As a rule of thumb, I do not recode macro names, unless they may be +# followed by other characters. +sub r { + my $self = shift; + my $str = shift; + + # non-breaking spaces + # some non-breaking spaces may have been added during the parsing + $str =~ s/\Q$nbs/\\ /sg; + + return $str; +} + +sub do_paragraph { + my ( $self, $paragraph, $wrapped_mode ) = ( shift, shift, shift ); + + # Following needed because of 'ft' (at least, see ft macro below) + unless ( $paragraph =~ m/\n$/s ) { + my @paragraph = split( /\n/, $paragraph ); + + $paragraph .= "\n" + unless scalar(@paragraph) == 1; + } + + $self->pushline( $self->translate( $paragraph, $self->{ref}, "Plain text", "wrap" => ( $wrapped_mode eq 'YES' ) ) ); +} + +############################# +#### MAIN PARSE FUNCTION #### +############################# +sub parse { + my $self = shift; + my ( $line, $ref ); + my ($paragraph) = ""; # Buffer where we put the paragraph while building + my $wrapped_mode = 'YES'; # Should we wrap the paragraph? Three possible values: + # YES: do wrap + # NO: don't wrap because this paragraph contains indented lines + # this status disapear after the end of the paragraph + # MACRONO: don't wrap because we saw the nf macro. It stays so + # until the next fi macro. + + # We want to change the non-breaking space according to the input + # document charset + $nbs = get_in_nbs( $self->{TT}{'file_in_charset'} ); + + LINE: + undef $self->{type}; + ( $line, $ref ) = $self->shiftline(); + + while ( defined($line) ) { + + # print STDERR "line=$line;ref=$ref"; + chomp($line); + $self->{ref} = "$ref"; + + # print STDERR "LINE=$line<<\n"; + + if ( $line =~ /^[.']/ ) { + die wrap_mod( "po4a::man", dgettext( "po4a", "Unparsable line: %s" ), $line ) + unless ( $line =~ /^([.']+\\*?)(\\["#])(.*)/ + || $line =~ /^([.'])(\S*)(.*)/ ); + my $arg1 = $1; + $arg1 .= $2; + my $macro = $2; + my $arguments = $3; + + if ( $inline{$macro} ) { + $paragraph .= "PO4A-INLINE:" . $line . ":PO4A-INLINE\n"; + goto LINE; + } + + # Split on spaces for arguments, but not spaces within double quotes + my @args = (); + push @args, $arg1; + if ( $macro =~ /^(?:ta|TP|ie|if|de)$/ ) { + + # The number of spaces may be critical for the 'ta' macro, + # and there is no need to split the arguments. + push @args, $arguments; + } else { + push @args, splitargs( $ref, $arguments ); + } + + if ( length($paragraph) ) { + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $wrapped_mode = $wrapped_mode eq 'NO' ? 'YES' : $wrapped_mode; + } + + # Special case: Don't change these lines + # .\" => comments + # .\# => comments + # ." => comments + # . => empty point on the line + # .tr abcd... + # => substitution like Perl's tr/ac/bd/ on output. + if ( $macro eq '\\"' + || $macro eq '' + || $macro eq 'tr' + || $macro eq '"' + || $macro eq '\\#' ) + { + $self->pushline( $self->r($line) . "\n" ); + goto LINE; + } + + # Special case: + # .nf => stop wrapped mode + # .fi => wrap again + if ( $no_wrap_begin{$macro} or $no_wrap_end{$macro} ) { + if ( $no_wrap_end{$macro} ) { + $wrapped_mode = 'YES'; + } else { + $wrapped_mode = 'MACRONO'; + } + $self->pushline( $self->r($line) . "\n" ); + goto LINE; + } + + # SH resets the wrapping (in addition to starting a section) + if ( $macro eq 'SH' ) { + $wrapped_mode = 'YES'; + } + + unshift @args, $self; + + # Apply macro + $self->{type} = $macro; + + if ( defined( $macro{$macro} ) ) { + &{ $macro{$macro} }(@args); + } else { + if ( defined $unknown_macros ) { + &{$unknown_macros}(@args); + } else { + $self->pushline( $self->r($line) . "\n" ); + die wrap_ref_mod( + $ref, + "po4a::man", + dgettext( + "po4a", + "Unknown macro '%s'. Remove it from the document, or refer to the Locale::Po4a::Man manpage to see how po4a can handle new macros." + ), + $line + ); + } + } + + } elsif ( $line =~ /^ +[^. ]/ ) { + + # (Lines containing only spaces are handled as empty lines) + # Not a macro, but not a wrapped paragraph either + $wrapped_mode = $wrapped_mode eq 'YES' ? 'NO' : $wrapped_mode; + $paragraph .= $line . "\n"; + } elsif ( $line =~ /^[^.].*/ && $line !~ /^ *$/ ) { + + # (Lines containing only spaces are handled latter as empty lines) + if ( $line =~ /^\\"/ ) { + + # special case: the line is entirely a comment, keep the + # comment. + # NOTE: comment could also be found in the middle of a line. + # From info groff: + # Escape: \": Start a comment. Everything to the end of the + # input line is ignored. + $self->pushline( $self->r($line) . "\n" ); + goto LINE; + } elsif ( $line =~ /^\\#/ ) { + + # Special groff comment. Do not keep the new line + goto LINE; + } else { + + # Not a macro + # * first, try to handle some "output line continuation" (\c) + $paragraph =~ s/\\c *(($FONT_RE)?)\n?$/$1/s; + + # * append the line to the current paragraph + $paragraph .= $line . "\n"; + } + } else { #empty line, or line containing only spaces + if ( length($paragraph) ) { + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + } + $wrapped_mode = $wrapped_mode eq 'NO' ? 'YES' : $wrapped_mode; + $self->pushline( $line . "\n" ); + } + + # finally, we did not reach the end of the paragraph. The comments + # belong to the current paragraph. + push @comments, @next_comments; + @next_comments = (); + + # Reinit the loop + ( $line, $ref ) = $self->shiftline(); + undef $self->{type}; + } + + if ( length($paragraph) ) { + do_paragraph( $self, $paragraph, $wrapped_mode ); + $wrapped_mode = $wrapped_mode eq 'NO' ? 'YES' : $wrapped_mode; + $paragraph = ""; + } + + # flush the last comments + push @comments, @next_comments; + @next_comments = @comments; + @comments = (); + for my $c (@next_comments) { + $self->pushline( $self->r("$$c[0]\\\"$$c[1]\n") ); + } + + # reinitialize the module + @next_comments = (); + set_regular("R"); + set_font("R"); + set_font("R"); + $mdoc_mode = 0; +} # end of main + +# Cache the results of get_in_nbs and get_out_nbs +{ + my $last_in_charset; + my $last_in_nbs; + + # get_in_nbs(charset) + # Return the representation of a non breaking space in the input charset + # (given in argument). + # or PO4A:VERY_IMPROBABLE_STRING_USEDFOR_NON-BREAKING-SPACES if this + # character doesn't exist in this charset. + sub get_in_nbs() { + my $charset = shift; + + return $last_in_nbs + if ( defined $charset + and defined $last_in_charset + and $charset eq $last_in_charset ); + + my $nbs = "\xA0"; + my $length; + if ( defined $charset and length $charset ) { + eval( + "\$length = Encode::from_to(\$nbs, \"latin-1\", + \$charset, + 1)" + ); + } + + # fall back solution + $nbs = "PO4A:VERY_IMPROBABLE_STRING_USEDFOR_NON-BREAKING-SPACES" + unless defined $length; + $last_in_charset = $charset; + $last_in_nbs = $nbs; + + return $last_in_nbs; + } + + my $last_out_charset; + my $last_out_nbs; + + # get_out_nbs(charset) + # Return the representation of a non breaking space in the output charset + # (given in argument). + # or undef if this character doesn't exist in this charset. + sub get_out_nbs() { + my $charset = shift; + + return $last_out_nbs + if ( defined $charset + and defined $last_out_charset + and $charset eq $last_out_charset ); + + my $nbs = "\xA0"; + my $length; + if ( defined $charset and length $charset ) { + eval( + "\$length = Encode::from_to(\$nbs, \"latin-1\", + \$charset, + 1)" + ); + } + + # fall back solution + undef $nbs + unless defined $length; + $last_out_charset = $charset; + $last_out_nbs = $nbs; + + return $last_out_nbs; + } + +} + +# We can't push the header in the first line of the document, as in the +# other module, because the first line may contain indications on how the +# man page must be processed. +sub docheader { + return ""; +} + +# The header is pushed just before the .TH macro (this macro is mandatory +# and must be specified at the begining (there may be macro definitions +# before). +sub push_docheader { + my $self = shift; + $self->pushline( ".\\\"*******************************************************************\n" + . ".\\\"\n" + . ".\\\" This file was generated with po4a. Translate the source file.\n" + . ".\\\"\n" + . ".\\\"*******************************************************************\n" ); +} + +# Split request's arguments. +# see: +# info groff --index-search "Request Arguments" +sub splitargs { + my ( $ref, $arguments ) = ( $_[0], $_[1] ); + my @args = (); + my $buffer = ""; + my $escaped = 0; + if ( !defined $arguments ) { + return @args; + } + + # change non-breaking space before to ensure that split does what we want + # We change them back before pushing into the arguments. The one which + # will be translated will have the same change again (in pre_trans and + # post_trans), but the ones which won't get translated are not changed + # anymore. Let's play safe. + $arguments =~ s/\\ /$nbs/g; + $arguments =~ s/^ +//; + $arguments =~ s/\\&"/\\(dq/g; + $arguments =~ s/^ *//; + while ( length $arguments ) { + if ( $arguments =~ s/^"((?:[^"]|"")*)"(?!") *// ) { + my $a = $1; + $a =~ s/""/"/g if defined $a; + push @args, $a; + } elsif ( $arguments =~ s/^"((?:[^"]|"")*) *$// ) { + + # Unterminated quote, but this seems to be handled by removing + # the trailing spaces and closing the quotes. + my $a = $1; + $a =~ s/""/"/g if defined $a; + push @args, $a; + } elsif ( $arguments =~ s/^([^ ]+) *// ) { + push @args, $1; + } else { + die wrap_ref_mod( $ref, "po4a::man", dgettext( "po4a", "Cannot parse command arguments: %s" ), $arguments ); + } + } + if ( $debug{'splitargs'} ) { + print STDERR "ARGS="; + map { print STDERR "$_^" } @args; + print STDERR "\n"; + } + + return @args; +} + +{ + #static variables + # font stack. + # Keep track of the current font (because a font modifier can + # stay open at the end of a paragraph), and the previous font (to + # handle \fP) + my $current_font = "R"; + my $previous_font = "R"; + + # $regular_font describe the "Regular" font, which is the font used + # when there is no font modifier. + # For example, .SS use a Bold font, and thus in + # .SS This is a \fRsubsection\fB header + # the \fR and \fB font modifiers have to be kept. + my $regular_font = "R"; + + # Set the regular font + # It takes the regular font in argument (when no argument is provided, + # it uses "R"). + sub set_regular { + print STDERR "set_regular('@_')\n" + if ( $debug{'fonts'} ); + set_font(@_); + $regular_font = $current_font; + } + + sub set_font { + print STDERR "set_font('@_')\n" + if ( $debug{'fonts'} ); + my $saved_previous = $previous_font; + $previous_font = $current_font; + + if ( !defined $_[0] ) { + $current_font = "R"; + } elsif ( $_[0] =~ /^(P|\[\]|\[P\])/ ) { + $current_font = $saved_previous; + } elsif ( length( $_[0] ) == 1 ) { + $current_font = $_[0]; + } elsif ( length( $_[0] ) == 2 ) { + $current_font = "($_[0]"; + } else { + $current_font = "[$_[0]]"; + } + print STDERR "r:'$regular_font', p:'$previous_font', c:'$current_font'\n" + if ( $debug{'fonts'} ); + } + + sub do_fonts { + + # one argument: a string + my ( $str, $ref ) = ( shift, shift ); + print STDERR "do_fonts('$str', '$ref')=" + if ( $debug{'fonts'} ); + + # restore the font stack + $str = "\\f$previous_font\\f$current_font" . $str; + + # In order to be able to split on /\\f/, without problem with + # \\foo, groff backslash (\\) are changed to the (equivalent) + # form: \e (this should be done in shiftline). + my @array1 = split( /\\f/, $str ); + + $str = shift @array1; # The first element is always empty because + # the $current_font was put at the beginning + # $last_font indicates the last font that was appended to the buffer. + # It differ from $current_font because concecutive identical fonts + # are not written in the buffer. + my $last_font = $regular_font; + + foreach my $elem (@array1) { + + # Do not touch the fonts in the inline macros + # These inline macros may have their argument in bold or italic, + # we can't know. + if ( $str =~ m/E<\.([^>]|E<gt>|E<lt>)*$/s ) { + + # We can't use \\f here, otherwise the font simplifier regexp + # will use the fonts of the inline macros. + $str .= "PO4A-FAKE-FONT" . $elem; + next; + } + + # Replace \fP by the exact font (because some font modifiers will + # be removed or added, which will break groff's font stack) + $elem =~ s/^(P|\[\]|\[P\])/$previous_font/s; + + # change \f1 to \fR, etc. + # Those fonts are defined in the DESC file, which + # may depend on the groff device. + # fonts 1 to 4 are usually mapped to R, I, B, BI + # TODO: use an array for the font positions. This + # array should be updated by .fp requests. + $elem =~ s/^1/R/; + $elem =~ s/^2/I/; + $elem =~ s/^3/B/; + $elem =~ s/^4/(BI/; + + if ( $elem =~ /^([1-4]|B|I|R|\(..|\[[^]]*\]|L)(.*)$/s ) { + + # Each element should now start by a recognized font modifier + my $new_font = $1; + my $arg = $2; + + # Update the font stack + $previous_font = $current_font; + $current_font = $new_font; + + if ( $new_font eq $last_font ) { + + # continue with the same font. + $str .= $arg; + } else { + + # A new font is used, update $last_font + $last_font = $new_font; + $str .= "\\f" . $elem; + } + } else { + die wrap_ref_mod( $ref, "po4a::man", dgettext( "po4a", "Unsupported font in: '%s'." ), "\\f" . $elem ); + } + } + + # Do some simplification (they don't change the font stack) + # Remove empty font modifiers at the end + $str =~ s/($FONT_RE)*$//s; + + # close any font modifier + if ( $str =~ /.*($FONT_RE)(.*?)$/s && $1 ne "\\f$regular_font" ) { + $str =~ s/(\n?)$/\\f$regular_font$1/; + } + + # remove fonts with empty argument + while ( $str =~ /($FONT_RE){2}/ ) { + + # while $str has two consecutive font modifiers + # only keep the second one. + $str =~ s/($FONT_RE)($FONT_RE)/$2/s; + } + + # when there are two consecutive switches to the regular font, + # remove the last one. + while ( + $str =~ /^(.*)\\f$regular_font # anything followed by a + # regular font + ((?:\\(?!f)|[^\\])*) # the text concerned by + # this font (i.e. without any + # font modifier, i.e. it + # contains no '\' followed by + # an 'f') + \\f$regular_font # another regular font + (.*)$/sx + ) + { + $str = "$1\\f$regular_font$2$3"; + } + + # the regular font modifier at the beginning of the string is not + # needed (the do_fonts subroutine ensure that every paragraph ends with + # the regular font. + if ( $str =~ /^(.*?)\\f$regular_font(.*)$/s && $1 !~ /$FONT_RE/ ) { + $str = "$1$2"; + } + + # Use special markup for common fonts, so that translators don't see + # groff's font modifiers + my $PO_FONTS = "B|I|R|\\(CW"; + + # remove the regular font from this list + $PO_FONTS =~ s/^$regular_font\|//; + $PO_FONTS =~ s/\|$regular_font\|/|/; + $PO_FONTS =~ s/\|$regular_font$//; + while ( + $str =~ /^(.*?) # $1: anything (non greedy: as + # few as possible) + \\f($PO_FONTS) # followed by a common font + # modifier ($2) + ((?:\\[^f]|[^\\])*) # $3: the text concerned by + # this font (i.e. without any + # font modifier, i.e. it + # contains no '\' followed by + # an 'f') + \\f # the next font modifier + (.*)$/sx + ) + { # $4: anything up to the end + my ( $begin, $font, $arg, $end ) = ( $1, $2, $3, $4 ); + if ( $end =~ /^$regular_font(.*)$/s ) { + + # no need to add a switch to $regular_font + $str = $begin . "$font<$arg>$1"; + } else { + $str = $begin . "$font<$arg>\\f$end"; + } + } + $str =~ s/\(CW</CW</sg; + $str =~ s/PO4A-FAKE-FONT/\\f/sg; + + print STDERR "'$str'\n" if ( $debug{'fonts'} ); + return $str; + } +} + +########################################## +#### DEFINITION OF THE MACROS WE KNOW #### +########################################## +# Each sub is passed self as first arg, +# plus the args present on the roff line +# ie, <<.TH LS "1" "October 2002" "ls (coreutils) 4.5.2" "User Commands">> +# is passed (".TH","LS","1","October 2002","ls (coreutils) 4.5.2","User Commands") +# Macro name is also passed, because .B (bold) will be encoded in pod format (and mangeled). +# They should return a list, which will be join'ed(' ',..) +# or undef when they don't want to add anything + +# Some well known macro handling + +# For macro taking only one argument, but people may forget the quotes. +# Example: >>.SH Another Section<< which should be >>.SH "Another Section"<< +sub translate_joined { + my ( $self, $macroname, $macroarg ) = ( shift, shift, join( " ", @_ ) ); + + #section# .S[HS] name + + $self->pushmacro( $macroname, $self->t($macroarg) ); +} + +# For macro taking several arguments, having to be translated separately +sub translate_each { + my ( $self, $first ) = ( shift, 0 ); + $self->pushmacro( map { $first++ ? $self->t($_) : $_ } @_ ); +} + +# For macro which shouldn't be given any arg +sub noarg { + my $self = shift; + warn "Macro $_[0] does not accept any argument\n" + if ( defined( $_[1] ) ); + $self->pushmacro(@_); +} + +# For macro whose arguments shouldn't be translated +sub untranslated { + my ( $self, $first ) = ( shift, 0 ); + $self->pushmacro( map { $first++ ? $self->r($_) : $_ } @_ ); +} + +### +### man 7 man +### + +$macro{'TH'} = sub { + my $self = shift; + my ( $th, $title, $section, $date, $source, $manual ) = @_; + + #Preamble#.TH title section date source manual + # print STDERR "TH=$th;titre=$title;sec=$section;date=$date;source=$source;manual=$manual\n"; + + # Reset the memories + $self->push_docheader(); + + $self->pushmacro( $th, $self->t($title), $section, $self->t($date), $self->t($source), $self->t($manual) ); +}; + +# .SS t Subheading t (like .SH, but used for a subsection inside a section). +$macro{'SS'} = $macro{'SH'} = sub { + if ( !defined $_[2] ) { + + # The argument is on the next line. + my ( $self, $macroname ) = ( shift, shift ); + my ( $l2, $ref2 ) = $self->shiftline(); + if ( $l2 =~ /^\./ ) { + $self->SUPER::unshiftline( $l2, $ref2 ); + } else { + chomp($l2); + $self->pushmacro( $macroname, $self->t($l2) ); + } + return; + } else { + return translate_joined(@_); + } +}; + +# Macro: .SM [text] +# Set the text on the same line or the text on the next line in a +# font that is one point size smaller than the default font. +# FIXME: Maybe we should find a better way to represent this (inline is +# not really nice in the PO). +$inline{'SM'} = 1; + +# .SP n Skip n lines (I think) +$macro{'SP'} = \&untranslated; + +#Normal Paragraphs +# .LP Same as .PP (begin a new paragraph). +# .P Same as .PP (begin a new paragraph). +# .PP Begin a new paragraph and reset prevailing indent. +#Relative Margin Indent +# .RS i Start relative margin indent - moves the left margin i to the right +# As a result, all following paragraph(s) will be indented until +# the corresponding .RE. +# .RE End relative margin indent. +$macro{'LP'} = $macro{'P'} = $macro{'PP'} = sub { + noarg(@_); + + # From info groff: + # The font size and shape are reset to the default value (10pt roman if no + # `-rS' option is given on the command line). + set_font("R"); +}; +$macro{'RE'} = \&noarg; +$macro{'RS'} = \&untranslated; + +sub parse_tp_tq { + my $self = shift; + my ( $line, $l2, $ref2 ); + $line .= $_[0] if defined( $_[0] ); + $line .= ' ' . $_[1] if defined( $_[1] ); + $self->pushline( $self->r($line) . "\n" ); + + ( $l2, $ref2 ) = $self->shiftline(); + chomp($l2); + while ( $l2 =~ /^\.PD/ ) { + $self->pushline( $self->r($l2) . "\n" ); + ( $l2, $ref2 ) = $self->shiftline(); + chomp($l2); + } + + # Deal with \c line continuation in .TP or .TQ + while ( $l2 =~ s/\\c$// ) { + my ( $l3, $ref3 ) = $self->shiftline(); + $l2 .= $l3; + chomp($l2); + } + if ( $l2 =~ /^([.'][\t ]*([^\t ]*))(?:([\t ]+)(.*)$|$)/ ) { + if ( $inline{$2} ) { + my $tmp = ""; + if ( defined $4 and length $4 ) { + $tmp = $3 . $self->t( $4, "wrap" => 0 ); + } + $self->pushline( $1 . $tmp . "\n" ); + } else { + + # If the line after a .TP is a macro, + # let the parser do it's job. + # Note: use Transtractor unshiftline for now. This may require an + # implementation of the man module's own implementation. + # This may be a problem if, for example, the line resulted + # of a line continuation. + $self->SUPER::unshiftline( $l2, $ref2 ); + } + } else { + $self->pushline( $self->t( $l2, "wrap" => 0 ) . "\n" ); + } +} + +#Indented Paragraph Macros +# .TP i Begin paragraph with hanging tag. The tag is given on the next line, +# but its results are like those of the .IP command. +$macro{'TP'} = sub { + parse_tp_tq(@_); + + # From info groff: + # Note that neither font shape nor font size of the label [i.e. argument + # or first line] is set to a default value; on the other hand, the rest of + # the text has default font settings. + set_font("R"); +}; + +# Indented Paragraph Macros +# .TQ Indicates continuation of the .TP labels that precede the indented +# paragraph. +$macro{'TQ'} = sub { + warn "Macro $_[1] does not accept any argument\n" + if ( defined( $_[2] ) ); + + parse_tp_tq(@_); +}; + +# Indented Paragraph Macros +# .HP i Begin paragraph with a hanging indent (the first line of the paragraph +# is at the left margin of normal paragraphs, and the rest of the para- +# graph's lines are indented). +# +$macro{'HP'} = sub { + untranslated(@_); + + # From info groff: + # Font size and face are reset to their default values. + set_font("R"); +}; + +# Indented Paragraph Macros +# .IP [designator] [nnn] +# Sets up an indented paragraph, using designator as a tag to mark +# its beginning. The indentation is set to nnn if that argument is +# supplied (default unit is `n'), otherwise the default indentation +# value is used. Font size and face of the paragraph (but not the +# designator) are reset to its default values. To start an indented +# paragraph with a particular indentation but without a designator, +# use `""' (two doublequotes) as the second argument. + +# Note that the above is the groff_man(7) version, which of course differs radically +# from man(7). In one case, the designator is optional and the nnn is not, and the +# contrary in the other. This implies that when sticking to groff_man(7), we should +# mark an uniq argument as translatable. + +$macro{'IP'} = sub { + my $self = shift; + if ( defined $_[2] ) { + $self->pushmacro( $_[0], $self->t( $_[1] ), $_[2] ); + } elsif ( defined $_[1] ) { + $self->pushmacro( $_[0], $self->t( $_[1] ) ); + } else { + $self->pushmacro(@_); + } + + # From info groff: + # Font size and face of the paragraph (but not the designator) are reset + # to their default values. + set_font("R"); +}; + +# Hypertext Link Macros +# .UR u Begins a hypertext link to the URI (URL) u; it will end with +# the corresponding UE command. When generating HTML this should +# translate into the HTML command <A HREF="u">. +# There is an exception: if u is the special value ":", then no +# hypertext link of any kind will be generated until after the +# closing UE (this permits disabling hypertext links in +# phrases like LALR(1) when linking is not appropriate). +# .UE Ends the corresponding UR command; when generating HTML this +# should translate into </A>. +# .UN u Creates a named hypertext location named u; do not include a +# corresponding UE command. +# When generating HTML this should translate into the HTML command +# <A NAME="u" id="u"> </A> +# +# E-Mail address Macros +# .MT m Begins a mailto link to the adress m; it will end with +# the corresponding ME command. When generating HTML this should +# translate into the HTML command <A HREF="mailto:m">. +# .ME Ends the corresponding MT command; when generating HTML this +# should translate into </A>. +$inline{'UR'} = $inline{'MT'} = 1; +$inline{'UE'} = $inline{'ME'} = 1; +$macro{'UN'} = \&translate_joined; + +# Macros to describe command synopses +# +# These macros are a convenience for authors. They also assist +# automated translation tools and help browsers in recognizing command +# synopses and treating them differently from running text. +# +# .OP key value +# Describe an optional command argument. The arguments of this +# macro are set surrounded by option braces in the default Roman +# font; the first argument is printed with a bold face, while +# the second argument is typeset as italic. +# +# .SY command +# Begin synopsis. Takes a single argument, the name of a +# command. Text following, until closed by .YS, is set with a +# hanging indentation with the width of command plus a space. +# This produces the traditional look of a Unix command synopsis. +# +# .YS This macro restores normal indentation at the end of a command +# synopsis. +$macro{'OP'} = \&translate_each; +$macro{'SY'} = \&translate_joined; +$macro{'YS'} = \&noarg; + +# Miscellaneous Macros +# .DT Reset tabs to default tab values (every 0.5 inches); does not +# cause a break. +# .PD d Set inter-paragraph vertical distance to d (if omitted, d=0.4v); +# does not cause a break. +$macro{'DT'} = \&noarg; +$macro{'PD'} = \&untranslated; + +# Indexing term (printed on standard error). +# (ms macros) +$macro{'IX'} = \&translate_each; + +### +### groff macros +### +# .br +$macro{'br'} = \&noarg; + +# .bp N Eject current page and begin new page. +$macro{'bp'} = \&untranslated; + +# .ad Begin line adjustment for output lines in current adjust mode. +# .ad c Start line adjustment in mode c (c=l,r,b,n). +$macro{'ad'} = \&untranslated; + +my %ds_variables; + +# .ds stringvar anything +# Set stringvar to anything. +$macro{'ds'} = sub { + my ( $self, $m ) = ( shift, shift ); + my $name = shift; + my $string = "@_"; + $ds_variables{$name} = $string; + + # indicate to which variable this corresponds. The translator can + # find references to this string in the translation "\*(name" or + # "\*[name]" + $self->{type} = "ds $name"; + $self->pushline( $m . " " . $self->r($name) . " " . $self->translate($string) . "\n" ); +}; + +# .de macro [end] Define or redefine macro until end is encountered (end=.. by default). +# .de1 macro [end] Define or redefine macro until end is encountered (end=.. by default), turns off compatibility mode while executing the macro. +# .dei macro [end] Define or redefine macro until end is encountered (end=.. by default) substituting parameters with '.ds' content +# .dei1 macro [end] Define or redefine macro until end is encountered (end=.. by default) substituting parameters and turning compatibility off + +$macro{'de'} = $macro{'de1'} = $macro{'dei'} = $macro{'dei1'} = sub { + my $self = shift; + if ( $groff_code ne "fail" ) { + my $paragraph = "@_"; + my $end = "."; + my $comment; + my $macroname = $_[1]; + $macroname = $ds_variables{$macroname} if ( $_[0] eq "dei" || $_[0] eq "dei1" ); + if ( $macroname =~ m/(.*?)\\"(.*)$/ ) { + + # Remove comments after the macro name (Debian's #1017837) + ( $macroname, $comment ) = ( $1, $2 ); + } + $macroname =~ s/^ *//; + $macroname =~ s/ *$//; + unless ( exists $macro{$macroname} || exists $inline{$macroname} ) { + if ( defined $comment ) { + $comment =~ s/^ *//; + $comment =~ s/ *$//; + warn wrap_mod( + "po4a::man", + dgettext( + "po4a", + "This page defines a new macro '%s' with '%s' (inline comment: %s), but you did not specify the expected po4a behavior " + . "when '%s' is used. You will get an error if this macro is actually used in your page.\n" + . "Add your macro to one of the '%s', '%s', '%s', '%s', '%s' or '%s' parameters to avoid issues.\n" + . "For example, passing '%s' to po4a will ensure that the defined macro remains hidden from translators.\n" + . "Please refer to the manpage of Locale::Po4a::Man for more info on these parameters.\n" + ), + $macroname, + $_[0], + $comment, + $macroname, + 'untranslated', + 'noarg', + 'translate_joined', + 'translate_each', + 'no_wrap', + 'inline', + "-o untranslated=$macroname" + ); + } else { + warn wrap_mod( + "po4a::man", + dgettext( + "po4a", + "This page defines a new macro '%s' with '%s', but you did not specify the expected po4a behavior " + . "when '%s' is used. You will get an error if this macro is actually used in your page.\n" + . "Add your macro to one of the '%s', '%s', '%s', '%s', '%s' or '%s' parameters to avoid issues.\n" + . "For example, passing '%s' to po4a will ensure that the defined macro remains hidden from translators.\n" + . "Please refer to the manpage of Locale::Po4a::Man for more info on these parameters.\n" + ), + $macroname, + $_[0], + $macroname, + 'untranslated', + 'noarg', + 'translate_joined', + 'translate_each', + 'no_wrap', + 'inline', + "-o untranslated=$macroname" + ); + } + } + if ( $paragraph =~ /^[.'][\t ]*d[ei1]*[\t ]+([^\t ]+)[\t ]+([^\t ]+)[\t ]$/ ) { + $end = $2; + $end = $ds_variables{$end} if ( $_[0] eq "dei" || $_[0] eq "dei1" ); + } + my ( $line, $ref ) = $self->SUPER::shiftline(); + chomp $line; + $paragraph .= "\n" . $line; + while ( defined($line) and $line ne ".$end" ) { + ( $line, $ref ) = $self->SUPER::shiftline(); + if ( defined $line ) { + chomp $line; + $paragraph .= "\n" . $line; + } + } + $paragraph .= "\n"; + if ( $groff_code eq "verbatim" ) { + $self->pushline( $self->r($paragraph) ); + } else { + $self->pushline( $self->translate( $paragraph, $self->{ref}, "groff code", "wrap" => 0 ) ); + } + } else { + die wrap_ref_mod( + $self->{ref}, + "po4a::man", + dgettext( + "po4a", + "This page defines a new macro with '%s'. Since po4a is not a real groff parser, this is not supported. " + . "The option '%s' gets these macros copied verbatim in the translated file, but it's not very robust. " + . "'%s' shows these macros to the translators, but groff macros are not user-friendly for translators." + ), + $_[0], + "groff_code=verbatim", + "groff_code=translate" + ); + } +}; + +# .fam Return to previous font family. +# .fam name Set the current font family to name. +$macro{'fam'} = \&untranslated; + +# .fc a b Set field delimiter to a and pad character to b. +$macro{'fc'} = \&untranslated; + +# .ft font Change to font name or number font; +$macro{'ft'} = sub { + if ( defined $_[2] ) { + set_font( $_[2] ); + } else { + set_font("P"); + } +}; + +# .hc c Set up additional hyphenation indicator character c. +$macro{'hc'} = \&untranslated; + +# .hy Enable hyphenation (see nh) +# .hy N Switch to hyphenation mode N. +# .hym n Set the hyphenation margin to n (default scaling indicator m). +# .hys n Set the hyphenation space to n. +$macro{'hy'} = $macro{'hym'} = $macro{'hys'} = \&untranslated; + +# .ie cond anything If cond then anything else goto .el. +# .if cond anything If cond then anything; otherwise do nothing. +$macro{'ie'} = $macro{'if'} = sub { + my $self = shift; + if ( $groff_code ne "fail" ) { + my $m = $_[0]; + my $paragraph = "@_"; + my ( $line, $ref ); + my $count = 0; + $count = 1 if ( $paragraph =~ m/(?<!\\)\\\{/s ); + while (( $paragraph =~ m/(?<!\\)\\$/s ) + or ( $count > 0 ) ) + { + ( $line, $ref ) = $self->SUPER::shiftline(); + chomp $line; + $paragraph .= "\n" . $line; + $count += 1 if ( $line =~ m/(?<!\\)\\\{/s ); + $count -= 1 if ( $line =~ m/(?<!\\)\\\}/s ); + } + if ( $m eq '.ie' ) { + + # The .el line may be preceded by comments + ( $line, $ref ) = $self->SUPER::shiftline(); + chomp $line; + while ( $line =~ m/^[.']\\"/ ) { + $paragraph .= "\n" . $line; + ( $line, $ref ) = $self->SUPER::shiftline(); + chomp $line; + } + + if ( $line !~ m/^[.'][ \t]*el(\s|\\\{)/ ) { + die wrap_ref_mod( $self->{ref}, "po4a::man", + dgettext( "po4a", "The .ie macro must be followed by a .el macro." ) ); + } + my $paragraph2 = $line; + $count = 0; + $count = 1 if ( $line =~ m/(?<!\\)\\\{/s ); + while (( $paragraph2 =~ m/(?<!\\)\\$/s ) + or ( $count > 0 ) ) + { + ( $line, $ref ) = $self->SUPER::shiftline(); + chomp $line; + $paragraph2 .= "\n" . $line; + $count += 1 if ( $line =~ m/(?<!\\)\\\{/s ); + $count -= 1 if ( $line =~ m/(?<!\\)\\\}/s ); + } + $paragraph .= "\n" . $paragraph2; + } + $paragraph .= "\n"; + if ( $groff_code eq "verbatim" ) { + $self->pushline( $self->r($paragraph) ); + } else { + $self->pushline( $self->translate( $paragraph, $self->{ref}, "groff code", "wrap" => 0 ) ); + } + } else { + die wrap_ref_mod( + $self->{ref}, + "po4a::man", + dgettext( + "po4a", + "This page uses conditionals with '%s'. Since po4a is not a real groff parser, this is not supported by default. " + . "The option '%s' gets these macros copied verbatim in the translated file, but it's not very robust. " + . "'%s' shows these macros to the translators, but groff macros are not user-friendly for translators." + ), + $_[0], + "groff_code=verbatim", + "groff_code=translate" + ); + } +}; + +# .el anything Display the parameter (part of a if-then-else with .ie) +$macro{'el'} = \&translate_joined; + +# .in N Change indent according to N (default scaling indicator m). +$macro{'in'} = \&untranslated; + +# .ig end Ignore text until .end. +$macro{'ig'} = sub { + my $self = shift; + $self->pushmacro(@_); + my ( $name, $end ) = ( shift, shift || '' ); + $end = '' if ( $end =~ m/^\\\"/ ); + my ( $line, $ref ) = $self->shiftline(); + while ( defined($line) ) { + $self->pushline( $self->r($line) ); + last if ( $line =~ /^\.$end\./ ); + ( $line, $ref ) = $self->shiftline(); + } +}; + +# .it n macro Set an input line trap. +$macro{'it'} = \&untranslated; + +# .lf N file Set input line number to N and filename to file. +$macro{'lf'} = \&untranslated; + +# .ll N Set line length according to N +$macro{'ll'} = \&untranslated; + +# .nh disable hyphenation (see hy) +$macro{'nh'} = \&untranslated; + +# .na No Adjusting (see ad) +$macro{'na'} = \&untranslated; + +# .ne N Need N vertical space +$macro{'ne'} = \&untranslated; + +# .nr register N M +# Define or modify register +$macro{'nr'} = \&untranslated; + +# .ps N Point size; same as \s[N] +$macro{'ps'} = \&untranslated; + +# .so filename Include source file. +# .mso groff variant of .so (other search path) +$macro{'so'} = $macro{'mso'} = sub { + warn wrap_mod( "po4a::man", + dgettext( "po4a", "This page includes another file with '%s'. Do not forget to translate this file ('%s')." ), + $_[1], $_[2] ); + my $self = shift; + $self->pushmacro(@_); +}; + +# .sp Skip one line vertically. +# .sp N Space vertical distance N +$macro{'sp'} = \&untranslated; + +# .vs [space] +# .vs +space +# .vs -space +# Change (increase, decrease) the vertical spacing by SPACE. The +# default scaling indicator is `p'. +$macro{'vs'} = \&untranslated; + +# .ta T N Set tabs after every position that is a multiple of N. +# .ta n1 n2 ... nn T r1 r2 ... rn +# Set tabs at positions n1, n2, ..., nn, [...] +$macro{'ta'} = sub { + + # In some cases, a ta request can contain a translatable argument. + # FIXME: detect those cases (something like 5i does not need to be + # translated) + my ( $self, $m ) = ( shift, shift ); + my $line = "@_"; + $line =~ s/^ +//; + $self->pushline( $m . " " . $self->translate( $line, $self->{ref}, 'ta' ) . "\n" ); +}; + +# .ti +N Temporary indent next line (default scaling indicator m). +$macro{'ti'} = \&untranslated; + +### +### tbl macros +### +$macro{'TS'} = sub { + my $self = shift; + my ( $in_headers, $tab, $buffer ) = ( 1, "\t", "" ); + my ( $in_textblock, $preline, $postline ) = ( 0, "", "" ); + my ( $line, $ref ) = $self->shiftline(); + my @options; + + # Push table start + $self->pushmacro(@_); + while ( defined($line) ) { + if ( $line =~ /^\.TE/ ) { + + # Table end + $self->pushline( $self->r($line) ); + return; + } + if ($in_headers) { + if ( $line =~ /;$/ ) { # global options line + if ( $line =~ /\btab\s*\((.)\)/ ) { + $tab = $1; + } + } elsif ( $line =~ /\.$/ ) { + $in_headers = 0; + } + $self->pushline( $self->r($line) ); + } elsif ( $in_textblock && $line =~ /^T}\s*/ ) { # end of text block + $in_textblock = 0; + $preline = $&; # save the `T}' marker to be output later + $line = $'; # save the remaing part of the line + # Drop any EOL from entry to be translated and save it for + # output below. + if ( chomp $buffer ) { + $postline .= "\n"; + } + $self->pushline( $self->translate( $buffer, $ref, 'tbl table' ) . $postline ); + $buffer = $postline = ""; + next; # continue processing with the remaining part of the line + } elsif ( $in_textblock && $line =~ /^[.']/ ) { + + # TODO: properly handle macros inside text blocks, currently we mark them + # for translations just like the previous version did + $self->pushline( $self->translate( $buffer, $ref, 'tbl table' ) ); + $self->pushline( $self->translate( $line, $ref, 'tbl table' ) ); + $buffer = ""; + } elsif ( $line =~ /\\$/ || $in_textblock ) { + + # Lines are continued on \ at the end of line + $buffer .= $line; + } else { + if ( $line =~ s/\s*T\{\s*$// ) { # start of text block + $in_textblock = 1; + $postline = $&; # save the `T{' to be outputed below + } + + $buffer .= $line; + + # Drop any EOL from entry to be translated and save it for + # output below. + if ( chomp $buffer ) { + $postline .= "\n"; + } + + # Arguments to translate are separated by the table's tab + # character (\t by default). We must be careful to preserve + # empty trailing fields, since in particular a text block is + # likely to show up as an empty trailing field here. + $self->pushline( $preline + . join( $tab, map { $self->translate( $_, $ref, 'tbl table' ) } split( /\Q$tab/, $buffer, -1 ) ) + . $postline ); + + $buffer = $preline = $postline = ""; + + } + ( $line, $ref ) = $self->shiftline(); + } +}; + +### +### info groff +### + +## Builtin register, of course they do not need to be translated + +$macro{'F'} = $macro{'H'} = $macro{'V'} = $macro{'A'} = $macro{'T'} = \&untranslated; + +## ms package +## +# +# Displays and keeps. None of these macro accept a translated argument +# (they allow to make blocks of text which cannot be broken by new page) + +$macro{'DS'} = $macro{'LD'} = $macro{'DE'} = \&untranslated; +$macro{'ID'} = $macro{'BD'} = $macro{'CD'} = \&untranslated; +$macro{'RD'} = $macro{'KS'} = $macro{'KE'} = \&untranslated; +$macro{'KF'} = $macro{'B1'} = $macro{'B2'} = \&untranslated; +$macro{'DA'} = \&translate_joined; + +# .pc c Change page number character +$macro{'pc'} = \&translate_joined; + +# .ns Disable .sp and such +# .rs Enable them again +$macro{'ns'} = $macro{'rs'} = \&untranslated; + +# .cs font [width [em-size]] +# Switch to and from "constant glyph space mode". +$macro{'cs'} = \&untranslated; + +# .ss word_space_size [sentence_space_size] +# Change the minimum size of a space between filled words. +$macro{'ss'} = \&untranslated; + +# .ce Center one line horizontally +# .ce N Center N lines +# .ul N Underline N lines (but not the spaces) +# .cu N Underline N lines (even the spaces) +$macro{'ce'} = $macro{'ul'} = $macro{'cu'} = sub { + my $self = shift; + if ( defined $_[1] ) { + if ( $_[1] <= 0 ) { + + # disable centering, underlining, ... + $self->pushmacro( $_[0] ); + } else { + + # All of these are not handled yet because the number of line may change + # during the translation + die wrap_mod( + "po4a::man", + dgettext( + "po4a", + "This page uses the '%s' request with the number of lines in argument. This is not supported yet." + ), + $_[0] + ); + } + } else { + $self->pushmacro( $_[0] ); + } +}; + +# .ec [c] +# Set the escape character to C. With no argument the default +# escape character `\' is restored. It can be also used to +# re-enable the escape mechanism after an `eo' request. +$macro{'ec'} = sub { + my $self = shift; + if ( defined $_[1] ) { + die wrap_mod( + "po4a::man", + dgettext( + "po4a", "This page uses the '%s' request. This request is only supported when no argument is provided." + ), + $_[0] + ); + } else { + $self->pushmacro( $_[0] ); + } +}; + +### +### BSD compatibility macros: .AT and .UC +### (define the version of Berkley used) +### FIXME: the header ("3rd Berkeley Distribution" or such) declared +### by this macro isn't translatable we may want to remove +### this from the generated manpage, and declare our own header +### +$macro{'UC'} = $macro{'AT'} = \&untranslated; + +# Request: .hw word1 word2 ... +# Define how WORD1, WORD2, etc. are to be hyphenated. The words +# must be given with hyphens at the hyphenation points. +# +# If the English page needs to specify how a word must be hyphenated, the +# translated page may also have this need. +$macro{'hw'} = \&translate_each; + +############################################################################# +# +# mdoc macros +# +# The macros are defined in mdoc(7) and groff_mdoc(7) +# +# TBC: Should the font processing be disabled in the mdoc mode? +############################################################################# +# FIXME: Maybe we should verify that the page is an mdoc page +# (add a flag in Dd, and always check that this flag is set in the +# other mdoc macros) +sub translate_mdoc { + my ( $self, $macroname ) = ( shift, shift ); + my $macroarg = ""; + foreach (@_) { + $macroarg .= " " if ( length $macroarg ); + if ( $_ =~ m/((?<!\\) |\t|^$)/ ) { + $macroarg .= "\"$_\""; + } else { + $macroarg .= $_; + } + } + + $self->pushline( "$macroname " . $self->t($macroarg) . "\n" ); +} + +sub translate_mdoc_no_quotes { + my ( $self, $macroname, $macroarg ) = ( shift, shift, join( " ", @_ ) ); + + $self->pushline( "$macroname " . $self->t($macroarg) . "\n" ); +} +# +# Title Macros +# ============ +# .Dd Month day, year Document date. +$macro{'Dd'} = sub { + my ( $self, $macroname, $macroarg ) = ( shift, shift, join( " ", @_ ) ); + + $mdoc_mode = 1; + $self->push_docheader(); + + # FIXME: It would be nice if we could switch from one set of macros to the + # other. + # + # This does not work at this time. If we erase the current set of macros, + # po4a fails when a configuration file uses both mdoc and groff pages. + # + # # Erase the current macro definitions + # %macro=(); + # %inline=(); + # %no_wrap_begin=(); + # %no_wrap_end=(); + # Use the mdoc macros + define_mdoc_macros(); + + $self->translate_mdoc_no_quotes( $macroname, $macroarg ); +}; + +sub define_mdoc_macros { + + # .Dt DOCUMENT_TITLE [section] [volume] Title, in upper case. + $macro{'Dt'} = \&translate_mdoc; + + # .Os OPERATING_SYSTEM [version/release] Operating system (BSD). + $macro{'Os'} = \&translate_each; + + # Keep the quotes e.g. finger.1 + # Don't add quotes e.g. logger.1 + + # Page Layout Macros + # ================== + # .Sh Section Headers. + # (man mdoc indicates only a limited set of valid headers, + # but it should be OK to translate the header) + $macro{'Sh'} = sub { + my ( $self, $macroname ) = ( shift, shift ); + my $macroarg = ""; + foreach (@_) { + $macroarg .= " " if ( length $macroarg ); + if ( $_ =~ m/((?<!\\) |\t|^$)/ ) { + $macroarg .= "\"$_\""; + } else { + $macroarg .= $_; + } + } + if ( $mdoc{$macroarg} ) { + $self->pushline( "$macroname " . $self->r($macroarg) . "\n" ); + } else { + $self->pushline( "$macroname " . $self->t($macroarg) . "\n" ); + } + }; + + # .Ss Subsection Headers. + $macro{'Ss'} = \&translate_mdoc; + + # .Pp Paragraph Break. Vertical space (one line). + $macro{'Pp'} = \&noarg; + + # .Lp Same as .Pp + $macro{'Lp'} = \&noarg; + + # .D1 (D-one) Display-one Indent and display one text line. + $macro{'D1'} = \&translate_mdoc; + + # .Dl (D-ell) Display-one literal. + # Indent and display one line of literal text + $macro{'Dl'} = \&translate_mdoc; + + # .Bd Begin-display block. + # FIXME: Note: there are some options, some of the options argument + # may be translatable (-file <name>, -offset <string>) + $no_wrap_begin{'Bd'} = 1; + + # .Ed End-display (matches .Bd). + $no_wrap_end{'Ed'} = 1; + + # .Bl Begin-list. Create lists or columns. + # FIXME: As for .Bd, there are some options + $macro{'Bl'} = \&untranslated; + + # .El End-list. + $macro{'El'} = \&noarg; + + # .It List item. + # FIXME: Maybe we could extract other modifiers + # as in .It Fl l Ar num + $macro{'It'} = \&translate_mdoc; + + # .Lk html link + $macro{'Lk'} = \&untranslated; + + # Manual Domain Macros + # ==================== + # FIXME: I think most Manual and General text domain are in the inline category + foreach (qw(Ad An Ar Cd Cm Dv Er Ev Fa Fd Fn Ic Li Nm Op Ot Pa St Va Vt Xr)) { + $inline{$_} = 1; + } + + # FIXME: some of these macros introduce a line in bold. + # Using \fP in these line is not supported. + # do_fonts should be called for every inline line + + # General Text Domain + # =================== + foreach ( + qw(%A %B %C %D %I %J %N %O %P %Q %R %T %U %V + Ac Ao Ap Aq At Bc Bf Bo Bq Brc Bro Brq Bx Db Dc Do Dq Ec Ef Em Eo Eq Fx No Ns + Pc Pf Po Pq Qc Ql Qo Qq Re Rs Rv Sc So Sq Sm Sx Sy Tn Ux Xc Xo) + ) + { + $inline{$_} = 1; + } + + # FIXME: Maybe it should be joined with the preceding .Nm + $macro{'Nd'} = \&translate_mdoc; + + # Command line flags + $inline{'Fl'} = 1; + + # Exit status + $inline{'Ex'} = 1; + + # Opening option bracket + $inline{'Oo'} = 1; + + # Closing option bracket + $inline{'Oc'} = 1; + + # Begin keep (keep words in the same line) + $inline{'Bk'} = 1; + + # End keep + $inline{'Ek'} = 1; + + # Library Names + $inline{'Lb'} = 1; + + # Function Types + $inline{'Ft'} = 1; + + # Function open (for functions with many arguments) + $inline{'Fo'} = 1; + + # Function close + $inline{'Fc'} = 1; + + # OpenBSD macro + $inline{'Ox'} = 1; + + # BSD/OS Macro + $inline{'Bsx'} = 1; + + # #include statements + $macro{'In'} = \&translate_mdoc; + + # NetBSD Macro + $inline{'Nx'} = 1; + + # Math symbol + $inline{'Ms'} = 1; + + # Prints 'under development' + $inline{'Ud'} = 1; + + # This macro is a groff macro. I don't know if ot is valid in an mdoc page. + # But this is used in some pages and seems to work + $macro{'br'} = \&noarg; + +} # end of define_mdoc_macros + +__END__ + +# LocalWords: Charset charset po UTF gettext msgid nostrip diff --git a/lib/Locale/Po4a/Po.pm b/lib/Locale/Po4a/Po.pm new file mode 100644 index 0000000..d87a145 --- /dev/null +++ b/lib/Locale/Po4a/Po.pm @@ -0,0 +1,1618 @@ +# Locale::Po4a::Po -- manipulation of PO files +# +# This program is free software; you may redistribute it and/or modify it +# under the terms of GPL v2.0 or later (see COPYING). + +############################################################################ +# Modules and declarations +############################################################################ + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::Po - PO file manipulation module + +=head1 SYNOPSIS + + use Locale::Po4a::Po; + my $pofile=Locale::Po4a::Po->new(); + + # Read PO file + $pofile->read('file.po'); + + # Add an entry + $pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour', + 'flags' => "wrap", 'reference'=>'file.c:46'); + + # Extract a translation + $pofile->gettext("Hello"); # returns 'bonjour' + + # Write back to a file + $pofile->write('otherfile.po'); + +=head1 DESCRIPTION + +Locale::Po4a::Po is a module that allows you to manipulate message +catalogs. You can load and write from/to a file (which extension is often +I<po>), you can build new entries on the fly or request for the translation +of a string. + +For a more complete description of message catalogs in the PO format and +their use, please refer to the info documentation of the gettext program (node "`PO Files"'). + +This module is part of the po4a project, which objective is to use PO files +(designed at origin to ease the translation of program messages) to +translate everything, including documentation (man page, info manual), +package description, debconf templates, and everything which may benefit +from this. + +=head1 OPTIONS ACCEPTED BY THIS MODULE + +=over 4 + +=item B<--porefs> I<type> + +Specify the reference format. Argument I<type> can be one of B<never> +to not produce any reference, B<file> to only specify the file +without the line number, B<counter> to replace line number by an +increasing counter, and B<full> to include complete references (default: full). + +=item B<--wrap-po> B<no>|B<newlines>|I<number> (default: 76) + +Specify how the po file should be wrapped. This gives the choice between either +files that are nicely wrapped but could lead to git conflicts, or files that are +easier to handle automatically, but harder to read for humans. + +Historically, the gettext suite has reformatted the po files at the 77th column +for cosmetics. This option specifies the behavior of po4a. If set to a numerical +value, po4a will wrap the po file after this column and after newlines in the +content. If set to B<newlines>, po4a will only split the msgid and msgstr after +newlines in the content. If set to B<no>, po4a will not wrap the po file at all. +The reference comments are always wrapped by the gettext tools that we use internally. + +Note that this option has no impact on how the msgid and msgstr are wrapped, i.e. +on how newlines are added to the content of these strings. + +=item B<--msgid-bugs-address> I<email@address> + +Set the report address for msgid bugs. By default, the created POT files +have no Report-Msgid-Bugs-To fields. + +=item B<--copyright-holder> I<string> + +Set the copyright holder in the POT header. The default value is +"Free Software Foundation, Inc." + +=item B<--package-name> I<string> + +Set the package name for the POT header. The default is "PACKAGE". + +=item B<--package-version> I<string> + +Set the package version for the POT header. The default is "VERSION". + +=back + +=cut + +use IO::File; + +require Exporter; + +package Locale::Po4a::Po; +use DynaLoader; + +use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext); + +use subs qw(makespace); +use vars qw(@ISA @EXPORT_OK); +@ISA = qw(Exporter DynaLoader); +@EXPORT = qw(%debug); +@EXPORT_OK = qw(&move_po_if_needed); + +use Locale::Po4a::TransTractor; + +# Try to use a C extension if present. +eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION"); + +use 5.16.0; +use strict; +use warnings; + +use Carp qw(croak); +use File::Basename; +use File::Path; # mkdir before write +use File::Copy; # move +use POSIX qw(strftime floor); +use Time::Local; + +use Encode; +use Config; + +my @known_flags = qw( + wrap no-wrap fuzzy + c-format no-c-format + objc-format no-objc-format + sh-format no-sh-format + python-format no-python-format + python-brace-format no-python-brace-format + lisp-format no-lisp-format + elisp-format no-elisp-format + librep-format no-librep-format + scheme-format no-scheme-format + smalltalk-format no-smalltalk-format + java-format no-java-format + csharp-format no-csharp-format + awk-format no-awk-format + object-pascal-format no-object-pascal-format + ycp-format no-ycp-format + tcl-format no-tcl-format + perl-format no-perl-format + perl-brace-format no-perl-brace-format + php-format no-php-format + gcc-internal-format no-gcc-internal-format + gfc-internal-format no-gfc-internal-format + qt-format no-qt-format + qt-plural-format no-qt-plural-format + kde-format no-kde-format + boost-format no-boost-format + lua-format no-lua-format + javascript-format no-javascript-format +); + +# Custom flags, used for example by weblate +push @known_flags, 'markdown-text'; + +our %debug = ( + 'canonize' => 0, + 'quote' => 0, + 'escape' => 0, + 'encoding' => 0, + 'filter' => 0 +); + +=head1 Functions concerning entire message catalogs + +=over 4 + +=item new() + +Creates a new message catalog. If an argument is provided, it's the name of +a PO file we should load. + +=cut + +sub new { + my ( $this, $options ) = ( shift, shift ); + my $class = ref($this) || $this; + my $self = {}; + bless $self, $class; + $self->initialize($options); + + my $filename = shift; + $self->read($filename) if length($filename); + return $self; +} + +# Return the numerical timezone (e.g. +0200) +# Neither the %z nor the %s formats of strftime are portable: +# '%s' is not supported on Solaris and '%z' indicates +# "2006-10-25 19:36E. Europe Standard Time" on MS Windows. +sub timezone { + my ($time) = @_; + my @l = localtime($time); + + my $diff = floor( timegm(@l) / 60 + 0.5 ) - floor( $time / 60 + 0.5 ); + my $sign = ( $diff >= 0 ? 1 : -1 ); + $diff = abs($diff); + + my $h = $sign * floor( $diff / 60 ); + my $m = $diff % 60; + + return sprintf "%+03d%02d\n", $h, $m; +} + +sub initialize { + my ( $self, $options ) = ( shift, shift ); + my $time = time; + my $date = strftime( "%Y-%m-%d %H:%M", localtime($time) ) . timezone($time); + chomp $date; + + $self->{options}{'porefs'} = 'full'; + $self->{options}{'msgid-bugs-address'} = undef; + $self->{options}{'copyright-holder'} = "Free Software Foundation, Inc."; + $self->{options}{'package-name'} = "PACKAGE"; + $self->{options}{'package-version'} = "VERSION"; + $self->{options}{'wrap-po'} = 76; + $self->{options}{'pot-language'} = ""; + + foreach my $opt ( keys %$options ) { + + # print STDERR "$opt: ".(defined($options->{$opt})?$options->{$opt}:"(undef)")."\n"; + if ( $options->{$opt} ) { + die wrap_mod( "po4a::po", dgettext( "po4a", "Unknown option: %s" ), $opt ) + unless exists $self->{options}{$opt}; + $self->{options}{$opt} = $options->{$opt}; + } + } + $self->{options}{'wrap-po'} =~ /^(no|newlines|\d+)$/ + || die wrap_mod( + "po4a::po", + dgettext( "po4a", "Invalid value for option 'wrap-po' ('%s' is not 'no' nor 'newlines' nor a number)" ), + $self->{options}{'wrap-po'} + ); + + $self->{options}{'porefs'} =~ /^(full|counter|noline|file|none|never)?$/ + || die wrap_mod( + "po4a::po", + dgettext( + "po4a", + "Invalid value for option 'porefs' ('%s' is " + . "not one of 'full', 'counter', 'noline', 'file' or 'never')" + ), + $self->{options}{'porefs'} + ); + $self->{options}{'porefs'} =~ s/noline/file/; # backward compat. 'file' used to be called 'noline'. + $self->{options}{'porefs'} =~ s/none/never/; # backward compat. 'never' used to be called 'none'. + if ( $self->{options}{'porefs'} =~ m/^counter/ ) { + $self->{counter} = {}; + } + + $self->{po} = (); + $self->{count} = 0; # number of msgids in the PO + # count_doc: number of strings in the document + # (duplicate strings counted multiple times) + $self->{count_doc} = 0; + $self->{gettextize_types} = (); # Type of each msgid found in the doc, in order + # We cannot use {$msgid}{'type'} as a type because for duplicate entries, the type is overwritten. + # So we have to copy the same info to this separate array, which is accessed through type_doc() + $self->{header_comment} = + " SOME DESCRIPTIVE TITLE\n" + . " Copyright (C) YEAR " + . $self->{options}{'copyright-holder'} . "\n" + . " This file is distributed under the same license " + . "as the " + . $self->{options}{'package-name'} + . " package.\n" + . " FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n" . "\n" + . ", fuzzy"; + + # $self->header_tag="fuzzy"; + $self->{header} = escape_text( + "Project-Id-Version: " + . $self->{options}{'package-name'} . " " + . $self->{options}{'package-version'} . "\n" + . ( + ( defined $self->{options}{'msgid-bugs-address'} ) + ? "Report-Msgid-Bugs-To: " . $self->{options}{'msgid-bugs-address'} . "\n" + : "" + ) + . "POT-Creation-Date: $date\n" + . "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" + . "Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n" + . "Language-Team: LANGUAGE <LL\@li.org>\n" + . "Language: " + . $self->{options}{'pot-language'} . "\n" + . "MIME-Version: 1.0\n" + . "Content-Type: text/plain; charset=UTF-8\n" + . "Content-Transfer-Encoding: 8bit\n" + ); + + $self->{footer} = []; + + # To make stats about gettext hits + $self->stats_clear(); +} + +=item read($) + +Reads a PO file (which name is given as argument). Previously existing +entries in self are not removed, the new ones are added to the end of the +catalog. + +=cut + +sub read { + my $self = shift; + my $filename = shift + or croak wrap_mod( "po4a::po", dgettext( "po4a", "Please provide a non-null filename" ) ); + + my $charset = shift // 'UTF-8'; + $charset = 'UTF-8' if $charset eq "CHARSET"; + warn "Read $filename with encoding: $charset" if $debug{'encoding'}; + + my $checkvalidity = shift // 1; + + my $lang = basename($filename); + $lang =~ s/\.po$//; + $self->{lang} = $lang; + + if ($checkvalidity) { # We sometimes need to read a file even if it may be invalid (eg to test whether it's empty) + my $cmd = "msgfmt" . $Config{_exe} . " --check-format --check-domain -o /dev/null \"" . $filename . '"'; + + my $locale = $ENV{'LC_ALL'}; + $ENV{'LC_ALL'} = "C"; + my $out = qx/$cmd 2>&1/; + $ENV{'LC_ALL'} = $locale; + + die wrap_msg( dgettext( "po4a", "Invalid po file %s:\n%s" ), $filename, $out ) + unless ( $? == 0 ); + } + + my $fh; + if ( $filename eq '-' ) { + $fh = *STDIN; + } else { + open( $fh, "<:encoding($charset)", $filename ) + or croak wrap_mod( "po4a::po", dgettext( "po4a", "Cannot read from %s: %s" ), $filename, $! ); + } + + ## Read paragraphs line-by-line + my $pofile = ""; + while ( defined( my $textline = <$fh> ) ) { + $pofile .= $textline; + } + $pofile =~ s/\r\n/\n/sg; # Reading a DOS-encoded file from Linux (native files are handled in all cases) + + # If we did not get the charset right, reload the file with the right one + if ( $pofile =~ /charset=(.*?)[\s\\]/ ) { + my $detected_charset = $1; + + if ( $detected_charset ne $charset && uc($detected_charset) ne $charset && uc($detected_charset) ne 'CHARSET' ) + { + warn "Reloading the PO file, changing the charset from '$charset' to '$detected_charset'" + if $debug{'encoding'}; + $self->read( $filename, $detected_charset, $checkvalidity ); + return; + } + } + + if ( $pofile =~ m/^\N{BOM}/ ) { # UTF-8 BOM detected + croak "BOM detected"; + croak wrap_msg( + dgettext( + "po4a", + "The file %s starts with a BOM char indicating that its encoding is UTF-8, but you specified %s instead." + ), + $filename, + $charset + ) if ( uc($charset) ne 'UTF-8' ); + $pofile =~ s/^\N{BOM}//; + } + + if ( $filename ne '-' ) { + close $fh + or croak wrap_mod( "po4a::po", dgettext( "po4a", "Cannot close %s after reading: %s" ), $filename, $! ); + } + + my $linenum = 0; + + foreach my $msg ( split( /\n\n/, $pofile ) ) { + my ( $msgid, $msgstr, $comment, $previous, $automatic, $reference, $flags, $buffer ); + my ( $msgid_plural, $msgstr_plural ); + if ( $msg =~ m/^#~/m ) { + push( @{ $self->{footer} }, $msg ); + next; + } + foreach my $line ( split( /\n/, $msg ) ) { + $linenum++; + if ( $line =~ /^#\. ?(.*)$/ ) { # Automatic comment + $automatic .= ( defined($automatic) ? "\n" : "" ) . $1; + + } elsif ( $line =~ /^#: ?(.*)$/ ) { # reference + $reference .= ( defined($reference) ? "\n" : "" ) . $1; + + } elsif ( $line =~ /^#, ?(.*)$/ ) { # flags + $flags .= ( defined($flags) ? "\n" : "" ) . $1; + + } elsif ( $line =~ /^#\| ?(.*)$/ ) { # previous translation + $previous .= ( defined($previous) ? "\n" : "" ) . ( $1 || "" ); + + } elsif ( $line =~ /^#(.*)$/ ) { # Translator comments + $comment .= ( defined($comment) ? "\n" : "" ) . ( $1 || "" ); + + } elsif ( $line =~ /^msgid (".*")$/ ) { # begin of msgid + $buffer = $1; + + } elsif ( $line =~ /^msgid_plural (".*")$/ ) { + + # begin of msgid_plural, end of msgid + + $msgid = $buffer; + $buffer = $1; + + } elsif ( $line =~ /^msgstr (".*")$/ ) { + + # begin of msgstr, end of msgid + + $msgid = $buffer; + $buffer = "$1"; + + } elsif ( $line =~ /^msgstr\[([0-9]+)\] (".*")$/ ) { + + # begin of msgstr[x], end of msgid_plural or msgstr[x-1] + + # Note: po4a cannot uses plural forms + # (no integer to use the plural form) + # * drop the msgstr[x] where x >= 2 + # * use msgstr[0] as the translation of msgid + # * use msgstr[1] as the translation of msgid_plural + + if ( $1 eq "0" ) { + $msgid_plural = $buffer; + $buffer = "$2"; + } elsif ( $1 eq "1" ) { + $msgstr = $buffer; + $buffer = "$2"; + } elsif ( $1 eq "2" ) { + $msgstr_plural = $buffer; + warn wrap_ref_mod( "$filename:$linenum", "po4a::po", + dgettext( "po4a", "Messages with more than 2 plural forms are not supported." ) ); + } + } elsif ( $line =~ /^(".*")$/ ) { + + # continuation of a line + $buffer .= "\n$1"; + + } else { + warn wrap_ref_mod( "$filename:$linenum", "po4a::po", dgettext( "po4a", "Parse error at: -->%s<--" ), + $line ); + } + } + $linenum++; + if ( defined $msgid_plural ) { + $msgstr_plural = $buffer; + + $msgid = unquote_text($msgid) if ( defined($msgid) ); + $msgstr = unquote_text($msgstr) if ( defined($msgstr) ); + + $self->push_raw( + 'msgid' => $msgid, + 'msgstr' => $msgstr, + 'reference' => $reference, + 'flags' => $flags, + 'comment' => $comment, + 'previous' => $previous, + 'automatic' => $automatic, + 'plural' => 0 + ); + + $msgid_plural = unquote_text($msgid_plural) + if ( defined($msgid_plural) ); + $msgstr_plural = unquote_text($msgstr_plural) + if ( defined($msgstr_plural) ); + + $self->push_raw( + 'msgid' => $msgid_plural, + 'msgstr' => $msgstr_plural, + 'reference' => $reference, + 'flags' => $flags, + 'comment' => $comment, + 'previous' => $previous, + 'automatic' => $automatic, + 'plural' => 1 + ); + } else { + $msgstr = $buffer; + + $msgid = unquote_text($msgid) if ( defined($msgid) ); + $msgstr = unquote_text($msgstr) if ( defined($msgstr) ); + + $self->push_raw( + 'msgid' => $msgid, + 'msgstr' => $msgstr, + 'reference' => $reference, + 'flags' => $flags, + 'comment' => $comment, + 'previous' => $previous, + 'automatic' => $automatic + ); + } + } +} + +=item write($) + +Writes the current catalog to the given file. + +=cut + +sub write { + my $self = shift; + my $filename = shift + or croak dgettext( "po4a", "Cannot write to a file without filename" ) . "\n"; + + my $fh; + if ( $filename eq '-' ) { + $fh = \*STDOUT; + } else { + + # make sure the directory in which we should write the localized + # file exists + my $dir = $filename; + if ( $dir =~ m|/| ) { + $dir =~ s|/[^/]*$||; + + File::Path::mkpath( $dir, 0, 0755 ) # Croaks on error + if ( length($dir) && !-e $dir ); + } + open( $fh, '>:encoding(UTF-8)', $filename ) + or croak wrap_mod( "po4a::po", dgettext( "po4a", "Cannot write to %s: %s" ), $filename, $! ); + } + + print $fh "" . format_comment( $self->{header_comment}, "" ) + if length( $self->{header_comment} ); + + # Force the encoding of PO files in UTF-8 on disk, because msgmerge can get messed up when mixing encodings + # See https://savannah.gnu.org/bugs/index.php?65104 + my $header = $self->{header}; + $header =~ /charset=([^\s\\]*)/i; + my $oldcharset = $1 // ''; + warn sprintf( + dgettext( + "po4a", + "msgmerge suffers some bugs when PO files are not encoded in UTF-8; Recoding %s to UTF-8 (was %s) to circumvent the issue.\n" + ), + $filename, + $oldcharset + ) if $oldcharset ne 'UTF-8'; + $header =~ s/charset=[^\s\\]*/charset=UTF-8/i; + + print $fh "msgid \"\"\n"; + print $fh "msgstr " . quote_text( $header, $self->{options}{'wrap-po'} ) . "\n\n"; + + my $buf_msgstr_plural; # Used to keep the first msgstr of plural forms + my $first = 1; + foreach my $msgid ( sort { ( $self->{po}{"$a"}{'pos'} ) <=> ( $self->{po}{"$b"}{'pos'} ) } keys %{ $self->{po} } ) { + my $output = ""; + + if ($first) { + $first = 0; + } else { + $output .= "\n"; + } + + $output .= format_comment( $self->{po}{$msgid}{'comment'}, "" ) + if length( $self->{po}{$msgid}{'comment'} ); + if ( length( $self->{po}{$msgid}{'automatic'} ) ) { + foreach my $comment ( split( /\\n/, $self->{po}{$msgid}{'automatic'} ) ) { + $output .= format_comment( $comment, ". " ); + } + } + $output .= format_comment( $self->{po}{$msgid}{'type'}, ". type: " ) + if length( $self->{po}{$msgid}{'type'} ); + + if ( length( $self->{po}{$msgid}{'reference'} ) ) { + my $output_ref = wrap( $self->{po}{$msgid}{'reference'} ); + $output_ref =~ s/\s+$//mg; + $output .= format_comment( $output_ref, ": " ); + } + $output .= "#, " . join( ", ", sort split( /\s+/, $self->{po}{$msgid}{'flags'} ) ) . "\n" + if length( $self->{po}{$msgid}{'flags'} ); + $output .= format_comment( $self->{po}{$msgid}{'previous'}, "| " ) + if length( $self->{po}{$msgid}{'previous'} ); + + if ( exists $self->{po}{$msgid}{'plural'} ) { + if ( $self->{po}{$msgid}{'plural'} == 0 ) { + $output .= "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n"; + $buf_msgstr_plural = + "msgstr[0] " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n"; + } elsif ( $self->{po}{$msgid}{'plural'} == 1 ) { + + # TODO: there may be only one plural form + $output = "msgid_plural " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n"; + $output .= $buf_msgstr_plural; + $output .= + "msgstr[1] " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n"; + } else { + die wrap_msg( dgettext( "po4a", "Cannot write PO files with more than two plural forms." ) ); + } + } else { + $output .= "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n"; + $output .= "msgstr " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n"; + } + + print $fh $output; + } + print $fh join( "\n\n", @{ $self->{footer} } ) if scalar @{ $self->{footer} }; + + if ( $filename ne '-' ) { + close $fh + or croak wrap_mod( dgettext( "po4a", "Cannot close %s after writing: %s\n" ), $filename, $! ); + } +} + +=item write_if_needed($$) + +Like write, but if the PO or POT file already exists, the object will be +written in a temporary file which will be compared with the existing file +to check if the update is needed (this avoids to change a POT just to +update a line reference or the POT-Creation-Date field). + +=cut + +sub move_po_if_needed { + my ( $new_po, $old_po, $backup ) = ( shift, shift, shift ); + my $diff; + + if ( -e $old_po ) { + $diff = qx(diff -q -I'^#:' -I'^\"POT-Creation-Date:' -I'^\"PO-Revision-Date:' $old_po $new_po); + if ( $diff eq "" ) { + unlink $new_po + or die wrap_msg( dgettext( "po4a", "Cannot unlink %s: %s." ), $new_po, $! ); + + # touch the old PO + my ( $atime, $mtime ) = ( time, time ); + utime $atime, $mtime, $old_po; + } else { + move $new_po, $old_po + or die wrap_msg( dgettext( "po4a", "Cannot move %s to %s: %s." ), $new_po, $old_po, $! ); + } + } else { + move $new_po, $old_po + or die wrap_msg( dgettext( "po4a", "Cannot move %s to %s: %s." ), $new_po, $old_po, $! ); + } +} + +sub write_if_needed { + my $self = shift; + my $filename = shift + or croak dgettext( "po4a", "Cannot write to a file without filename" ) . "\n"; + + if ( -e $filename ) { + my ($tmp_filename); + my $basename = basename($filename); + ( undef, $tmp_filename ) = File::Temp::tempfile( + $basename . "XXXX", + DIR => File::Spec->tmpdir(), + OPEN => 0, + UNLINK => 0 + ); + $self->write($tmp_filename); + move_po_if_needed( $tmp_filename, $filename ); + } else { + $self->write($filename); + } +} + +=item filter($) + +This function extracts a catalog from an existing one. Only the entries having +a reference in the given file will be placed in the resulting catalog. + +This function parses its argument, converts it to a Perl function definition, +evals this definition and filters the fields for which this function returns +true. + +I love Perl sometimes ;) + +=cut + +sub filter { + my $self = shift; + our $filter = shift; + + my $res; + $res = Locale::Po4a::Po->new(); + + # Parse the filter + our $code = "sub apply { return "; + our $pos = 0; + our $length = length $filter; + + # explode chars to parts. How to subscript a string in Perl? + our @filter = split( //, $filter ); + + sub gloups { + my $fmt = shift; + my $space = ""; + for ( 1 .. $pos ) { + $space .= ' '; + } + die wrap_msg("$fmt\n$filter\n$space^ HERE"); + } + + sub showmethecode { + return unless $debug{'filter'}; + my $fmt = shift; + my $space = ""; + for ( 1 .. $pos ) { + $space .= ' '; + } + print STDERR "$filter\n$space^ $fmt\n"; #"$code\n"; + } + + # I dream of a lex in perl :-/ + sub parse_expression { + showmethecode("Begin expression") + if $debug{'filter'}; + + gloups( "Begin of expression expected, got '%s'", $filter[$pos] ) + unless ( $filter[$pos] eq '(' ); + $pos++; # pass the '(' + if ( $filter[$pos] eq '&' ) { + + # AND + $pos++; + showmethecode("Begin of AND") + if $debug{'filter'}; + $code .= "("; + while (1) { + gloups("Unfinished AND statement.") + if ( $pos == $length ); + parse_expression(); + if ( $filter[$pos] eq '(' ) { + $code .= " && "; + } elsif ( $filter[$pos] eq ')' ) { + last; # do not eat that char + } else { + gloups( "End of AND or begin of sub-expression expected, got '%s'", $filter[$pos] ); + } + } + $code .= ")"; + } elsif ( $filter[$pos] eq '|' ) { + + # OR + $pos++; + $code .= "("; + while (1) { + gloups("Unfinished OR statement.") + if ( $pos == $length ); + parse_expression(); + if ( $filter[$pos] eq '(' ) { + $code .= " || "; + } elsif ( $filter[$pos] eq ')' ) { + last; # do not eat that char + } else { + gloups( "End of OR or begin of sub-expression expected, got '%s'", $filter[$pos] ); + } + } + $code .= ")"; + } elsif ( $filter[$pos] eq '!' ) { + + # NOT + $pos++; + $code .= "(!"; + gloups("Missing sub-expression in NOT statement.") + if ( $pos == $length ); + parse_expression(); + $code .= ")"; + } else { + + # must be an equal. Let's get field and argument + my ( $field, $arg, $done ); + $field = substr( $filter, $pos ); + gloups("EQ statement contains no '=' or invalid field name") + unless ( $field =~ /([a-z]*)=/i ); + $field = lc($1); + $pos += ( length $field ) + 1; + + # check that we've got a valid field name, + # and the number it referes to + # DO NOT CHANGE THE ORDER + my @names = qw(msgid msgstr reference flags comment previous automatic); + my $fieldpos; + for ( $fieldpos = 0 ; $fieldpos < scalar @names && $field ne $names[$fieldpos] ; $fieldpos++ ) { } + gloups( "Invalid field name: %s", $field ) + if $fieldpos == scalar @names; # not found + + # Now, get the argument value. It has to be between quotes, + # which can be escaped + # We point right on the first char of the argument + # (first quote already eaten) + my $escaped = 0; + my $quoted = 0; + if ( $filter[$pos] eq '"' ) { + $pos++; + $quoted = 1; + } + showmethecode( ( $quoted ? "Quoted" : "Unquoted" ) . " argument of field '$field'" ) + if $debug{'filter'}; + + while ( !$done ) { + gloups("Unfinished EQ argument.") + if ( $pos == $length ); + + if ($quoted) { + if ( $filter[$pos] eq '\\' ) { + if ($escaped) { + $arg .= '\\'; + $escaped = 0; + } else { + $escaped = 1; + } + } elsif ($escaped) { + if ( $filter[$pos] eq '"' ) { + $arg .= '"'; + $escaped = 0; + } else { + gloups( "Invalid escape sequence in argument: '\\%s'", $filter[$pos] ); + } + } else { + if ( $filter[$pos] eq '"' ) { + $done = 1; + } else { + $arg .= $filter[$pos]; + } + } + } else { + if ( $filter[$pos] eq ')' ) { + + # counter the next ++ since we don't want to eat + # this char + $pos--; + $done = 1; + } else { + $arg .= $filter[$pos]; + } + } + $pos++; + } + + # and now, add the code to check this equality + $code .= "(\$_[$fieldpos] =~ m{$arg})"; + + } + showmethecode("End of expression") + if $debug{'filter'}; + gloups("Unfinished statement.") + if ( $pos == $length ); + gloups( "End of expression expected, got '%s'", $filter[$pos] ) + unless ( $filter[$pos] eq ')' ); + $pos++; + } + + # And now, launch the beast, finish the function and use eval + # to construct this function. + # Ok, the lack of lexer is a fair price for the eval ;) + parse_expression(); + gloups("Garbage at the end of the expression") + if ( $pos != $length ); + $code .= "; }"; + print STDERR "CODE = $code\n" + if $debug{'filter'}; + eval $code; + die wrap_mod( "po4a::po", dgettext( "po4a", "Evaluating the provided filter failed: %s" ), $@ ) + if $@; + + for ( my $cpt = (0) ; $cpt < $self->count_entries() ; $cpt++ ) { + + my ( $msgid, $ref, $msgstr, $flags, $type, $comment, $previous, $automatic ); + + $msgid = $self->msgid($cpt); + $ref = $self->{po}{$msgid}{'reference'}; + + $msgstr = $self->{po}{$msgid}{'msgstr'}; + $flags = $self->{po}{$msgid}{'flags'}; + $type = $self->{po}{$msgid}{'type'}; + $comment = $self->{po}{$msgid}{'comment'}; + $previous = $self->{po}{$msgid}{'previous'}; + $automatic = $self->{po}{$msgid}{'automatic'}; + + # DO NOT CHANGE THE ORDER + $res->push_raw( + 'msgid' => $msgid, + 'msgstr' => $msgstr, + 'flags' => $flags, + 'type' => $type, + 'reference' => $ref, + 'comment' => $comment, + 'previous' => $previous, + 'automatic' => $automatic + ) if ( apply( $msgid, $msgstr, $ref, $flags, $comment, $previous, $automatic ) ); + } + + # delete the apply subroutine + # otherwise it will be redefined. + undef &apply; + return $res; +} + +=back + +=head1 Functions to use a message catalog for translations + +=over 4 + +=item gettext($%) + +Request the translation of the string given as argument in the current catalog. +The function returns the original (untranslated) string if the string was not +found. + +After the string to translate, you can pass a hash of extra +arguments. Here are the valid entries: + +=over + +=item B<wrap> + +boolean indicating whether we can consider that whitespaces in string are +not important. If yes, the function canonizes the string before looking for +a translation, and wraps the result. + +=item B<wrapcol> + +the column at which we should wrap (default: 76). + +=back + +=cut + +sub gettext { + my $self = shift; + my $text = shift; + my (%opt) = @_; + my $res; + + return "" unless length($text); # Avoid returning the header. + my $validoption = "reference wrap wrapcol"; + my %validoption; + + map { $validoption{$_} = 1 } ( split( / /, $validoption ) ); + foreach ( keys %opt ) { + Carp::confess "internal error: unknown arg $_.\n" . "Here are the valid options: $validoption.\n" + unless $validoption{$_}; + } + + $text = canonize($text) + if ( $opt{'wrap'} ); + + my $esc_text = escape_text($text); + + $self->{gettextqueries}++; + + if ( + defined $self->{po}{$esc_text} + and defined $self->{po}{$esc_text}{'msgstr'} + and length $self->{po}{$esc_text}{'msgstr'} + and ( not defined $self->{po}{$esc_text}{'flags'} + or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/ ) + ) + { + + $self->{gettexthits}++; + $res = unescape_text( $self->{po}{$esc_text}{'msgstr'} ); + if ( defined $self->{po}{$esc_text}{'plural'} ) { + if ( $self->{po}{$esc_text}{'plural'} eq "0" ) { + warn wrap_mod( + "po4a gettextize", + dgettext( + "po4a", + "'%s' is the singular form of a message, " . "po4a will use the msgstr[0] translation (%s)." + ), + $esc_text, + $res + ); + } else { + warn wrap_mod( + "po4a gettextize", + dgettext( + "po4a", + "'%s' is the plural form of a message, " . "po4a will use the msgstr[1] translation (%s)." + ), + $esc_text, + $res + ); + } + } + } else { + $res = $text; + } + + if ( $opt{'wrap'} ) { + $res = wrap( $res, $opt{'wrapcol'} || 76, 0 ); + } + + # print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n"; + return $res; +} + +=item stats_get() + +Returns statistics about the hit ratio of gettext since the last time that +stats_clear() was called. Please note that it's not the same +statistics than the one printed by msgfmt --statistic. Here, it's statistics +about recent usage of the PO file, while msgfmt reports the status of the +file. Example of use: + + [some use of the PO file to translate stuff] + + ($percent,$hit,$queries) = $pofile->stats_get(); + print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n"; + +=cut + +sub stats_get() { + my $self = shift; + my ( $h, $q ) = ( $self->{gettexthits}, $self->{gettextqueries} ); + my $p = ( $q == 0 ? 100 : int( $h / $q * 10000 ) / 100 ); + + # $p =~ s/\.00//; + # $p =~ s/(\..)0/$1/; + + return ( $p, $h, $q ); +} + +=item stats_clear() + +Clears the statistics about gettext hits. + +=cut + +sub stats_clear { + my $self = shift; + $self->{gettextqueries} = 0; + $self->{gettexthits} = 0; +} + +=back + +=head1 Functions to build a message catalog + +=over 4 + +=item push(%) + +Push a new entry at the end of the current catalog. The arguments should +form a hash table. The valid keys are: + +=over 4 + +=item B<msgid> + +the string in original language. + +=item B<msgstr> + +the translation. + +=item B<reference> + +an indication of where this string was found. Example: file.c:46 (meaning +in 'file.c' at line 46). It can be a space-separated list in case of +multiple occurrences. + +=item B<comment> + +a comment added here manually (by the translators). The format here is free. + +=item B<automatic> + +a comment which was automatically added by the string extraction +program. See the B<--add-comments> option of the B<xgettext> program for +more information. + +=item B<flags> + +space-separated list of all defined flags for this entry. + +Valid flags are: B<c-text>, B<python-text>, B<lisp-text>, B<elisp-text>, B<librep-text>, +B<smalltalk-text>, B<java-text>, B<awk-text>, B<object-pascal-text>, B<ycp-text>, +B<tcl-text>, B<wrap>, B<no-wrap> and B<fuzzy>. + +See the gettext documentation for their meaning. + +=item B<type> + +this is mostly an internal argument: it is used while gettextizing +documents. The idea here is to parse both the original and the translation +into a PO object, and merge them, using one's msgid as msgid and the +other's msgid as msgstr. To make sure that things get ok, each msgid in PO +objects are given a type, based on their structure (like "chapt", "sect1", +"p" and so on in DocBook). If the types of strings are not the same, that +means that both files do not share the same structure, and the process +reports an error. + +This information is written as automatic comment in the PO file since this +gives to translators some context about the strings to translate. + +=item B<wrap> + +boolean indicating whether whitespaces can be mangled in cosmetic +reformattings. If true, the string is canonized before use. + +This information is written to the PO file using the B<wrap> or B<no-wrap> flag. + +=item B<wrapcol> + +ignored; the key is kept for backward computability. + +=back + +=cut + +sub push { + my $self = shift; + my %entry = @_; + + my $validoption = "wrap wrapcol type msgid msgstr automatic previous flags reference"; + my %validoption; + + map { $validoption{$_} = 1 } ( split( / /, $validoption ) ); + foreach ( keys %entry ) { + Carp::confess "internal error: unknown arg $_.\n" . "Here are the valid options: $validoption.\n" + unless $validoption{$_}; + } + + unless ( $entry{'wrap'} ) { + $entry{'flags'} .= " no-wrap"; + } + if ( defined( $entry{'msgid'} ) ) { + $entry{'msgid'} = canonize( $entry{'msgid'} ) + if ( $entry{'wrap'} ); + + $entry{'msgid'} = escape_text( $entry{'msgid'} ); + } + if ( defined( $entry{'msgstr'} ) ) { + $entry{'msgstr'} = canonize( $entry{'msgstr'} ) + if ( $entry{'wrap'} ); + + $entry{'msgstr'} = escape_text( $entry{'msgstr'} ); + } + + $self->push_raw(%entry); +} + +# The same as push(), but assuming that msgid and msgstr are already escaped +sub push_raw { + my $self = shift; + my %entry = @_; + my ( $msgid, $msgstr, $reference, $comment, $automatic, $previous, $flags, $type, $transref ) = ( + $entry{'msgid'}, $entry{'msgstr'}, $entry{'reference'}, $entry{'comment'}, $entry{'automatic'}, + $entry{'previous'}, $entry{'flags'}, $entry{'type'}, $entry{'transref'} + ); + my $keep_conflict = $entry{'conflict'}; + + # print STDERR "Push_raw\n"; + # print STDERR " msgid=>>>$msgid<<<\n" if $msgid; + # print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr; + # Carp::cluck " flags=$flags\n" if $flags; + + return unless defined( $entry{'msgid'} ); + + # no msgid => header definition + unless ( length( $entry{'msgid'} ) ) { + + # if (defined($self->{header}) && $self->{header} =~ /\S/) { + # warn dgettext("po4a","Redefinition of the header. ". + # "The old one will be discarded\n"); + # } FIXME: do that iff the header isn't the default one. + $self->{header} = $msgstr; + $self->{header_comment} = $comment; + return; + } + + if ( $self->{options}{'porefs'} =~ m/^never/ ) { + $reference = ""; + } elsif ( $self->{options}{'porefs'} =~ m/^counter/ ) { + if ( $reference =~ m/^(.+?)(?=\S+:\d+)/g ) { + my $new_ref = $1; + 1 while $reference =~ s{ # x modifier is added to add formatting and improve readability + \G(\s*)(\S+):\d+ # \G is the last match in m//g (see also the (?=) syntax above) + # $2 is the file name + }{ + $self->{counter}{$2} ||= 0, # each file has its own counter + ++$self->{counter}{$2}, # increment it + $new_ref .= "$1$2:".$self->{counter}{$2} # replace line number by this counter + }gex && pos($reference); + $reference = $new_ref; + } + } elsif ( $self->{options}{'porefs'} =~ m/^file/ ) { + $reference =~ s/:\d+//g; + } + + if ( defined( $self->{po}{$msgid} ) ) { + warn wrap_mod( "po4a::po", dgettext( "po4a", "msgid defined twice: %s" ), $msgid ) + if (0); # FIXME: put a verbose stuff + if ( defined $msgstr + and defined $self->{po}{$msgid}{'msgstr'} + and $self->{po}{$msgid}{'msgstr'} ne $msgstr ) + { + my $txt = quote_text( $msgid, $self->{options}{'wrap-po'} ); + my ( $first, $second ) = ( + format_comment( ". ", $self->{po}{$msgid}{'reference'} ) + . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ), + + format_comment( ". ", $reference ) . quote_text($msgstr), $self->{options}{'wrap-po'} + ); + + if ($keep_conflict) { + if ( $self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s ) { + $msgstr = + $self->{po}{$msgid}{'msgstr'} . "\\n#-#-#-#-# $transref (type: $type) #-#-#-#-#\\n" . $msgstr; + } else { + $msgstr = + "#-#-#-#-# " + . $self->{po}{$msgid}{'transref'} + . " (type " + . $self->{po}{$msgid}{'type'} + . ") #-#-#-#-#\\n" + . $self->{po}{$msgid}{'msgstr'} . "\\n" + . "#-#-#-#-# $transref (type: $type) #-#-#-#-#\\n" + . $msgstr; + } + + # Every msgid will have the same list of references. + # Only keep the last list. + $self->{po}{$msgid}{'reference'} = ""; + } else { + warn wrap_msg( + dgettext( + "po4a", + "Translations don't match for:\n" . "%s\n" + . "-->First translation:\n" . "%s\n" + . " Second translation:\n" . "%s\n" + . " Old translation discarded." + ), + $txt, $first, $second + ); + } + } + } + if ( defined $transref ) { + $self->{po}{$msgid}{'transref'} = $transref; + } + if ( length($reference) ) { + if ( defined $self->{po}{$msgid}{'reference'} ) { + + # Only add the new reference if it's not already included in the existing string + # It'd be much easier if $self->{po}{$msgid}{'reference'} were an array instead of a joined string... + my $oldref = $self->{po}{$msgid}{'reference'}; + $self->{po}{$msgid}{'reference'} .= " " . $reference + unless ( ( $oldref =~ m/ $reference / ) + || ( $oldref =~ m/ $reference$/ ) + || ( $oldref =~ m/^$reference$/ ) + || ( $oldref =~ m/^$reference / ) ); + } else { + $self->{po}{$msgid}{'reference'} = $reference; + } + } + $self->{po}{$msgid}{'msgstr'} = $msgstr; + $self->{po}{$msgid}{'comment'} = $comment; + $self->{po}{$msgid}{'automatic'} = $automatic; + $self->{po}{$msgid}{'previous'} = $previous; + + $self->{po}{$msgid}{pos_doc} = () unless ( defined( $self->{po}{$msgid}{pos_doc} ) ); + CORE::push( @{ $self->{po}{$msgid}{pos_doc} }, $self->{count_doc}++ ); + CORE::push( @{ $self->{gettextize_types} }, $type ); + + unless ( defined( $self->{po}{$msgid}{'pos'} ) ) { + $self->{po}{$msgid}{'pos'} = $self->{count}++; + } + $self->{po}{$msgid}{'type'} = $type; + $self->{po}{$msgid}{'plural'} = $entry{'plural'} + if defined $entry{'plural'}; + + if ( defined($flags) ) { + $flags = " $flags "; + $flags =~ s/,/ /g; + foreach my $flag (@known_flags) { + if ( index( $flags, " $flag " ) != -1 ) { # if flag to be set + unless ( defined( $self->{po}{$msgid}{'flags'} ) + && $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/ ) + { + # flag not already set + if ( defined $self->{po}{$msgid}{'flags'} ) { + $self->{po}{$msgid}{'flags'} .= " " . $flag; + } else { + $self->{po}{$msgid}{'flags'} = $flag; + } + } + } + } + } + + # print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n"; + +} + +=back + +=head1 Miscellaneous functions + +=over 4 + +=item count_entries() + +Returns the number of entries in the catalog (without the header). + +=cut + +sub count_entries($) { + my $self = shift; + return $self->{count}; +} + +=item count_entries_doc() + +Returns the number of entries in document. If a string appears multiple times +in the document, it will be counted multiple times. + +=cut + +sub count_entries_doc($) { + my $self = shift; + return $self->{count_doc}; +} + +=item msgid($) + +Returns the msgid of the given number. + +=cut + +sub msgid($$) { + my $self = shift; + my $num = shift; + + foreach my $msgid ( keys %{ $self->{po} } ) { + return $msgid if ( $self->{po}{$msgid}{'pos'} eq $num ); + } + return undef; +} + +=item msgid_doc($) + +Returns the msgid with the given position in the document. + +=cut + +sub msgid_doc($$) { + my $self = shift; + my $num = shift; + + foreach my $msgid ( keys %{ $self->{po} } ) { + foreach my $pos ( @{ $self->{po}{$msgid}{'pos_doc'} } ) { + return $msgid if ( $pos eq $num ); + } + } + return undef; +} + +=item type_doc($) + +Returns the type of the msgid with the given position in the document. This is +probably only useful to gettextization, and it's stored separately from +{$msgid}{'type'} because the later location may be overwritten by another type +when the $msgid is duplicated in the master document. + +=cut + +sub type_doc($$) { + my $self = shift; + my $num = shift; + + return ${ $self->{gettextize_types} }[$num]; +} + +=item get_charset() + +Returns the character set specified in the PO header. If it hasn't been +set, it will return "UTF-8". + +=cut + +sub get_charset() { + my $self = shift; + + $self->{header} =~ /charset=(.*?)[\s\\]/; + + if ( defined $1 ) { + return $1; + } else { + return "UTF-8"; + } +} + +#----[ helper functions ]--------------------------------------------------- + +# transforme the string from its PO file representation to the form which +# should be used to print it +sub unescape_text { + my $text = shift; + + print STDERR "\nunescape [$text]====" if $debug{'escape'}; + $text = join( "", split( /\n/, $text ) ); + $text =~ s/\\"/"/g; + + # unescape newlines + # NOTE on \G: + # The following regular expression introduce newlines. + # Thus, ^ doesn't match all beginnings of lines. + # \G is a zero-width assertion that matches the position + # of the previous substitution with s///g. As every + # substitution ends by a newline, it always matches a + # position just after a newline. + $text =~ s/( # $1: + (\G|[^\\]) # beginning of the line or any char + # different from '\' + (\\\\)* # followed by any even number of '\' + )\\n # and followed by an escaped newline + /$1\n/sgx; # single string, match globally, allow comments + # unescape carriage returns + $text =~ s/( # $1: + (\G|[^\\]) # beginning of the line or any char + # different from '\' + (\\\\)* # followed by any even number of '\' + )\\r # and followed by an escaped carriage return + /$1\r/sgx; # single string, match globally, allow comments + # unescape tabulations + $text =~ s/( # $1: + (\G|[^\\])# beginning of the line or any char + # different from '\' + (\\\\)* # followed by any even number of '\' + )\\t # and followed by an escaped tabulation + /$1\t/mgx; # multilines string, match globally, allow comments + # and unescape the escape character + $text =~ s/\\\\/\\/g; + print STDERR ">$text<\n" if $debug{'escape'}; + + return $text; +} + +# transform the string to its representation as it should be written in PO +# files +sub escape_text { + my $text = shift; + + print STDERR "\nescape [$text]====" if $debug{'escape'}; + $text =~ s/\\/\\\\/g; + $text =~ s/"/\\"/g; + $text =~ s/\n/\\n/g; + $text =~ s/\r/\\r/g; + $text =~ s/\t/\\t/g; + print STDERR ">$text<\n" if $debug{'escape'}; + + return $text; +} + +# put quotes around the string on each lines (without escaping it) +# It does also normalize the text (ie, make sure its representation is wrapped +# on the 80th char, but without changing the meaning of the string) +sub quote_text { + my $string = shift; + my $do_wrap = shift // 'no'; # either 'no' or 'newlines', or column at which we should wrap + + return '""' unless length($string); + + return "\"$string\"" if ( $do_wrap eq 'no' ); + + print STDERR "\nquote $do_wrap [$string]====" if $debug{'quote'}; + + # break lines on newlines, if any + # see unescape_text for an explanation on \G + $string =~ s/( # $1: + (\G|[^\\]) # beginning of the line or any char + # different from '\' + (\\\\)* # followed by any even number of '\' + \\n) # and followed by an escaped newline + /$1\n/sgx; # single string, match globally, allow comments + + $string = wrap( $string, $do_wrap ) if ( $do_wrap ne 'newlines' ); + my @string = split( /\n/, $string ); + $string = join( "\"\n\"", @string ); + $string = "\"$string\""; + if ( scalar @string > 1 && $string[0] ne '' ) { + $string = "\"\"\n" . $string; + } + + print STDERR ">$string<\n" if $debug{'quote'}; + return $string; +} + +# undo the work of the quote_text function +sub unquote_text { + my $string = shift; + print STDERR "\nunquote [$string]====" if $debug{'quote'}; + $string =~ s/^""\\n//s; + $string =~ s/^"(.*)"$/$1/s; + $string =~ s/"\n"//gm; + + # Note: an even number of '\' could precede \\n, but I could not build a + # document to test this + $string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm; + $string =~ s|!!DUMMYPOPM!!|\\n|gm; + print STDERR ">$string<\n" if $debug{'quote'}; + return $string; +} + +# canonize the string: write it on only one line, changing consecutive +# whitespace to only one space. +# Warning, it changes the string and should only be called if the string is +# plain text +sub canonize { + my $text = shift; + print STDERR "\ncanonize [$text]====" if $debug{'canonize'}; + $text =~ s/^ *//s; + $text =~ s/^[ \t]+/ /gm; + + # if ($text eq "\n"), it messed up the first string (header) + $text =~ s/\n/ /gm if ( $text ne "\n" ); + $text =~ s/([.)]) +/$1 /gm; + $text =~ s/([^.)]) */$1 /gm; + $text =~ s/ *$//s; + print STDERR ">$text<\n" if $debug{'canonize'}; + return $text; +} + + +# Wraps the string. We don't use Text::Wrap since it mangles whitespace at the +# end of the split line. +# +# Mandatory arguments: +# - A string to wrap. May content line breaks, in such case each line will be +# wrapped separately. +# Optional arguments: +# - A column to wrap on. Default: 76. +# - The extra length allowed for the first line. Default: -10 (which means it +# will be wrapped 10 characters shorter). +sub wrap { + my $text = shift; + return "0" if ( $text eq '0' ); + my $col = shift || 76; + my $first_shift = shift || -10; + my @lines = split( /\n/, "$text" ); + my $res = ""; + + while ( defined( my $line = shift @lines ) ) { + if ( $first_shift != 0 && length($line) > $col + $first_shift ) { + unshift @lines, $line; + $first_shift = 0; + next; + } + if ( length($line) > $col ) { + my $pos = rindex( $line, " ", $col ); + while ( substr( $line, $pos - 1, 1 ) eq '.' && $pos != -1 ) { + $pos = rindex( $line, " ", $pos - 1 ); + } + if ( $pos == -1 ) { + + # There are no spaces in the first $col chars, pick-up the + # first space + $pos = index( $line, " " ); + } + if ( $pos != -1 ) { + my $end = substr( $line, $pos + 1 ); + $line = substr( $line, 0, $pos + 1 ); + if ( $end =~ s/^( +)// ) { + $line .= $1; + } + unshift @lines, $end; + } + } + $first_shift = 0; + $res .= "$line\n"; + } + + # Restore the original trailing spaces + $res =~ s/\s+$//s; + if ( $text =~ m/(\s+)$/s ) { + $res .= $1; + } + return $res; +} + +# outputs properly a '# ... ' line to be put in the PO file +sub format_comment { + my $comment = shift; + my $char = shift; + my $result = "#" . $char . $comment; + $result =~ s/\n/\n#$char/gs; + $result =~ s/^#$char$/#/gm; + $result .= "\n"; + return $result; +} + +1; +__END__ + +=back + +=head1 AUTHORS + + Denis Barbier <barbier@linuxfr.org> + Martin Quinson (mquinson#debian.org) + +=cut diff --git a/lib/Locale/Po4a/Pod.pm b/lib/Locale/Po4a/Pod.pm new file mode 100644 index 0000000..4b3dc04 --- /dev/null +++ b/lib/Locale/Po4a/Pod.pm @@ -0,0 +1,287 @@ +# Locale::Po4a::Pod -- Convert POD data to PO file, for translation. +# +# This program is free software; you may redistribute it and/or modify it +# under the terms of GPL v2.0 or later (see COPYING file). +# +# This module converts POD to PO file, so that it becomes possible to +# translate POD formatted documentation. See gettext documentation for +# more info about PO files. + +############################################################################ +# Modules and declarations +############################################################################ + +use Pod::Parser; +use Locale::Po4a::TransTractor qw(process new get_in_charset get_out_charset); + +package Locale::Po4a::Pod; + +use 5.16.0; +use strict; +use warnings; + +require Exporter; + +use vars qw(@ISA); +@ISA = qw(Locale::Po4a::TransTractor Pod::Parser); + +use Carp qw(croak confess); + +sub initialize { } + +sub translate { + my ( $self, $str, $ref, $type ) = @_; + my (%options) = @_; + + $str = $self->pre_trans( $str, $ref, $type ); + $str = $self->SUPER::translate( $str, $ref, $type, %options ); + $str = $self->post_trans( $str, $ref, $type ); + + return $str; +} + +sub pre_trans { + my ( $self, $str, $ref, $type ) = @_; + + return $str; +} + +sub post_trans { + my ( $self, $str, $ref, $type ) = @_; + + # Change ascii non-breaking space to POD one + my $nbs_out = "\xA0"; + my $enc_length = Encode::from_to( $nbs_out, "latin1", $self->get_out_charset ); + if ( defined $enc_length ) { + while ( $str =~ m/(^|.*\s)(\S+?)\Q$nbs_out\E(\S+?)(\s.*$|$)/s ) { + my ( $begin, $m1, $m2, $end ) = ( $1, $2, $3, $4 ); + $str = ( defined $begin ) ? $begin : ""; + + # Remove the non-breaking spaces in the string that will be + # between S<...> + $m2 =~ s/\Q$nbs_out\E/ /g; + $str .= "S<$m1 $m2>"; + $str .= ( defined $end ) ? $end : ""; + } + } + + return $str; +} + +sub command { + my ( $self, $command, $paragraph, $line_num ) = @_; + + # print STDOUT "cmd: '$command' '$paragraph' at $line_num\n"; + if ( $command eq 'back' + || $command eq 'cut' + || $command eq 'pod' ) + { + $self->pushline("=$command\n\n"); + } elsif ( $command eq 'over' ) { + $self->pushline( "=$command $paragraph" . ( length($paragraph) ? "" : "\n\n" ) ); + } elsif ( $command eq 'encoding' ) { + my $charset = $paragraph; + $charset =~ s/^\s*(.*?)\s*$/$1/s; + + my $master_charset = $self->get_in_charset; + croak wrap_mod( + "po4a::pod", + dgettext( + "po4a", + "The file %s declares %s as encoding, but you provided %s as master charset. Please change either setting." + ), + $self->{DOCPOD}{refname}, + $charset, + $master_charset, + ) if ( length( $master_charset // '' ) > 0 && uc($charset) ne uc($master_charset) ); + + # The =encoding line will be added by docheader + } else { + $paragraph = $self->translate( $paragraph, $self->{DOCPOD}{refname} . ":$line_num", "=$command", "wrap" => 1 ); + $self->pushline("=$command $paragraph\n\n"); + } +} + +sub verbatim { + my ( $self, $paragraph, $line_num ) = @_; + + # print "verb: '$paragraph' at $line_num\n"; + + if ( $paragraph eq "\n" ) { + $self->pushline("$paragraph\n"); + return; + } + $paragraph = $self->translate( $paragraph, $self->{DOCPOD}{refname} . ":$line_num", "verbatim" ); + $paragraph =~ s/\n$//m; + $self->pushline("$paragraph\n"); +} + +sub textblock { + my ( $self, $paragraph, $line_num ) = @_; + + # print "text: '$paragraph' at $line_num\n"; + + if ( $paragraph eq "\n" ) { + $self->pushline("$paragraph\n"); + return; + } + + # Fix a pretty damned bug. + # Podlators don't wrap explicitelly the text, and groff won't seem to + # wrap any line begining with a space. So, we have to consider as + # verbatim not only the paragraphs whose first line is indented, but + # the paragraph containing an indented line. + # That way, we'll declare more paragraphs as verbatim than needed, but + # that's harmless (only less confortable for translators). + if ( $paragraph =~ m/^[ \t]/m ) { + $self->verbatim( $paragraph, $line_num ); + return; + } + + $paragraph = $self->translate( $paragraph, $self->{DOCPOD}{refname} . ":$line_num", 'textblock', "wrap" => 1 ); + $paragraph =~ s/ *\n/ /gm; # Unwrap the content, to ensure that C<> markup is not split on several lines + $self->pushline("$paragraph\n\n"); +} + +sub end_pod { } + +sub read { + my ( $self, $filename, $refname, $charset ) = @_; + $charset ||= "UTF-8"; + my $fh; + open $fh, "<:encoding($charset)", $filename; + push @{ $self->{DOCPOD}{infile} }, ( $fh, $refname ); + $self->Locale::Po4a::TransTractor::read( $filename, $refname, $charset ); +} + +sub parse { + my $self = shift; + + my @list = @{ $self->{DOCPOD}{infile} }; + while ( scalar @list ) { + my ( $fh, $refname ) = ( shift @list, shift @list ); + $self->{DOCPOD}{refname} = $refname; + $self->parse_from_filehandle($fh); + close $fh; + } +} + +sub docheader { + my $self = shift; + my $encoding = $self->get_out_charset(); + if ( ( defined $encoding ) + and ( length $encoding ) + and ( $encoding ne "ascii" ) ) + { + $encoding = "\n=encoding $encoding\n"; + } else { + $encoding = ""; + } + + return <<EOT; + + ***************************************************** + * GENERATED FILE, DO NOT EDIT * + * THIS IS NO SOURCE FILE, BUT RESULT OF COMPILATION * + ***************************************************** + +This file was generated by po4a(7). Do not store it (in VCS, for example), +but store the PO file used as source file by po4a-translate. + +In fact, consider this as a binary, and the PO file as a regular .c file: +If the PO get lost, keeping this translation up-to-date will be harder. +$encoding +EOT +} +1; + +############################################################################## +# Module return value and documentation +############################################################################## + +1; +__END__ + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::Pod - convert POD data from/to PO files + +=head1 SYNOPSIS + + use Locale::Po4a::Pod; + my $parser = Locale::Po4a::Pod->new(); + + # Read POD from STDIN and write to STDOUT. + $parser->parse_from_filehandle; + + # Read POD from file.pod and write to file.txt. + $parser->parse_from_file ('file.pod', 'file.txt'); + +=head1 DESCRIPTION + +Locale::Po4a::Pod is a module to help the translation of documentation in +the POD format (the preferred language for documenting Perl) into other +[human] languages. + +=head1 STATUS OF THIS MODULE + +I think that this module is rock stable, and there is only one known bug +with F</usr/lib/perl5/Tk/MainWindow.pod> (and some other +pages, see below) which contains: + + C<" #n"> + +Lack of luck, in the po4a version, this was split on the space by the +wrapping. As result, in the original version, the man page contains: + + " #n" + +and mine contains: + + "" #n"" + +which is logic since CE<lt>foobarE<gt> is rewritten "foobar". + +Complete list of pages having this problem on my box (from 564 pages; note +that it depends on the chosen wrapping column): + + /usr/lib/perl5/Tk/MainWindow.pod + /usr/share/perl/5.8.0/overload.pod + /usr/share/perl/5.8.0/pod/perlapi.pod + /usr/share/perl/5.8.0/pod/perldelta.pod + /usr/share/perl/5.8.0/pod/perlfaq5.pod + /usr/share/perl/5.8.0/pod/perlpod.pod + /usr/share/perl/5.8.0/pod/perlre.pod + /usr/share/perl/5.8.0/pod/perlretut.pod + + + +=head1 INTERNALS + +As a derived class from Pod::Parser, Locale::Po4a::Pod supports the same +methods and interfaces. See L<Pod::Parser> for all the details; briefly, +one creates a new parser with C<< Locale::Po4a::Pod->new() >> and then +calls either parse_from_filehandle() or parse_from_file(). + +=head1 SEE ALSO + +L<Pod::Parser>, +L<Locale::Po4a::Man(3pm)>, +L<Locale::Po4a::TransTractor(3pm)>, +L<po4a(7)|po4a.7> + +=head1 AUTHORS + + Denis Barbier <barbier@linuxfr.org> + Martin Quinson (mquinson#debian.org) + +=head1 COPYRIGHT AND LICENSE + +Copyright © 2002 SPI, Inc. + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut diff --git a/lib/Locale/Po4a/RubyDoc.pm b/lib/Locale/Po4a/RubyDoc.pm new file mode 100644 index 0000000..6780ca1 --- /dev/null +++ b/lib/Locale/Po4a/RubyDoc.pm @@ -0,0 +1,527 @@ +# Locale::Po4a::RubyDoc -- Convert Ruby Document data to PO file, for translation +# +# Copyright © 2016-2017 Francesco Poli <invernomuto@paranoici.org> +# +# This program is free software; you may redistribute it and/or modify it +# under the terms of GPL v2.0 or later (see COPYING). +# +# This work is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This work is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this work; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# +# Parts of the code (such as many regular expressions) were adapted +# from the source of rdtool, under the terms of the GNU General Public +# License, version 2 or later. +# These parts are originally: +# Copyright © 2004 MoonWolf <moonwolf@moonwolf.com> +# Copyright © 2011-2012 Youhei SASAKI <uwabami@gfd-dennou.org> +# +# The initialize code was adapted from the source of Locale::Po4a::Text, +# under the terms of the GNU General Public License, version 2 or later. +# This code was originally: +# Copyright © 2005-2008 Nicolas FRANÇOIS <nicolas.francois@centraliens.net> +# +############################################################################ +# +# This module converts Ruby Document (RD) format to PO files, so that Ruby +# Document formatted texts may be translated. See gettext documentation +# for more details about PO files. +# +############################################################################ + +package Locale::Po4a::RubyDoc; + +use Locale::Po4a::TransTractor qw(process new); +use Locale::Po4a::Common; + +use 5.16.0; +use strict; +use warnings; + +require Exporter; + +use vars qw(@ISA); +@ISA = qw(Locale::Po4a::TransTractor); + +###################### +# Global variables # +###################### + +my $insiderubydoc = 0; + +############# +# Methods # +############# + +sub initialize { + my $self = shift; + my %options = @_; + + $self->{options}{'debug'} = 1; + $self->{options}{'verbose'} = 1; + $self->{options}{'puredoc'} = 0; + + foreach my $opt ( keys %options ) { + die wrap_mod( "po4a::rubydoc", dgettext( "po4a", "Unknown option: %s" ), $opt ) + unless exists $self->{options}{$opt}; + $self->{options}{$opt} = $options{$opt}; + } + + if ( defined $options{'puredoc'} ) { + + # initially assume to be already inside the Ruby Document + $insiderubydoc = 1; + } else { + + # initially assume to be outside the Ruby Document + $insiderubydoc = 0; + } +} + +sub docheader { + return <<EOT; +# +# ***************************************************** +# * GENERATED FILE, DO NOT EDIT * +# * THIS IS NO SOURCE FILE, BUT RESULT OF COMPILATION * +# ***************************************************** +# +# This file was generated by po4a-translate(1). Do not store it (in VCS, +# for example), but store the PO file used as source file by po4a-translate. +# +# In fact, consider this as a binary, and the PO file as a regular source file: +# If the PO gets lost, keeping this translation up-to-date will be harder. +# +EOT +} + +sub parse { + my $self = shift; + + # start with baseline and firstindent corresponding to no indentation + my $baseline = 0; + my $firstindent = 0; + + # start in non-verbatim mode + my $verbmode = 0; + + # we have not yet seen any Term, hence we are not yet waiting for a + # Description + my $waitfordesc = 0; + my $methodterm = ""; + + # flag to remember that we have reached the end of the document + my $eof = 0; + + PARAGRAPH: while () { + + # start accumulating a new paragraph and corresponding variables + my ( $para, $pref, $ptype, $pwrap, $symbol, $tail ) = ( "", "", "", 1, "", "" ); + + LINE: while () { + + # fetch next line and its reference + my ( $line, $lref ) = $self->shiftline(); + + unless ( defined($line) ) { + + # we reached the end of the document + $eof = 1; + last LINE; + } + + if ( $line =~ /^=begin\s*(\bRD\b.*)?\s*$/ ) { + + # we are entering a Ruby Document part + $insiderubydoc = 1; + $baseline = 0; + $verbmode = 0; + $waitfordesc = 0; + $self->pushline($line); + next PARAGRAPH; + } + + if ( $line =~ /^=end/ ) { + + # we are exiting a Ruby Document part + $insiderubydoc = 0; + $baseline = 0; + $verbmode = 0; + $waitfordesc = 0; + $tail = $line; + last LINE; + } + + # do nothing while outside the Ruby Document + next PARAGRAPH unless ($insiderubydoc); + + # we encountered a Comment: ignore it entirely + next LINE if ( $line =~ /^#/ ); + + if ( $line =~ /^(={1,4})(?!=)\s*(?=\S)(.*)/ + or $line =~ /^(\+{1,2})(?!\+)\s*(?=\S)(.*)/ ) + { + # we encountered a Headline: this is a paragraph on its own + if ( length($para) ) { + + # we already have some paragraph to be processed: + # reput the current line in input and end paragraph + $self->unshiftline( $line, $lref ); + last LINE; + } else { + + # we are at the beginning of a paragraph, but a Headline + # is a single-line paragraph: define the variables + # and end paragraph + $symbol = "$1 "; + $para = $2; + $pref = $lref; + $ptype = "Headline $1"; + $baseline = 0; + $verbmode = 0; + $waitfordesc = 0; + last LINE; + } + } + + if ( $line =~ /^<<<\s*(\S+)/ ) { + + # we encountered an Include line: end paragraph + $tail = $line; + last LINE; + } + + # compute indentation + $line =~ /^(\s*)/; + my $indent = length($1); + + if ($verbmode) { + + # use verbatim mode rules + # ----------------------- + + if ( $indent >= $firstindent ) { + + # indentation matches first line or is deeper: + # the Verbatim goes on + $para .= $line; + next LINE; + } else { + + # indentation is shallower than first line: + # reput the current line in input, exit verbatim mode + # and end paragraph + $self->unshiftline( $line, $lref ); + $verbmode = 0; + $waitfordesc = 0; + last LINE; + } + } else { + + # use non-verbatim mode rules + # --------------------------- + + if ( $line =~ /^\s*$/ ) { + + # we encountered a WHITELINE: end paragraph + $tail = $line; + last LINE; + } + + if ( $line =~ /^(\s*)\*(\s*)(.*)/ ) { + + # we encountered the first line of a ItemListItem + if ( length($para) ) { + + # we already have some paragraph to be processed: + # reput the current line in input and end paragraph + $self->unshiftline( $line, $lref ); + last LINE; + } else { + + # we are at the beginning of a paragraph: + # define the variables + $symbol = "$1*$2"; + $para .= $3; + $pref = $lref; + $ptype = "ItemListItem *"; + $baseline = length($symbol); + $waitfordesc = 0; + next LINE; + } + } + + if ( $line =~ /^(\s*)(\(\d+\))(\s*)(.*)/ ) { + + # we encountered the first line of an EnumListItem + if ( length($para) ) { + + # we already have some paragraph to be processed: + # reput the current line in input and end paragraph + $self->unshiftline( $line, $lref ); + last LINE; + } else { + + # we are at the beginning of a paragraph: + # define the variables + $symbol = "$1$2$3"; + $para .= $4; + $pref = $lref; + $ptype = "EnumListItem $2"; + $baseline = length($symbol); + $waitfordesc = 0; + next LINE; + } + } + + if ( $line =~ /^(\s*):(\s*)(.*)/ ) { + + # we encountered the Term line of a DescListItem + if ( length($para) ) { + + # we already have some paragraph to be processed: + # reput the current line in input and end paragraph + $self->unshiftline( $line, $lref ); + last LINE; + } else { + + # we are at the beginning of a paragraph, but the Term + # part of a DescListItem is a single-line paragraph: + # define the variables and end paragraph + $symbol = "$1:$2"; + $para = $3; + $pref = $lref; + $ptype = "DescListItem Term :"; + $baseline = length($symbol); + $waitfordesc = 1; + last LINE; + } + } + + if ( $line =~ /^(\s*)---(?!-|\s*$)(\s*)(.*)/ ) { + + # we encountered the Term line of a MethodListItem + if ( length($para) ) { + + # we already have some paragraph to be processed: + # reput the current line in input and end paragraph + $self->unshiftline( $line, $lref ); + last LINE; + } else { + + # we are at the beginning of a paragraph, but the Term + # part of a MethodListItem is a single-line paragraph; + # moreover, it's not translatable: end paragraph + $baseline = length("$1---$2"); + $waitfordesc = 2; + $tail = $line; + $methodterm = "--- $3"; + last LINE; + } + } + + # we apparently encountered a STRINGLINE + if ( length($para) ) { + + # we already have some paragraph to be processed: + if ( $indent == $baseline ) { + + # indentation matches baseline: + # append the STRINGLINE to the paragraph + $para .= $line; + } else { + + # indentation differs from baseline: + # reput the current line in input and end paragraph + $self->unshiftline( $line, $lref ); + last LINE; + } + } else { + + # we are at the beginning of a paragraph: + # define the variables + if ($waitfordesc) { + + # we were waiting for a DescListItem Description: + # we have just found it + if ( $waitfordesc == 1 ) { + $ptype = "DescListItem Description"; + } else { + $ptype = "MethodListItem Description $methodterm"; + } + $baseline = $indent; + $waitfordesc = 0; + + # reproduce the original indentation + $symbol = " " x $indent; + } else { + if ( $indent > $baseline ) { + + # indentation is deeper than baseline: + # we are entering a Verbatim + $verbmode = 1; + $ptype = "Verbatim"; + $pwrap = 0; + $firstindent = $indent; + } else { + + # indentation is not deeper than baseline: + # this is a TextBlock + $ptype = "TextBlock"; + $baseline = $indent; + + # reproduce the original indentation + $symbol = " " x $indent; + } + } + $para .= $line; + $pref = $lref; + } + } + + } + + if ( length($para) ) { + + # set wrap column at 76 - identation, but never less than 26 + my $ni = length($symbol); + my $wc = 76 - $ni; + $wc = 26 if ( $wc < 26 ); + + # get the translated paragraph + my $translated = $self->translate( + $para, + $pref, + $ptype, + 'wrap' => $pwrap, + 'wrapcol' => $wc + ); + + if ($pwrap) { + + # reformat the translated paragraph + my $is = " " x $ni; + chomp $translated; + $translated =~ s/\n/\n$is/g; + $translated .= "\n"; + } + + # push the paragraph to the translated document + $self->pushline( $symbol . $translated ); + } + + if ( length($tail) ) { + + # push the non translatable tail to the translated document + $self->pushline($tail); + } + + # stop processing, if we have already reached the end of the document + return if ($eof); + } +} + +########################## +# Module documentation # +########################## + +1; +__END__ + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::RubyDoc -- Convert Ruby Document data from/to PO files + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::RubyDoc is a module to help the translation of documentation in +the Ruby Document (RD) format (a language used to document Ruby) into other +[human] languages. + +=head1 STATUS OF THIS MODULE + +This module has been successfully tested on simple Ruby Document files +covering a good part of the format syntax. + +A known limitation is that it fails to properly recognize the stacked +structure of input Ruby Document: this implies that when, for instance, +an EnumListItem consists of more than one Block, only the first Block +is actually recognized as EnumListItem, while the subsequent ones are +considered just as TextBlocks... + +=head1 OPTIONS ACCEPTED BY THIS MODULE + +This module supports the following option: + +=over + +=item B<puredoc> + +Handle files entirely made of Ruby Document formatted text (without +any "=begin" line). + +By default, this module only handles Ruby Document formatted text +between "=begin" and "=end" lines (hence ignoring, among other things, +everything that precedes the first "=begin" line). + +=back + +=head1 SEE ALSO + +L<Locale::Po4a::TransTractor(3pm)> + +=head1 AUTHORS + +Francesco Poli <invernomuto@paranoici.org> + +=head1 COPYRIGHT AND LICENSE + + Copyright © 2016-2017 Francesco Poli <invernomuto@paranoici.org> + +This work is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This work is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this work; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + +Parts of the code (such as many regular expressions) were adapted +from the source of rdtool, under the terms of the GNU General Public +License, version 2 or later. +These parts are originally: + + Copyright © 2004 MoonWolf <moonwolf@moonwolf.com> + Copyright © 2011-2012 Youhei SASAKI <uwabami@gfd-dennou.org> + +The initialize code was adapted from the source of Locale::Po4a::Text, +under the terms of the GNU General Public License, version 2 or later. +This code was originally: + + Copyright © 2005-2008 Nicolas FRANÇOIS <nicolas.francois@centraliens.net> + +=cut diff --git a/lib/Locale/Po4a/Sgml.pm b/lib/Locale/Po4a/Sgml.pm new file mode 100644 index 0000000..45eb360 --- /dev/null +++ b/lib/Locale/Po4a/Sgml.pm @@ -0,0 +1,1372 @@ +#!/usr/bin/perl -w + +# Po4a::Sgml.pm +# +# extract and translate translatable strings from an sgml based document. +# +# This code is an adapted version of sgmlspl (SGML postprocessor for the +# SGMLS and NSGMLS parsers) which was: +# +# Copyright © 1995 David Megginson <dmeggins@aix1.uottawa.ca> +# +# The adaptation for po4a was done by Denis Barbier <barbier@linuxfr.org>, +# Martin Quinson (mquinson#debian.org) and others. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +######################################################################## + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::Sgml - convert SGML documents from/to PO files + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::Sgml is a module to help the translation of documentation in +the SGML format into other [human] languages. + +This module uses B<onsgmls>(1) to parse the SGML files. Make sure it is +installed. +Also make sure that the DTD of the SGML files are installed in the system. + +=head1 OPTIONS ACCEPTED BY THIS MODULE + +=over 4 + +=item B<debug> + +Space-separated list of keywords indicating which category of extra debug +messages should be shown. Possible values are: C<entities>, C<generic>, +C<onsgml>, C<refs> and C<tag>. + +=item B<verbose> + +Give more information about what's going on. + +=item B<translate> + +Space-separated list of extra tags (beside the DTD provided ones) whose +content should form an extra msgid, i.e that should be translated. + +=item B<section> + +Space-separated list of extra tags (beside the DTD provided ones) +containing other tags, some of them being of category B<translate>. + +=item B<indent> + +Space-separated list of tags which increase the indentation level. This will +affect the identation in the resulting document. + +=item B<verbatim> + +The layout within those tags should not be changed. The paragraph won't get +wrapped, and no extra indentation space or new line will be added for +cosmetic purpose. + +=item B<empty> + +Tags not needing to be closed. + +=item B<ignore> + +Tags ignored and considered as plain char data by po4a. That is to say that +they can be part of an msgid. For example, E<lt>bE<gt> is a good candidate +for this category since putting it in the B<translate> section would create a +msgids with only its content, (and it's ususally not a whole sentence), which +is bad. + +=item B<attributes> + +A space-separated list of attributes that need to be translated. You can +specify the attributes by their name (for example, C<lang>), but you can also +prefix it with a tag hierarchy, to specify that this attribute will only be +translated when it is into the specified tag. For example: +C<< <bbb><aaa>lang >> specifies that the lang attribute will only be +translated if it is in an C<< <aaa> >> tag, which is in a C<< <bbb> >> tag. +The tag names are actually regular expressions so you can also write things +like C<< <aaa|bbb>lang >> to only translate C<lang> attributes that are in +an C<< <aaa> >> or a C<< <bbb> >> tag. + +=item B<qualify> + +A space-separated list of attributes for which the translation must be +qualified by the attribute name, i.e. the text extracted for the transalation +will include both the attributes name and it's value. e.g. for a tag like +C<< <aaa lang_en="foo"> >> translators will be presented with the string +C<lang_en="foo">. Note that this also automatically adds the given attribute +into the B<attributes> list too. + + +=item B<force> + +Proceed even if the DTD is unknown or if B<onsgmls> finds errors in the input +file. + +=item B<include-all> + +By default, msgids containing only one entity (like C<&version;>) are skipped +for the translators' comfort. Activating this option prevents this +optimisation. It can be useful if the document contains a construction like +C<< <title>Á</title> >>, even if I doubt such things to ever happen... + +=item B<ignore-inclusion> + +Space-separated list of entities that won't be inlined. +Use this option with caution: it may cause B<onsgmls> (used internally) to add +tags and render the output document invalid. + +=back + +=head1 STATUS OF THIS MODULE + +The result is perfect. I.e., the generated documents are exactly the +same as the originals. But there are still some problems: + +=over 2 + +=item * + +The error output of B<onsgmls> is redirected to /dev/null by default, which is clearly +bad. I don't know how to avoid that. + +The problem is that I have to "protect" the conditional inclusions (i.e. the +C<E<lt>! [ %foo [> and C<]]E<gt>> stuff) from B<onsgmls>. Otherwise +B<onsgmls> eats them, and I don't know how to restore them in the final +document. To prevent that, I rewrite them to C<{PO4A-beg-foo}> and +C<{PO4A-end}>. + +The problem with this is that the C<{PO4A-end}> and such I add are invalid in +the document (not in a E<lt>pE<gt> tag or so). + +If you want to view the B<onsgmls> output, just add the following to your command line (or po4a configuration line): + + -o debug=onsgmls + +=item * + +It does work only with the DebianDoc and DocBook DTD. Adding support for a +new DTD should be very easy. The mechanism is the same for every DTD, you just +have to give a list of the existing tags and some of their characteristics. + +I agree, this needs some more documentation, but it is still considered as +beta, and I hate to document stuff which may/will change. + +=item * + +Warning, support for DTDs is quite experimental. I did not read any +reference manual to find the definition of every tag. I did add tag +definition to the module 'till it works for some documents I found on the +net. If your document use more tags than mine, it won't work. But as I said +above, fixing that should be quite easy. + +I did test DocBook against the SAG (System Administrator Guide) only, but +this document is quite big, and should use most of the DocBook +specificities. + +For DebianDoc, I tested some of the manuals from the DDP, but not all yet. + +=item * + +In case of file inclusion, string reference of messages in PO files +(i.e. lines like C<#: en/titletoc.sgml:9460>) will be wrong. + +This is because I preprocess the file to protect the conditional inclusion +(i.e. the C<E<lt>! [ %foo [> and C<]]E<gt>> stuff) and some entities (like +C<&version;>) from B<onsgmls> because I want them verbatim to the generated +document. For that, I make a temp copy of the input file and do all the +changes I want to this before passing it to B<onsgmls> for parsing. + +So that it works, I replace the entities asking for a file inclusion by the +content of the given file (so that I can protect what needs to be in a subfile +also). But nothing is done so far to correct the references (i.e., filename +and line number) afterward. I'm not sure what the best thing to do is. + +=back + +=cut + +package Locale::Po4a::Sgml; + +use 5.16.0; +use strict; +use warnings; + +require Exporter; +use vars qw(@ISA @EXPORT); +@ISA = qw(Locale::Po4a::TransTractor); +@EXPORT = qw(); + +use Locale::Po4a::TransTractor; +use Locale::Po4a::Common; + +eval qq{use SGMLS}; +if ($@) { + die wrap_mod( + "po4a::sgml", + dgettext( + "po4a", + "The needed module SGMLS.pm was not found and needs to be installed. It can be found on the CPAN, in package libsgmls-perl on debian, etc." + ) + ); +} + +use File::Temp; + +my %debug = ( + 'tag' => 0, + 'generic' => 0, + 'entities' => 0, + 'refs' => 0, + 'onsgmls' => 0 +); + +my $xmlprolog = undef; # the '<?xml ... ?>' line if existing + +sub initialize { + my $self = shift; + my %options = @_; + + $self->{options}{'translate'} = ''; + $self->{options}{'section'} = ''; + $self->{options}{'indent'} = ''; + $self->{options}{'empty'} = ''; + $self->{options}{'verbatim'} = ''; + $self->{options}{'ignore'} = ''; + $self->{options}{'ignore-inclusion'} = ''; + + $self->{options}{'include-all'} = ''; + + $self->{options}{'force'} = ''; + + $self->{options}{'verbose'} = ''; + $self->{options}{'debug'} = ''; + + foreach my $opt ( keys %options ) { + if ( $options{$opt} ) { + die wrap_mod( "po4a::sgml", dgettext( "po4a", "Unknown option: %s" ), $opt ) + unless exists $self->{options}{$opt}; + $self->{options}{$opt} = $options{$opt}; + } + } + if ( $options{'debug'} ) { + foreach ( split /\s+/, $options{'debug'} ) { + die wrap_mod( "po4a::sgml", dgettext( "po4a", "Unknown debug category: %s. Known categories:\n%s" ), + $_, join( " ", keys %debug ) ) + unless exists $debug{$_}; + $debug{$_} = 1; + } + } +} + +sub read { + my ( $self, $filename, $refname, $charset ) = @_; + + push @{ $self->{DOCPOD}{infile} }, $filename; + $self->Locale::Po4a::TransTractor::read( $filename, $refname, $charset ); +} + +sub parse { + my $self = shift; + map { $self->parse_file($_) } @{ $self->{DOCPOD}{infile} }; +} + +# +# Filter out some uninteresting strings for translation +# +sub translate { + my ($self) = (shift); + my ( $string, $ref, $type ) = ( shift, shift, shift ); + my (%options) = @_; + + # don't translate entries composed of one entity + if ( ( ( $string =~ /^&[^;]*;$/ ) || ( $options{'wrap'} && $string =~ /^\s*&[^;]*;\s*$/ ) ) + && !( $self->{options}{'include-all'} ) ) + { + warn wrap_mod( "po4a::sgml", dgettext( "po4a", "msgid skipped to help translators (contains only an entity)" ) ) + unless $self->verbose() <= 0; + return $string . ( $options{'wrap'} ? "\n" : "" ); + } + + # don't translate entries composed of tags only + if ( $string =~ /^(((<[^>]*>)|\s)*)$/ + && !( $self->{options}{'include-all'} ) ) + { + warn wrap_mod( "po4a::sgml", dgettext( "po4a", "msgid skipped to help translators (contains only tags)" ) ) + unless $self->verbose() <= 0; + return $string . ( $options{'wrap'} ? "\n" : "" ); + } + + # don't translate entries composed of marked section tags only + if ( ( $string =~ /^(?:<!\s*\[\s*[^\[]+\s*\[|\]\s*]\s*>|\s)*$/ ) + && !( $self->{options}{'include-all'} ) ) + { + warn wrap_mod( + "po4a::sgml", + dgettext( + "po4a", + "msgid skipped to " + . "help translators (contains only opening or closing " + . "tags of marked sections)" + ), + $string + ) unless $self->verbose() <= 0; + return $string . ( $options{'wrap'} ? "\n" : "" ); + } + + $string = $self->SUPER::translate( $string, $ref, $type, %options ); + + $string = $self->post_trans( $string, $ref, $type ); + + return $string; +} + +sub post_trans { + my ( $self, $str, $ref, $type ) = @_; + + # Change ascii non-breaking space to an + my $nbs_out = "\xA0"; + my $enc_length = Encode::from_to( $nbs_out, "latin1", $self->get_out_charset ); + $str =~ s/\Q$nbs_out/ /g if defined $enc_length; + + return $str; +} + +# +# Make sure our cruft is removed from the file +# +sub pushline { + my ( $self, $line ) = @_; + $line =~ s/{PO4A-amp}/&/g; + $self->SUPER::pushline($line); +} + +sub set_tags_kind { + my $self = shift; + my (%kinds) = @_; + + foreach (qw(translate empty section verbatim ignore attributes qualify)) { + $self->{SGML}->{k}{$_} = $self->{options}{$_} ? $self->{options}{$_} . ' ' : ''; + + # Remove the default behavior for the tags defined with the + # options. + foreach my $k ( keys %kinds ) { + foreach my $t ( split( " ", $self->{SGML}->{k}{$_} ) ) { + $kinds{$k} =~ s/\b$t\b//; + } + } + } + + foreach ( keys %kinds ) { + die "po4a::sgml: internal error: set_tags_kind called with unrecognized arg $_" + if ( $_ !~ /^(translate|empty|verbatim|ignore|indent|attributes|qualify)$/ ); + + $self->{SGML}->{k}{$_} .= $kinds{$_}; + } +} + +# +# Do the actual work, using the SGMLS package and settings done elsewhere. +# +sub parse_file { + my ( $self, $mastername ) = @_; + my ($prolog); + + # Rewrite the file to: + # - protect optional inclusion marker (i.e. "<![ %str [" and "]]>") + # - protect entities from expansion (ie "&release;") + my $origfile = ""; + my $i = 0; + while ( $i < @{ $self->{TT}{doc_in} } ) { + $origfile .= ${ $self->{TT}{doc_in} }[$i]; + $i += 2; + } + + unless ( $self->{options}{'force'} ) { + + # Detect if we can find the DTD + my ( $tmpfh, $tmpfile ) = File::Temp::tempfile( + "po4a-XXXX", + SUFFIX => ".sgml", + DIR => File::Spec->tmpdir(), + UNLINK => 0 + ); + print $tmpfh $origfile; + close $tmpfh + or die wrap_mod( "po4a::sgml", dgettext( "po4a", "Cannot close tempfile: %s" ), $! ); + if ( system("onsgmls -p $tmpfile") ) { + unlink($tmpfile); + die wrap_mod( + "po4a::sgml", + dgettext( + "po4a", "Error while running onsgmls -p. Please check if onsgmls and the DTD are installed." + ) + ); + } + unlink($tmpfile); + } + + # Detect the XML pre-prolog + if ( $origfile =~ s/^(\s*<\?xml[^?]*\?>)// ) { + warn wrap_mod( + "po4a::sgml", + dgettext( + "po4a", + "Trying to handle a XML document as a SGML one. " + . "Feel lucky if it works, help us implementing a proper XML backend if it does not." + ), + $mastername + ) unless $self->verbose() <= 0; + $xmlprolog = $1; + } + + # Get the prolog + { + $prolog = $origfile; + my $lvl; # number of '<' seen without matching '>' + my $pos = 0; # where in the document (in chars) while detecting prolog boundaries + + unless ( $prolog =~ s/^(.*<!DOCTYPE).*$/$1/is ) { + die wrap_mod( + "po4a::sgml", + dgettext( + "po4a", + "This file is not a master SGML document (no DOCTYPE). " + . "It may be a file to be included by another one, in which case it should not be passed to po4a directly. Text from included files is extracted/translated when handling the master file including them." + ) + ); + } + $pos += length($prolog); + $lvl = 1; + while ( $lvl != 0 ) { + + # Eat comments in the prolog, since there may be some '>' or '<' in them. + if ( $origfile =~ m/^.{$pos}(<!--.*?-->)/s ) { + print "Found a comment in the prolog: $1\n" if ( $debug{'generic'} ); + $pos += length($1); + + # take care of the line numbers + my @a = split( /\n/, $1 ); + shift @a; # nb line - 1 + while ( defined( shift @a ) ) { + $prolog .= "\n"; + } + next; + } + + # Search the closing '>' + my ($c) = substr( $origfile, $pos, 1 ); + $lvl++ if ( $c eq '<' ); + $lvl-- if ( $c eq '>' ); + $prolog = "$prolog$c"; + $pos++; + } + } + + # Add the definition of new tags that will be used for the + # conditionnal inclusions + if ( $origfile =~ /^.*<!DOCTYPE[^[>]*\[/is ) { + $origfile =~ + s/^(.*<!DOCTYPE[^[>]*\[)/$1 <!ELEMENT PO4ABEG - o empty> <!ATTLIST PO4ABEG name CDATA #REQUIRED> <!ELEMENT PO4AEND - o empty>/is; + } + + print STDERR "PROLOG=$prolog\n------------\n" if ( $debug{'generic'} ); + + # Configure the tags for this dtd + if ( $prolog =~ /debiandoc/i ) { + $self->set_tags_kind( + "translate" => "author version abstract title" . "date copyrightsummary heading p " . "example tag title", + "empty" => "date ref manref url toc", + "verbatim" => "example", + "ignore" => "package prgn file tt em var " + . "name email footnote po4aend po4abeg " + . "strong ftpsite ftppath qref", + "indent" => "appendix " . "book " + . "chapt copyright " + . "debiandoc " + . "enumlist " . "item " . "list " + . "sect sect1 sect2 sect3 sect4 " + . "tag taglist titlepag toc" + ); + + } elsif ( $prolog =~ /docbook/i ) { + $self->set_tags_kind( + "translate" => "abbrev appendixinfo artheader attribution " + . "biblioentry biblioset " + . "chapterinfo collab collabname confdates confgroup conftitle " . "date " + . "edition editor entry example " + . "figure " + . "glosssee glossseealso glossterm " + . "holder " + . "member msgaud msglevel msgorig " + . "orgdiv orgname othername " + . "pagenums para phrase pubdate publishername primary " + . "refclass refdescriptor refentrytitle refmiscinfo refname refpurpose releaseinfo remark revnumber revremark " + . "screeninfo seg secondary see seealso segtitle simpara substeps subtitle synopfragmentref synopsis " + . "term tertiary title titleabbrev " + . "contrib epigraph", + "empty" => "audiodata colspec graphic imagedata textdata sbr spanspec videodata xref", + "indent" => "abstract answer appendix article articleinfo audioobject author authorgroup " + . "bibliodiv bibliography blockquote blockinfo book bookinfo bridgehead " + . "callout calloutlist caption caution chapter copyright " + . "dedication docinfo " + . "entry " + . "formalpara " + . "glossary glossdef glossdiv glossentry glosslist group " + . "imageobject important index indexterm informaltable itemizedlist " + . "keyword keywordset " + . "legalnotice listitem lot " + . "mediaobject msg msgentry msginfo msgexplan msgmain msgrel msgsub msgtext " . "note " + . "objectinfo orderedlist " + . "part partintro preface procedure publisher " + . "qandadiv qandaentry qandaset question " + . "reference refentry refentryinfo refmeta refnamediv refsect1 refsect1info refsect2 refsect2info refsect3 refsect3info refsection refsectioninfo refsynopsisdiv refsynopsisdivinfo revision revdescription row " + . "screenshot sect1 sect1info sect2 sect2info sect3 sect3info sect4 sect4info sect5 sect5info section sectioninfo seglistitem segmentedlist set setindex setinfo shortcut simplelist simplemsgentry simplesect step synopfragment " + . "table tbody textobject tgroup thead tip toc " + . "variablelist varlistentry videoobject " + . "warning", + "verbatim" => "address cmdsynopsis holder literallayout programlisting " + . "refentrytitle refname refpurpose screen term title", + "ignore" => "acronym action affiliation anchor application arg author authorinitials " + . "city citation citerefentry citetitle classname co command computeroutput constant corpauthor country " + . "database po4abeg po4aend " + . "email emphasis envar errorcode errorname errortext errortype exceptionname " + . "filename firstname firstterm footnote footnoteref foreignphrase function " + . "glossterm guibutton guiicon guilabel guimenu guimenuitem guisubmenu " + . "hardware " + . "indexterm informalexample inlineequation inlinegraphic inlinemediaobject interface interfacename isbn " + . "keycap keycode keycombo keysym " + . "link lineannotation literal " + . "manvolnum markup medialabel menuchoice methodname modespec mousebutton " + . "nonterminal " + . "olink ooclass ooexception oointerface option optional othercredit " + . "parameter personname phrase productname productnumber prompt property pubsnumber " + . "quote " + . "remark replaceable returnvalue revhistory " + . "sgmltag sidebar structfield structname subscript superscript surname symbol systemitem " + . "token trademark type " + . "ulink userinput " + . "varname volumenum " + . "wordasword " . "xref " . "year", + "attributes" => "<(article|book)>lang" + ); + + } else { + if ( $self->{options}{'force'} ) { + warn wrap_mod( "po4a::sgml", + dgettext( "po4a", "DTD of this file is unknown, but proceeding as requested." ) ); + $self->set_tags_kind(); + } else { + die wrap_mod( "po4a::sgml", + dgettext( "po4a", "DTD of this file is unknown. (supported: DebianDoc, DocBook). The prolog follows:" ) + . "\n$prolog" ); + } + } + + # Hash of the file entities that won't be included + my %ignored_inclusion = (); + foreach ( split / /, $self->{options}{'ignore-inclusion'} ) { + $ignored_inclusion{$_} = 1; + } + + # Prepare the reference indirection stuff + my @refs; + my $length = ( $origfile =~ tr/\n/\n/ ); + print "XX Prepare reference indirection stuff\n" if $debug{'refs'}; + for ( my $i = 1 ; $i <= $length ; $i++ ) { + push @refs, "$mastername:$i"; + print "$mastername:$i\n" if $debug{'refs'}; + } + + # protect the conditional inclusions in the file + $origfile =~ s/<!\[\s*IGNORE\s*\[/{PO4A-beg-IGNORE}/g; # cond. incl. starts + $origfile =~ s/<!\[\s*CDATA\s*\[/{PO4A-beg-CDATA}/g; # cond. incl. starts + $origfile =~ s/<!\[\s*RCDATA\s*\[/{PO4A-beg-RCDATA}/g; # cond. incl. starts + $origfile =~ s/<!\[\s*([^\[\s]+)\s*\[/<po4abeg name="$1">/g; # cond. incl. starts + $origfile =~ s/\]\]>/<po4aend>/g; # cond. incl. end + + # Remove <![ IGNORE [ sections + # FIXME: we don't support included PO4A-beg- + my $tmp1 = $origfile; + while ( $tmp1 =~ m/^(.*?)(\{PO4A-beg-\s*IGNORE\s*}(?:.+?)<po4aend>)(.*)$/s ) { + my ( $begin, $ignored, $end ) = ( $1, $2, $3 ); + my @begin = split( /\n/, $begin ); + my @ignored = split( /\n/, $ignored ); + my $pre = scalar @begin; + my $len = ( scalar @ignored ) - 1; + $pre++ if ( $begin =~ /\n$/s ); + $len++ if ( $end =~ /^\n/s ); + + # remove the references of the ignored lines + splice @refs, $pre + 1, $len - 1; + + # remove the lines + $tmp1 = $begin . $end; + } + $origfile = $tmp1; + + # The <, >, and & in a CDATA must be escaped because they do not + # correspond to tags or entities delimiters. + $tmp1 = $origfile; + $origfile = ""; + while ( $tmp1 =~ m/^(.*?{PO4A-beg-\s*(?:CDATA|RCDATA)\s*})(.+?)(<po4aend>.*)$/s ) { + my ( $begin, $tmp ) = ( $1, $2 ); + $tmp1 = $3; + $tmp =~ s/</{PO4A-lt}/gs; + $tmp =~ s/>/{PO4A-gt}/gs; + $tmp =~ s/&/{PO4A-amp}/gs; + $origfile .= $begin . $tmp; + } + $origfile .= $tmp1; + + # Deal with the %entities; in the prolog. God damn it, this code is gross! + # Try hard not to change the number of lines to not fuck up the references + my %prologentincl; + my $moretodo = 1; + PROLOGENTITY: while ($moretodo) { # non trivial loop to deal with recursive inclusion + $moretodo = 0; + + # Unprotect not yet defined inclusions + $prolog =~ s/{PO4A-percent}/%/sg; + print STDERR "prolog=>>>>$prolog<<<<\n" + if ( $debug{'entities'} ); + while ( $prolog =~ /(.*?)<!ENTITY\s*%\s*(\S*)\s+SYSTEM\s*"([^>"]*)"\s*>(.*)$/is ) { #})"{ (Stupid editor) + print STDERR "Seen the definition entity of prolog inclusion '$2' (=$3)\n" + if ( $debug{'entities'} ); + + # Preload the content of the entity. + my $key = $2; + my $filename = $3; + my $origfilename = $filename; + my ( $begin, $end ) = ( $1, $4 ); + if ( $filename !~ m%^/% && $mastername =~ m%/% ) { + my $dir = $mastername; + $dir =~ s%/[^/]*$%%; + $filename = "$dir/$filename"; + + # origfile also needs to be fixed otherwise onsgmls won't + # find the file. + $origfile =~ s/(<!ENTITY\s*%\s*\Q$key\E\s+SYSTEM\s*")\Q$origfilename\E("\s*>)/$1$filename$2/gsi; + } + if ( defined $ignored_inclusion{$key} or !-e $filename ) { + + # We won't expand this entity. + # And we avoid onsgmls to do so. + $prolog = "$begin<!--{PO4A-ent-beg-$key}$filename" . "{PO4A-ent-end}-->$end"; + } else { + $prolog = $begin . $end; + ( -e $filename && open IN, "<$filename" ) + || die wrap_mod( "po4a::sgml", dgettext( "po4a", "Cannot open %s (content of entity %s%s;): %s" ), + $filename, '%', $key, $! ); + local $/ = undef; + $prologentincl{$key} = <IN>; + close IN; + print STDERR "Content of \%$key; is $filename (" + . ( $prologentincl{$key} =~ tr/\n/\n/ ) + . " lines long)\n" + if ( $debug{'entities'} ); + print STDERR "content: " . $prologentincl{$key} . "\n" + if ( $debug{'entities'} ); + $moretodo = 1; + next PROLOGENTITY; + } + } + while ( $prolog =~ /(.*?)<!ENTITY\s*%\s*(\S*)\s*"([^>"]*)"\s*>(.*)$/is ) { #})"{ (Stupid editor) + print STDERR "Seen the definition entity of prolog definition '$2' (=$3)\n" + if ( $debug{'entities'} ); + + # Preload the content of the entity. + my $key = $2; + $prolog = $1 . $4; + $prologentincl{$key} = $3; + print STDERR "content: " . $prologentincl{$key} . "\n" + if ( $debug{'entities'} ); + $moretodo = 1; + next PROLOGENTITY; + } + while ( $prolog =~ /^(.*?)%([^;\s]*);(.*)$/s ) { + my ( $pre, $ent, $post ) = ( $1, $2, $3 ); + + # Yeah, right, the content of the entity can be defined in a not yet loaded entity + # It's easy to build a weird case where all that shit collapses poorly. But why the + # hell are you using those strange constructs in your document, damn it? + print STDERR "Seen prolog inclusion $ent\n" if ( $debug{'entities'} ); + if ( defined( $prologentincl{$ent} ) ) { + $prolog = $pre . $prologentincl{$ent} . $post; + print STDERR "Change \%$ent; to its content in the prolog\n" + if $debug{'entities'}; + $moretodo = 1; + } else { + + # AAAARGH stupid document using %bla; and having then defined in another inclusion! + # Protect it for this pass, and unprotect it on next one + print STDERR "entity $ent not defined yet ?!\n" + if $debug{'entities'}; + $prolog = "$pre" . '{PO4A-percent}' . "$ent;$post"; + } + } + } + $prolog =~ s/<!--\{PO4A-ent-beg-(.*?)\}(.*?)\{PO4A-ent-end\}-->/<!ENTITY % $1 SYSTEM "$2">/g; + + # Unprotect undefined inclusions, and die of them + $prolog =~ s/\{PO4A-percent\}/%/sg; + if ( $prolog =~ /%([^;\s]*);/ ) { + die wrap_mod( "po4a::sgml", dgettext( "po4a", "unrecognized prolog inclusion entity: %%%s;" ), $1 ) + unless ( $ignored_inclusion{$1} ); + } + + # Protect &entities; (all but the ones asking for a file inclusion) + # search the file inclusion entities + my %entincl; + my $searchprolog = $prolog; + while ( $searchprolog =~ /(.*?)<!ENTITY\s+(\S*)\s+SYSTEM\s*"([^>"]*)"\s*>(.*)$/is ) { #})"{ + print STDERR "Seen the entity of inclusion $2 (=$3)\n" + if ( $debug{'entities'} ); + my $key = $2; + my $filename = $3; + my $origfilename = $filename; + $searchprolog = $4; + if ( $filename !~ m%^/% && $mastername =~ m%/% ) { + my $dir = $mastername; + $dir =~ s%/[^/]*$%%; + $filename = "$dir/$filename"; + + # origfile also needs to be fixed otherwise onsgmls won't find + # the file. + $origfile =~ s/(<!ENTITY\s+$key\s+SYSTEM\s*")\Q$origfilename\E("\s*>)/$1$filename$2/gsi; + } + if ( ( not defined $ignored_inclusion{$2} ) and ( -e $filename ) ) { + $entincl{$key}{'filename'} = $filename; + + # Preload the content of the entity + ( -e $filename && open IN, "<$filename" ) + || die wrap_mod( "po4a::sgml", dgettext( "po4a", "Cannot open %s (content of entity %s%s;): %s" ), + $filename, '&', $key, $! ); + local $/ = undef; + $entincl{$key}{'content'} = <IN>; + close IN; + $entincl{$key}{'length'} = ( $entincl{$key}{'content'} =~ tr/\n/\n/ ); + print STDERR "read $filename (content of \&$key;, $entincl{$key}{'length'} lines long)\n" + if ( $debug{'entities'} ); + } + } + + # Change the entities including files in the document + my $dosubstitution = 1; + while ($dosubstitution) { + $dosubstitution = 0; + foreach my $key ( keys %entincl ) { + + # The external entity can be referenced as &key; or &key + # In the second case, we must differentiate &key and &key2 + while ( $origfile =~ /^(.*?)&$key(;.*$|[^-_:.A-Za-z0-9].*$|$)/s ) { + + # Since we will include a new file, we + # must do a new round of substitutions. + $dosubstitution = 1; + my ( $begin, $end ) = ( $1, $2 ); + $end = "" unless ( defined $end ); + $end =~ s/^;//s; + + if ( $begin =~ m/.*<!--(.*?)$/s and $1 !~ m/-->/s ) { + + # This entity is commented. Just remove it. + $origfile = $begin . $end; + next; + } + + # add the refs + my $len = $entincl{$key}{'length'}; # number added by the inclusion + my $pre = ( $begin =~ tr/\n/\n/ ); # number of \n + my $post = ( $end =~ tr/\n/\n/ ); + print "XX Add a ref. pre=$pre; len=$len; post=$post\n" + if $debug{'refs'}; + + # Keep a reference of inclusion position in main file + my $main = $refs[$pre]; + + # Remove the references for the lines after the inclusion + # point. + my @endrefs = splice @refs, $pre + 1; + + # Add the references of the added lines + my $i; + for ( $i = 0 ; $i < $len ; $i++ ) { + $refs[ $i + $pre ] = "$main $entincl{$key}{'filename'}:" . ( $i + 1 ); + } + + if ( $begin !~ m/\n[ \t]*$/s ) { + if ( $entincl{$key}{'content'} =~ m/^[ \t]*\n/s ) { + + # There is nothing in the first line of the + # included file, and something on the line before + # the inclusion The line reference will be more + # informative like this: + $refs[$pre] = $main; + } + } + if ( $end !~ s/^[ \t]*\n//s ) { + if ( $entincl{$key}{'content'} =~ m/\n[ \t]*$/s ) { + + # There is something on the line after the + # inclusion, and there is an end of line at the + # end of the included file. We must add the line + # reference of the remainder on the line: + push @refs, $main; + } + } + + # Append the references removed earlier (lines after the + # inclusion point). + push @refs, @endrefs; + + # Do the substitution + $origfile = "$begin" . $entincl{$key}{'content'} . "$end"; + print STDERR "substitute $key\n" if ( $debug{'entities'} ); + } + } + } + $origfile =~ s/\G(.*?)&([A-Za-z_:][-_:.A-Za-z0-9]*|#[0-9]+|#x[0-9a-fA-F]+)\b/$1\{PO4A-amp\}$2/gs; + if ( defined($xmlprolog) && length($xmlprolog) ) { + $origfile =~ s/\/>/\{PO4A-close\}>/gs; + } + + if ( $debug{'refs'} ) { + print "XX Resulting shifts\n"; + for ( my $i = 0 ; $i < scalar @refs ; $i++ ) { + print "$mastername:" . ( $i + 1 ) . " -> $refs[$i]\n"; + } + } + + my ( $tmpfh, $tmpfile ) = File::Temp::tempfile( + "po4a-XXXX", + SUFFIX => ".sgml", + DIR => File::Spec->tmpdir(), + UNLINK => 0 + ); + print $tmpfh $origfile; + close $tmpfh or die wrap_mod( "po4a::sgml", dgettext( "po4a", "Cannot close tempfile: %s" ), $! ); + + my $cmd = "onsgmls -l -E 0 -wno-valid $tmpfile" . ( $debug{'onsgmls'} ? "" : " 2>/dev/null" ) . " |"; + print STDERR "CMD=$cmd\n" if ( $debug{'generic'} or $debug{'onsgmls'} ); + + open( IN, $cmd ) || die wrap_mod( "po4a::sgml", dgettext( "po4a", "Cannot run onsgmls: %s" ), $! ); + + # The kind of tags + my ( %translate, %empty, %verbatim, %indent, %exist, %attribute, %qualify ); + foreach ( split( / /, ( $self->{SGML}->{k}{'translate'} || '' ) ) ) { + $translate{ uc $_ } = 1; + $indent{ uc $_ } = 1; + $exist{ uc $_ } = 1; + } + foreach ( split( / /, ( $self->{SGML}->{k}{'empty'} || '' ) ) ) { + $empty{ uc $_ } = 1; + $exist{ uc $_ } = 1; + } + foreach ( split( / /, ( $self->{SGML}->{k}{'verbatim'} || '' ) ) ) { + $translate{ uc $_ } = 1; + $verbatim{ uc $_ } = 1; + $exist{ uc $_ } = 1; + } + foreach ( split( / /, ( $self->{SGML}->{k}{'indent'} || '' ) ) ) { + $translate{ uc $_ } = 1; + $indent{ uc $_ } = 1; + $exist{ uc $_ } = 1; + } + foreach ( split( / /, ( $self->{SGML}->{k}{'ignore'} ) || '' ) ) { + $exist{ uc $_ } = 1; + } + foreach ( split( / /, ( $self->{SGML}->{k}{'attributes'} || '' ) ) ) { + my ( $attr, $tags ); + if (m/(^.*>)(\w+)/) { + $attr = uc $2; + $tags = $1; + } else { + $attr = uc $_; + $tags = ".*"; + } + if ( exists $attribute{$attr} ) { + $attribute{$attr} .= "|$tags"; + } else { + $attribute{$attr} = $tags; + } + } + foreach ( split( / /, ( $self->{SGML}->{k}{'qualify'} ) || '' ) ) { + $qualify{ uc $_ } = 1; + $attribute{ uc $_ } = '.*' unless exists $attribute{ uc $_ }; + } + + # What to do before parsing + + # push the XML prolog if existing + $self->pushline( $xmlprolog . "\n" ) if ( defined($xmlprolog) && length($xmlprolog) ); + + # Put the prolog into the file, allowing for entity definition translation + # <!ENTITY myentity "definition_of_my_entity"> + # and push("<!ENTITY myentity \"".$self->translate("definition_of_my_entity") + if ( $prolog =~ m/(.*?\[)(.*)(\]>)/s ) { + warn "Pre=~~$1~~;Post=~~$3~~\n" if ( $debug{'entities'} ); + $self->pushline( $1 . "\n" ) if ( length($1) ); + $prolog = $2; + my ($post) = $3; + while ( $prolog =~ m/^(.*?)<!ENTITY\s+(\S*)\s+"([^"]*)"\s*>(.*)$/is ) { #" ){ + $self->pushline($1) if length($1); + $self->pushline( "<!ENTITY $2 \"" . $self->translate( $3, "", "definition of entity \&$2;" ) . "\">" ); + warn "Seen text entity $2\n" if ( $debug{'entities'} ); + $prolog = $4; + } + $prolog .= $post; + $self->pushline( $prolog . "\n" ) if ( length($prolog) ); + } else { + warn "No entity declaration detected in ~~$prolog~~...\n" if ( $debug{'entities'} ); + $self->pushline($prolog) if length($prolog); + } + + # The parse object. + # Damn SGMLS. It makes me do crude things. + no strict "subs"; + my $parse = new SGMLS(IN); + use strict; + + # Some values for the parsing + my @open = (); # opened translation container tags + my $verb = 0; # can we wrap or not + my $verb_last_ref; + my $seenfootnote = 0; + my $indent = 0; # indent level + my $lastchar = ''; # + my $buffer = ""; # what we will soon handle + + # Keep a reference to the last line indicated by onsgmls + my $line = 0; + + # Unfortunately, onsgmls do not mention all the line changes. We have + # to keep track of the number of lines seen in the "record ends". + my $adds = 0; + + # If the last line received contains only spaces, do not take it into + # account for the line reference of the paragraph. + my $empty_last_cdata = 0; + + # run the appropriate handler for each event + EVENT: while ( my $event = $parse->next_event ) { + + # get the line reference to build po entries + if ( $line != $parse->line ) { + + # onsgmls informs us of that the line changed. Reset $adds and + # $empty_last_cdata + $adds = 0; + $empty_last_cdata = 0; + $line = $parse->line; + } + my $ref = $refs[ $parse->line - 1 + $adds - $empty_last_cdata ]; + + # In verbatim mode, keep the current line reference. + if ($verb) { + $ref = $refs[ $parse->line - 1 ]; + } + my $type; + + if ( $event->type eq 'start_element' ) { + die wrap_ref_mod( $ref, "po4a::sgml", dgettext( "po4a", "Unknown tag %s" ), $event->data->name ) + unless $exist{ $event->data->name }; + + $lastchar = ">"; + + # Which tag did we see? + my $tag = ''; + $tag .= '<' . lc( $event->data->name() ); + foreach my $attr ( sort $event->data->attribute_names() ) { + + my $val = ${ $event->data->attributes() }{$attr}; + my $value = $val->value(); + + # if ($val->type() eq 'IMPLIED') { + # $tag .= ' '.lc($attr).'="'.lc($attr).'"'; + # } els + if ( $val->type() eq 'CDATA' + || $val->type() eq 'IMPLIED' ) + { + if ( defined $value && length($value) ) { + my $lattr = lc $attr; + my $uattr = uc $attr; + if ( exists $attribute{$uattr} ) { + my $context = ""; + foreach my $o (@open) { + next if ( !defined $o or $o =~ m%^</% ); + $o =~ s/ .*/>/; + $context .= $o; + } + $context = join( "", $context, "<", lc( $event->data->name() ), ">" ); + if ( $context =~ /^($attribute{$uattr})$/ ) { + if ( $qualify{$uattr} ) { + my $translated = + $self->translate( "$lattr=$value", $ref, "attribute $context$lattr" ); + if ( $translated =~ s/^$lattr=// ) { + $value = $translated; + } else { + die wrap_mod( "po4a::sgml", + dgettext( "po4a", "bad translation '%s' for '%s' in '%s'" ), + $translated, $context . $lattr, $ref ); + } + } else { + $value = $self->translate( $value, $ref, "attribute $context$lattr" ); + } + } + } + if ( $value =~ m/\"/ ) { + $value = "'" . $value . "'"; + } else { + $value = '"' . $value . '"'; + } + $tag .= " $lattr=$value"; + } + } elsif ( $val->type() eq 'NOTATION' ) { + } else { + $tag .= ' ' . lc($attr) . '="' . lc($value) . '"' + if ( defined $value && length($value) ); + } + } + $tag .= '>'; + + # debug + print STDERR "Seen $tag, open level=" . ( scalar @open ) . ", verb=$verb\n" + if ( $debug{'tag'} ); + + if ( $event->data->name() eq 'FOOTNOTE' ) { + + # we want to put the <para> inside the <footnote> in the same msgid + $seenfootnote = 1; + } + + if ($seenfootnote) { + $buffer .= $tag; + next EVENT; + } + if ( $translate{ $event->data->name() } ) { + + # Build the type + if ( scalar @open > 0 ) { + $type = $open[$#open] . $tag; + } else { + $type = $tag; + } + + # do the job + if ( @open > 0 ) { + $self->end_paragraph( $buffer, $ref, $type, $verb, $indent, @open ); + } else { + $self->pushline($buffer) if $buffer; + } + $buffer = ""; + push @open, $tag; + } elsif ( $indent{ $event->data->name() } ) { + die wrap_ref_mod( $ref, "po4a::sgml", + dgettext( "po4a", "Closing tag for a translation container missing before %s" ), $tag ) + if ( scalar @open ); + } + + if ( $verbatim{ $event->data->name() } ) { + $verb++; + + # Keep a reference to the line that openned the verbatim + # section. This is needed to check if its data starts on + # the same line. + $verb_last_ref = $ref; + } + if ($verb) { + + # Tag in a verbatim section. Check if it appeared at + # the same line than the previous data. If not, it + # means that an end of line must be added to the + # buffer. + if ( $ref ne $verb_last_ref ) { + + # FIXME: Does it work if $verb > 1 + $buffer .= "\n"; + $verb_last_ref = $ref; + } + } + + if ( $indent{ $event->data->name() } ) { + + # push the indenting space only if not in verb before that tag + # push trailing "\n" only if not in verbose afterward + $self->pushline( ( $verb > 1 ? "" : ( " " x $indent ) ) . $tag . ( $verb ? "" : "\n" ) ); + $indent++ unless $empty{ $event->data->name() }; + } else { + $tag =~ s/<po4abeg name="([^"]+)">/<![ $1 [/; #"; Stupid emacs + $tag =~ s/<po4aend>/]]>/; + $buffer .= $tag; + } + } # end of type eq 'start_element' + + elsif ( $event->type eq 'end_element' ) { + my $tag = ( + $empty{ $event->data->name() } + ? '' + : '</' . lc( $event->data->name() ) . '>' + ); + + if ($verb) { + + # Tag in a verbatim section. Check if it appeared at + # the same line than the previous data. If not, it + # means that an end of line must be added to the + # buffer. + if ( $ref ne $verb_last_ref ) { + + # FIXME: Does it work if $verb > 1 + $buffer .= "\n"; + $verb_last_ref = $ref; + } + } + print STDERR "Seen $tag, level=" . ( scalar @open ) . ", verb=$verb\n" + if ( $debug{'tag'} ); + + $lastchar = ">"; + + if ( $event->data->name() eq 'FOOTNOTE' ) { + + # we want to put the <para> inside the <footnote> in the same msgid + $seenfootnote = 0; + } + + if ($seenfootnote) { + $buffer .= $tag; + next EVENT; + } + if ( $translate{ $event->data->name() } ) { + $type = $open[$#open] . $tag; + $self->end_paragraph( $buffer, $ref, $type, $verb, $indent, @open ); + $buffer = ""; + pop @open; + if ( @open > 0 ) { + pop @open; + push @open, $tag; + } + } elsif ( $indent{ $event->data->name() } ) { + die wrap_ref_mod( $ref, "po4a::sgml", + dgettext( "po4a", "Closing tag for a translation container missing before %s" ), $tag ) + if ( scalar @open ); + } + + unless ( $event->data->name() =~ m/^(PO4ABEG|PO4AEND)$/si ) { + if ( $indent{ $event->data->name() } ) { + $indent--; + + # add indenting space only when not in verbatim + # add the tailing \n only if out of verbatim after that tag + $self->pushline( ( $verb ? "" : ( " " x $indent ) ) . $tag . ( $verb > 1 ? "" : "\n" ) ); + } else { + $buffer .= $tag; + } + $verb-- if $verbatim{ $event->data->name() }; + } + } # end of type eq 'end_element' + + elsif ( $event->type eq 'cdata' ) { + my $cdata = $event->data; + $empty_last_cdata = ( $cdata =~ m/^\s*$/ ); + $cdata =~ s/{PO4A-lt}/</g; + $cdata =~ s/{PO4A-gt}/>/g; + $cdata =~ s/{PO4A-amp}/&/g; + $cdata =~ s/{PO4A-end}/\]\]>/g; + $cdata =~ s/{PO4A-beg-([^\}]+)}/<!\[$1\[/g; + if ($verb) { + + # Check if this line of data appear on the same line + # than the previous tag. If not, append an end of line + # to the buffer. + if ( $ref ne $verb_last_ref ) { + $buffer .= "\n"; + $verb_last_ref = $ref; + } + } else { + $cdata =~ s/\\t/ /g; + $cdata =~ s/\s+/ /g; + $cdata =~ s/^\s//s if $lastchar eq ' '; + } + $lastchar = substr( $cdata, -1, 1 ); + $buffer .= $cdata; + if ( defined($xmlprolog) && length($xmlprolog) ) { + $buffer =~ s/>PO4A-close\}>/\/>/sg; + $buffer =~ s/PO4A-close\}>//sg; # This should not be necessary + } + } # end of type eq 'cdata' + + elsif ( $event->type eq 'sdata' ) { + my $sdata = $event->data; + $sdata =~ s/^\[//; + $sdata =~ s/\s*\]$//; + $lastchar = substr( $sdata, -1, 1 ); + $buffer .= '&' . $sdata . ';'; + } # end of type eq 'sdata' + + elsif ( $event->type eq 're' ) { + + # End of record, the line reference shall be incremented. + $adds += 1; + if ($verb) { + + # Check if this line of data appear on the same line + # than the previous tag. If not, append an end of line + # to the buffer. + if ( $ref ne $verb_last_ref ) { + $buffer .= "\n"; + $verb_last_ref = $ref; + } + $buffer .= "\n"; + } elsif ( $lastchar ne ' ' ) { + $buffer .= " "; + } + $lastchar = ' '; + } #end of type eq 're' + + elsif ( $event->type eq 'conforming' ) { + + } elsif ( $event->type eq 'pi' ) { + my $pi = $event->data; + $buffer .= "<?$pi>"; + + } else { + die wrap_ref_mod( + $refs[ $parse->line ], + "po4a::sgml", dgettext( "po4a", "Unknown SGML event type: %s" ), + $event->type + ); + } + } + + # What to do after parsing + $self->pushline($buffer); + close(IN); + if ( $? != 0 and $self->verbose() > 0 ) { + warn wrap_mod( + "po4a::sgml", + dgettext( + "po4a", + "Warning: onsgmls produced some errors. " + . "This is usually caused by po4a, which modifies the input " + . "and restores it afterwards, causing the input of onsgmls " + . "to be invalid. This is usually safe, but you may wish " + . "to verify the generated document with onsgmls -wno-valid." + ) + ); + unless ( $debug{'onsgmls'} ) { + warn wrap_mod( + "po4a::sgml", + dgettext( + "po4a", + "To see the error message, " + . "rerun po4a with this additional argument:\n" + . " -o debug=onsgmls" + ) + ); + } + } + unlink($tmpfile) unless ( $debug{'refs'} or $debug{'onsgmls'} ); +} + +sub end_paragraph { + my ( $self, $para, $ref, $type, $verb, $indent ) = ( shift, shift, shift, shift, shift, shift ); + my (@open) = @_; + die "Internal error: no paragraph to end here!!" + unless scalar @open; + + return unless defined($para) && length($para); + + if ( ( $para =~ m/^\s*$/s ) and ( not $verb ) ) { + + # In non-verbatim environments, a paragraph with only spaces is + # like an empty paragraph + return; + } + + # unprotect &entities; + $para =~ s/{PO4A-amp}/&/g; + + # remove the name"\|\|" onsgmls added as attributes + $para =~ s/ name=\"\\\|\\\|\"//g; + $para =~ s/ moreinfo=\"none\"//g; + + # Extract the leading and trailing spaces. They will be restored only + # in verbatim environments. + my ( $leading_spaces, $trailing_spaces ) = ( "", "" ); + if ($verb) { + + # In the verbatim mode, we can ignore empty lines, but not the + # leading spaces or tabulations. Otherwise, the PO will look + # weird. + if ( $para =~ m/^(\s*\n)(.*?)(\s*)$/s ) { + $leading_spaces = $1; + $para = $2; + $trailing_spaces = $3; + } + } else { + if ( $para =~ m/^(\s*)(.*?)(\s*)$/s ) { + $leading_spaces = $1; + $para = $2; + $trailing_spaces = $3; + } + } + + $para = $self->translate( + $para, $ref, $type, + 'wrap' => !$verb, + 'wrapcol' => ( 75 - $indent ) + ); + + if ($verb) { + $para = $leading_spaces . $para . $trailing_spaces; + } else { + $para =~ s/^\s+//s; + my $toadd = " " x ( $indent + 1 ); + $para =~ s/^/$toadd/mg; + $para .= "\n"; + } + + $self->pushline($para); +} + +1; + +=head1 AUTHORS + +This module is an adapted version of sgmlspl (SGML postprocessor for the +ONSGMLS parser) which was: + + Copyright © 1995 David Megginson <dmeggins@aix1.uottawa.ca> + +The adaptation for po4a was done by: + + Denis Barbier <barbier@linuxfr.org> + Martin Quinson (mquinson#debian.org) + +=head1 COPYRIGHT AND LICENSE + + Copyright © 1995 David Megginson <dmeggins@aix1.uottawa.ca>. + Copyright © 2002-2005 SPI, Inc. + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). diff --git a/lib/Locale/Po4a/TeX.pm b/lib/Locale/Po4a/TeX.pm new file mode 100644 index 0000000..b68adab --- /dev/null +++ b/lib/Locale/Po4a/TeX.pm @@ -0,0 +1,1756 @@ +#!/usr/bin/perl -w + +# Copyright © 2004, 2005 Nicolas FRANÇOIS <nicolas.francois@centraliens.net> +# +# This file is part of po4a. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with po4a; if not, write to the Free Software +# Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +######################################################################## + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::TeX - convert TeX documents and derivatives from/to PO files + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::TeX is a module to help the translation of TeX documents into +other [human] languages. It can also be used as a base to build modules for +TeX-based documents. + +Users should probably use the LaTeX module, which inherits from the TeX module +and contains the definitions of common LaTeX commands. + +=head1 TRANSLATING WITH PO4A::TEX + +This module can be used directly to handle generic TeX documents. +This will split your document in smaller blocks (paragraphs, verbatim +blocks, or even smaller like titles or indexes). + +There are some options (described in the next section) that can customize +this behavior. If this doesn't fit to your document format you're encouraged +to write your own derivative module from this, to describe your format's details. +See the section B<WRITING DERIVATIVE MODULES> below, for the process description. + +This module can also be customized by lines starting with "% po4a:" in the +TeX file. This process is described in the B<INLINE CUSTOMIZATION> section. + +=cut + +package Locale::Po4a::TeX; + +use 5.16.0; +use strict; +use warnings; + +require Exporter; +use vars qw(@ISA @EXPORT); +@ISA = qw(Locale::Po4a::TransTractor); +@EXPORT = qw(%commands %environments + $RE_ESCAPE $ESCAPE $RE_VERBATIM + $no_wrap_environments + $verbatim_environments + %separated_command + %separated_environment + %translate_buffer_env + &add_comment + &generic_command + ®ister_generic_command + ®ister_generic_environment); + +use Locale::Po4a::TransTractor; +use Locale::Po4a::Common; +use File::Basename qw(dirname); +use Carp qw(croak); + +use Encode; +use Encode::Guess; + +# hash of known commands and environments, with parsing sub. +# See end of this file +use vars qw(%commands %environments); + +# hash to describe the number of parameters and which one have to be +# translated. Used by generic commands +our %command_parameters = (); +our %environment_parameters = (); + +# hash to describe the separators of environments. +our %env_separators = (); + +# The escape character used to introduce commands. +our $RE_ESCAPE = "\\\\"; +our $ESCAPE = "\\"; + +# match the beginning of a verbatim block +our $RE_VERBATIM = "\\\\begin\\{(?:verbatim)\\*?\\}"; + +# match the beginning of a comment. +# NOTE: It must contain a group, with chars preceding the comment +our $RE_PRE_COMMENT = "(?<!\\\\)(?:\\\\\\\\)*"; +our $RE_COMMENT = "\\\%"; + +# Space separated list of environments that should not be re-wrapped. +our $no_wrap_environments = "verbatim"; +our $verbatim_environments = "verbatim"; + +# hash with the commands that have to be separated (or have to be joined). +# 3 modes are currently used: +# '*' The command is separated if it appear at an extremity of a +# paragraph +# '+' The command is separated, but its arguments are joined together +# with the command name for the translation +# '-' The command is not separated, unless it appear alone on a paragraph +# (e.g. \strong) +our %separated_command = (); +our %separated_environment = (); + +=head1 OPTIONS ACCEPTED BY THIS MODULE + +These are this module's particular options: + +=over 4 + +=item B<debug> + +Activate debugging for some internal mechanisms of this module. +Use the source to see which parts can be debugged. + +=item B<no_wrap> + +Comma-separated list of environments which should not be re-wrapped. + +Note that there is a difference between verbatim and no_wrap environments. +There is no command and comments analysis in verbatim blocks. + +If this environment was not already registered, po4a will consider that +this environment does not take any parameters. + +=item B<exclude_include> + +Colon-separated list of files that should not be included by \input and +\include. + +=item B<definitions> + +The name of a file containing definitions for po4a, as defined in the +B<INLINE CUSTOMIZATION> section. +You can use this option if it is not possible to put the definitions in +the document being translated. + +=item B<verbatim> + +Comma-separated list of environments which should be taken as verbatim. + +If this environment was not already registered, po4a will consider that +this environment does not take any parameters. + +=back + +Use these options to override the default behavior of the defined commands. + +=head1 INLINE CUSTOMIZATION + +The TeX module can be customized with lines starting by B<% po4a:>. +These lines are interpreted as commands to the parser. +The following commands are recognized: + +=over 4 + +=item B<% po4a: command> I<command1> B<alias> I<command2> + +Indicates that the arguments of the I<command1> command should be +treated as the arguments of the I<command2> command. + +=item B<% po4a: command> I<command1> I<parameters> + +This describes in detail the parameters of the I<command1> +command. +This information will be used to check the number of arguments and their +types. + +You can precede the I<command1> command by + +=over 4 + +=item an asterisk (B<*>) + +po4a will extract this command from paragraphs (if it is located at +the beginning or the end of a paragraph). +The translators will then have to translate the parameters that are marked +as translatable. + +=item a plus (B<+>) + +As for an asterisk, the command will be extracted if it appear at an +extremity of a block, but the parameters won't be translated separately. +The translator will have to translate the command concatenated to all its +parameters. +This keeps more context, and is useful for commands with small +words in parameter, which can have multiple meanings (and translations). + +Note: In this case you don't have to specify which parameters are +translatable, but po4a must know the type and number of parameters. + +=item a minus (B<->) + +In this case, the command won't be extracted from any block. +But if it appears alone on a block, then only the parameters marked as +translatable will be presented to the translator. +This is useful for font commands. These commands should generally not be +separated from their paragraph (to keep the context), but there is no +reason to annoy the translator with them if a whole string is enclosed in +such a command. + +=back + +The I<parameters> argument is a set of [] (to indicate an optional +argument) or {} (to indicate a mandatory argument). +You can place an underscore (_) between these brackets to indicate that +the parameter must be translated. For example: + % po4a: command *chapter [_]{_} + +This indicates that the chapter command has two parameters: an optional +(short title) and a mandatory one, which must both be translated. +If you want to specify that the href command has two mandatory parameters, +that you don't want to translate the URL (first parameter), and that you +don't want this command to be separated from its paragraph (which allow +the translator to move the link in the sentence), you can use: + % po4a: command -href {}{_} + +In this case, the information indicating which arguments must be +translated is only used if a paragraph is only composed of this href +command. + +=item B<% po4a: environment> I<env> I<parameters> + +This defines the parameters accepted by the I<env> environment and specifies the ones to be translated. +This information is later used to check the number of arguments of the +\begin command. +The syntax of the I<parameters> argument is the same as described for the +others commands. +The first parameter of the \begin command is the name of the environment. +This parameter must not be specified in the list of parameters. Here are +some examples: + % po4a: environment multicols {} + % po4a: environment equation + +As for the commands, I<env> can be preceded by a plus (+) to indicate +that the \begin command must be translated with all its arguments. + +=item B<% po4a: separator> I<env> B<">I<regex>B<"> + +Indicates that an environment should be split according to the given +regular expression. + +The regular expression is delimited by quotes. +It should not create any back-reference. +You should use (?:) if you need a group. +It may also need some escapes. + +For example, the LaTeX module uses the "(?:&|\\\\)" regular expression to +translate separately each cell of a table (lines are separated by '\\' and +cells by '&'). + +The notion of environment is expanded to the type displayed in the PO file. +This can be used to split on "\\\\" in the first mandatory argument of the +title command. In this case, the environment is title{#1}. + +=item B<% po4a: verbatim environment> I<env> + +Indicate that I<env> is a verbatim environment. +Comments and commands will be ignored in this environment. + +If this environment was not already registered, po4a will consider that +this environment does not take any parameters. + +=back + +=cut + +# Directory name of the main file. +# It is the directory where included files will be searched. +# See read_file. +my $my_dirname; + +# Array of files that should not be included by read_file. +# See read_file. +our @exclude_include; + +my %type_end = ( '{' => '}', '[' => ']', ' ' => '' ); + +######################### +#### DEBUGGING STUFF #### +######################### +my %debug = ( + 'pretrans' => 0, # see pre-conditioning of translation + 'postrans' => 0, # see post-conditioning of translation + 'translate' => 0, # see translation + 'extract_commands' => 0, # see commands extraction + 'commands' => 0, # see command subroutines + 'environments' => 0, # see environment subroutines + 'translate_buffer' => 0 # see buffer translation +); + +=head1 WRITING DERIVATE MODULES + +=over 4 + +=item B<pre_trans> + +=cut + +sub pre_trans { + my ( $self, $str, $ref, $type ) = @_; + + # Preformatting, so that translators don't see + # strange chars + my $origstr = $str; + print STDERR "pre_trans($str)=" + if ( $debug{'pretrans'} ); + + # Accentuated characters + # FIXME: only do this if the encoding is UTF-8? + # $str =~ s/${RE_ESCAPE}`a/à/g; +## $str =~ s/${RE_ESCAPE}c{c}/ç/g; # not in texinfo: @,{c} + # $str =~ s/${RE_ESCAPE}^e/ê/g; + # $str =~ s/${RE_ESCAPE}'e/é/g; + # $str =~ s/${RE_ESCAPE}`e/è/g; + # $str =~ s/${RE_ESCAPE}`u/ù/g; + # $str =~ s/${RE_ESCAPE}"i/ï/g; + # # Non breaking space. FIXME: should we change $\sim$ to ~ + # $str =~ s/~/\xA0/g; # FIXME: not in texinfo: @w{ } + + print STDERR "$str\n" if ( $debug{'pretrans'} ); + return $str; +} + +=item B<post_trans> + +=cut + +sub post_trans { + my ( $self, $str, $ref, $type ) = @_; + my $transstr = $str; + + print STDERR "post_trans($str)=" + if ( $debug{'postrans'} ); + + # Accentuated characters + # $str =~ s/à/${ESCAPE}`a/g; +## $str =~ s/ç/$ESCAPEc{c}/g; # FIXME: not in texinfo + # $str =~ s/ê/${ESCAPE}^e/g; + # $str =~ s/é/${ESCAPE}'e/g; + # $str =~ s/è/${ESCAPE}`e/g; + # $str =~ s/ù/${ESCAPE}`u/g; + # $str =~ s/ï/${ESCAPE}"i/g; + # # Non breaking space. FIXME: should we change ~ to $\sim$ + # $str =~ s/\xA0/~/g; # FIXME: not in texinfo + + print STDERR "$str\n" if ( $debug{'postrans'} ); + return $str; +} + +# Comments are extracted in the parse function. +# They are stored in the @comments array, and then displayed as a PO +# comment with the first translated string of the paragraph. +my @comments = (); + +=item B<add_comment> + +Add a string as a comment to be added around the next translated element. +This is mostly useful to the texinfo module, as comments are automatically handled in TeX. + +=cut + +sub add_comment { + my ( $self, $comment ) = @_; + push @comments, $comment; +} + +=item B<translate> + +Wrapper around Transtractor's translate, with pre- and post-processing +filters. + +Comments of a paragraph are inserted as a PO comment for the first +translated string of this paragraph. + +=cut + +sub translate { + my ( $self, $str, $ref, $type ) = @_; + my (%options) = @_; + my $origstr = $str; + print STDERR "translate($str)=" + if ( $debug{'translate'} ); + + return $str unless ( defined $str ) && length($str); + return $str if ( $str eq "\n" ); + + $str = pre_trans( $self, $str, $ref || $self->{ref}, $type ); + + # add comments (if any and not already added to the PO) + if (@comments) { + $options{'comment'} .= join( '\n', @comments ); + + @comments = (); + } + + # FIXME: translate may append a newline, keep the trailing spaces so we can + # recover them. + my $spaces = ""; + if ( $options{'wrap'} and $str =~ m/^(.*?)(\s+)$/s ) { + $str = $1; + $spaces = $2; + } + + # Translate this + $str = $self->SUPER::translate( $str, $ref || $self->{ref}, $type || $self->{type}, %options ); + + # FIXME: translate may append a newline, see above + if ( $options{'wrap'} ) { + chomp $str; + $str .= $spaces; + } + + $str = post_trans( $self, $str, $ref || $self->{ref}, $type ); + + print STDERR "'$str'\n" if ( $debug{'translate'} ); + return $str; +} + +########################### +### COMMANDS SEPARATION ### +########################### + +=item B<get_leading_command>($buffer) + +This function returns: + +=over 4 + +=item A command name + +If no command is found at the beginning of the given buffer, this string +will be empty. Only commands that can be separated are considered. +The %separated_command hash contains the list of these commands. + +=item A variant + +This indicates if a variant is used. For example, an asterisk (*) can +be added at the end of sections command to specify that they should +not be numbered. In this case, this field will contain "*". If there +is no variant, the field is an empty string. + +=item An array of tuples (type of argument, argument) + +The type of argument can be either '{' (for mandatory arguments) or '[' +(for optional arguments). + +=item The remaining buffer + +The rest of the buffer after the removal of this leading command and +its arguments. If no command is found, the original buffer is not +touched and returned in this field. + +=back + +=cut + +sub get_leading_command { + my ( $self, $buffer ) = ( shift, shift ); + my $command = ""; # the command name + my $variant = ""; # a varriant for the command (e.g. an asterisk) + my @args; # array of arguments + print STDERR "get_leading_command($buffer)=" + if ( $debug{'extract_commands'} ); + + if ( $buffer =~ m/^$RE_ESCAPE([[:alnum:]]+)(\*?)(.*)$/s + && defined $separated_command{$1} ) + { + # The buffer begin by a comand (possibly preceded by some + # whitespaces). + $command = $1; + $variant = $2; + $buffer = $3; + + # read the arguments (if any) + while ( $buffer =~ m/^\s*$RE_PRE_COMMENT([\[\{])(.*)$/s ) { + my $type = $1; + my $arg = ""; + my $count = 1; + $buffer = $2; + + # stop reading the buffer when the number of ] (or }) matches the + # the number of [ (or {). + while ( $count > 0 ) { + if ( $buffer =~ m/^(.*?$RE_PRE_COMMENT)([\[\]\{\}])(.*)$/s ) { + $arg .= $1; + $buffer = $3; + if ( $2 eq $type ) { + $count++; + } elsif ( $2 eq $type_end{$type} ) { + $count--; + } + if ( $count > 0 ) { + $arg .= $2; + } + } else { + die wrap_ref_mod( $self->{ref}, "po4a::tex", dgettext( "po4a", "un-balanced %s in '%s'" ), + $type, $buffer ); + } + } + push @args, ( $type, $arg ); + } + } + if ( defined $command and length $command ) { + + # verify the number of arguments + my ( $check, $reason, $remainder ) = check_arg_count( $self, $command, \@args ); + if ( not $check ) { + die wrap_ref_mod( + $self->{ref}, + "po4a::tex", + dgettext( "po4a", "Error while checking the number of " . "arguments of the '%s' command: %s" ) . "\n", + $command, + $reason + ); + } + + if (@$remainder) { + + # FIXME: we should also keep the spaces to be idempotent + my ( $temp, $type, $arg ); + while (@$remainder) { + $type = shift @$remainder; + $arg = shift @$remainder; + $temp .= $type . $arg . $type_end{$type}; + + # And remove the same number of arguments from @args + pop @args; + pop @args; + } + $buffer = $temp . $buffer; + } + } + + print STDERR "($command,$variant,@args,$buffer)\n" + if ( $debug{'extract_commands'} ); + return ( $command, $variant, \@args, $buffer ); +} + +=item B<get_trailing_command>($buffer) + +The same as B<get_leading_command>, but for commands at the end of a buffer. + +=cut + +sub get_trailing_command { + my ( $self, $buffer ) = ( shift, shift ); + my $orig_buffer = $buffer; + print STDERR "get_trailing_command($buffer)=" + if ( $debug{'extract_commands'} ); + + my @args; + my $command = ""; + my $variant = ""; + + # While the buffer ends by }, consider it is a mandatory argument + # and extract this argument. + while ($buffer =~ m/^(.*$RE_PRE_COMMENT(\{).*)$RE_PRE_COMMENT\}$/s + or $buffer =~ m/^(.*$RE_PRE_COMMENT(\[).*)$RE_PRE_COMMENT\]$/s ) + { + my $arg = ""; + my $count = 1; + $buffer = $1; + my $type = $2; + + # stop reading the buffer when the number of } (or ]) matches the + # the number of { (or [). + while ( $count > 0 ) { + if ( $buffer =~ m/^(.*$RE_PRE_COMMENT)([\{\}\[\]])(.*)$/s ) { + $arg = $3 . $arg; + $buffer = $1; + if ( $2 eq $type ) { + $count--; + } elsif ( $2 eq $type_end{$type} ) { + $count++; + } + if ( $count > 0 ) { + $arg = $2 . $arg; + } + } else { + die wrap_ref_mod( $self->{ref}, "po4a::tex", dgettext( "po4a", "un-balanced %s in '%s'" ), + $type_end{$type}, $buffer ); + } + } + unshift @args, ( $type, $arg ); + } + + # There should now be a command, maybe followed by an asterisk. + if ( $buffer =~ m/^(.*$RE_PRE_COMMENT)$RE_ESCAPE([[:alnum:]]+)(\*?)\s*$/s + && defined $separated_command{$2} ) + { + $buffer = $1; + $command = $2; + $variant = $3; + my ( $check, $reason, $remainder ) = check_arg_count( $self, $command, \@args ); + if ( not $check ) { + die wrap_ref_mod( + $self->{ref}, + "po4a::tex", + dgettext( "po4a", "Error while checking the number of " . "arguments of the '%s' command: %s" ) . "\n", + $command, + $reason + ); + } + if (@$remainder) { + + # There are some arguments after the command. + # We can't extract this comand. + $command = ""; + } + } + + # sanitize return values if no command was found. + if ( !length($command) ) { + $command = ""; + $variant = ""; + undef @args; + $buffer = $orig_buffer; + } + + # verify the number of arguments + + print STDERR "($command,$variant,@args,$buffer)\n" + if ( $debug{'extract_commands'} ); + return ( $command, $variant, \@args, $buffer ); +} + +=item B<translate_buffer> + +Recursively translate a buffer by separating leading and trailing +commands (those which should be translated separately) from the +buffer. + +If a function is defined in %translate_buffer_env for the current +environment, this function will be used to translate the buffer instead of +translate_buffer(). + +=cut + +our %translate_buffer_env = (); + +sub translate_buffer { + my ( $self, $buffer, $no_wrap, @env ) = ( shift, shift, shift, @_ ); + + if ( @env and defined $translate_buffer_env{ $env[-1] } ) { + return &{ $translate_buffer_env{ $env[-1] } }( $self, $buffer, $no_wrap, @env ); + } + + print STDERR "translate_buffer($buffer,$no_wrap,@env)=" + if ( $debug{'translate_buffer'} ); + + my ( $command, $variant ) = ( "", "" ); + my $args; + my $translated_buffer = ""; + my $orig_buffer = $buffer; + my $t = ""; # a temporary string + + if ( $buffer =~ /^\s*$/s ) { + print STDERR "($buffer,@env)\n" + if ( $debug{'translate_buffer'} ); + return ( $buffer, @env ); + } + + # verbatim blocks. + # Buffers starting by \end{verbatim} are handled after. + if ( in_verbatim(@env) and $buffer !~ m/^\n?\Q$ESCAPE\Eend\{$env[-1]\*?\}/ ) { + if ( $buffer =~ m/^(.*?)(\n?\Q$ESCAPE\Eend\{$env[-1]\*?\}.*)$/s ) { + + # end of a verbatim block + my ( $begin, $end ) = ( $1 ? $1 : "", $2 ); + my ( $t1, $t2 ) = ( "", "" ); + if ( defined $begin ) { + $t1 = $self->translate( $begin, $self->{ref}, $env[-1], "wrap" => 0 ); + } + ( $t2, @env ) = translate_buffer( $self, $end, $no_wrap, @env ); + print STDERR "($t1$t2,@env)\n" + if ( $debug{'translate_buffer'} ); + return ( $t1 . $t2, @env ); + } else { + $translated_buffer = $self->translate( $buffer, $self->{ref}, $env[-1], "wrap" => 0 ); + print STDERR "($translated_buffer,@env)\n" + if ( $debug{'translate_buffer'} ); + return ( $translated_buffer, @env ); + } + } + + # early detection of verbatim environment + if ( $buffer =~ /^($RE_VERBATIM\n?)(.*)$/s and length $2 ) { + my ( $begin, $end ) = ( $1, $2 ); + my ( $t1, $t2 ) = ( "", "" ); + ( $t1, @env ) = translate_buffer( $self, $begin, $no_wrap, @env ); + ( $t2, @env ) = translate_buffer( $self, $end, $no_wrap, @env ); + + print STDERR "($t1$t2,@env)\n" + if ( $debug{'translate_buffer'} ); + return ( $t1 . $t2, @env ); + } + + # detect \begin and \end (if they are not commented) + if ( + $buffer =~ /^((?:.*?\n)? # $1 is + (?:[^%] # either not a % + | # or + (?<!\\)(?:\\\\)*\\%)*? # a % preceded by an odd nb of \ + ) # $2 is a \begin{ with the end of the line + (${RE_ESCAPE}(?:begin|end)\{.*)$/sx + and length $1 + ) + { + my ( $begin, $end ) = ( $1, $2 ); + my ( $t1, $t2 ) = ( "", "" ); + if ( is_closed($begin) ) { + ( $t1, @env ) = translate_buffer( $self, $begin, $no_wrap, @env ); + ( $t2, @env ) = translate_buffer( $self, $end, $no_wrap, @env ); + + print STDERR "($t1$t2,@env)\n" + if ( $debug{'translate_buffer'} ); + return ( $t1 . $t2, @env ); + } + } + + # remove comments from the buffer. + # Comments are stored in an array and shown as comments in the PO. + while ( $buffer =~ m/($RE_PRE_COMMENT)$RE_COMMENT([^\n]*)(\n[ \t]*)(.*)$/s ) { + my $comment = $2; + my $end = ""; + if ( $4 =~ m/^\n/s and $buffer !~ m/^$RE_COMMENT/s ) { + + # a line with comments, followed by an empty line. + # Keep the empty line, but remove the comment. + # This is an empirical heuristic, but seems to work;) + $end = "\n"; + } + if ( defined $comment and $comment !~ /^\s*$/s ) { + push @comments, $comment; + } + $buffer =~ s/($RE_PRE_COMMENT)$RE_COMMENT([^\n]*)(\n[ \t]*)/$1$end/s; + } + + # translate leading commands. + do { + # keep the leading space to put them back after the translation of + # the command. + my $spaces = ""; + if ( $buffer =~ /^(\s+)(.*?)$/s ) { + $spaces = $1; + + # $buffer = $2; # FIXME: this also remove trailing spaces!! + $buffer =~ s/^\s*//s; + } + my $buffer_save = $buffer; + ( $command, $variant, $args, $buffer ) = get_leading_command( $self, $buffer ); + if ( + ( length $command ) + and ( defined $separated_command{$command} ) + and ( $separated_command{$command} eq '-' ) + and ( ( not( defined($buffer) ) ) + or ( $buffer !~ m/^\s*$/s ) ) + ) + { + # This command can be separated only if alone on a buffer. + # We need to remove the trailing commands first, and see if it + # will be alone on this buffer. + $buffer = $buffer_save; + $command = ""; + } + if ( length($command) ) { + + # call the command subroutine. + # These command subroutines will probably call translate_buffer + # with the content of each argument that need a translation. + if ( defined( $commands{$command} ) ) { + ( $t, @env ) = &{ $commands{$command} }( $self, $command, $variant, $args, \@env, $no_wrap ); + $translated_buffer .= $spaces . $t; + + # Handle spaces after a command. + $spaces = ""; + if ( $buffer =~ /^(\s+)(.*?)$/s ) { + $spaces = $1; + + # $buffer = $2; # FIXME: this also remove trailing spaces!! + $buffer =~ s/^\s*//s; + } + $translated_buffer .= $spaces; + } else { + die wrap_ref_mod( $self->{ref}, "po4a::tex", dgettext( "po4a", "Unknown command: '%s'" ), $command ); + } + } else { + $buffer = $spaces . $buffer; + } + } while ( length($command) ); + + # array of trailing commands, which will be translated later. + my @trailing_commands = (); + do { + my $spaces = ""; + if ( $buffer =~ /^(.*?)(\s+)$/s ) { + $buffer = $1; + $spaces = $2; + } + my $buffer_save = $buffer; + ( $command, $variant, $args, $buffer ) = get_trailing_command( $self, $buffer ); + if ( + ( length $command ) + and ( defined $separated_command{$command} ) + and ( $separated_command{$command} eq '-' ) + and ( ( not defined $buffer ) + or ( $buffer !~ m/^\s*$/s ) ) + ) + { + # We can extract this command. + $command = ""; + $buffer = $buffer_save; + } + if ( length($command) ) { + unshift @trailing_commands, ( $command, $variant, $args, $spaces ); + } else { + $buffer .= $spaces; + } + } while ( length($command) ); + + # Now, $buffer is just a block that can be translated. + + # environment specific treatment + if ( @env and defined $env_separators{ $env[-1] } ) { + my $re_separator = $env_separators{ $env[-1] }; + my $buf_begin = ""; + + # FIXME: the separator may have to be translated. + while ( $buffer =~ m/^(.*?)(\s*$re_separator\s*)(.*)$/s ) { + my ( $begin, $sep, $end ) = ( $1, $2, $3 ); + $buf_begin .= $begin; + if ( is_closed($buf_begin) ) { + my $t = ""; + ( $t, @env ) = translate_buffer( $self, $buf_begin, $no_wrap, @env ); + $translated_buffer .= $t . $sep; + $buf_begin = ""; + } else { + + # the command is in a command argument + $buf_begin .= $sep; + } + $buffer = $end; + } + $buffer = $buf_begin . $buffer; + } + + # finally, translate + if ( length($buffer) ) { + my $wrap = 1; + my ( $e1, $e2 ); + NO_WRAP_LOOP: foreach $e1 (@env) { + foreach $e2 ( split( ' ', $no_wrap_environments ) ) { + if ( $e1 eq $e2 ) { + $wrap = 0; + last NO_WRAP_LOOP; + } + } + } + $wrap = 0 if ( defined $no_wrap and $no_wrap == 1 ); + + # Keep spaces at the end of the buffer. + my $spaces_end = ""; + if ( $buffer =~ /^(.*?)(\s+)$/s ) { + $spaces_end = $2; + $buffer = $1; + } + if ( $wrap and $buffer =~ s/^(\s+)//s ) { + $translated_buffer .= $1; + } + $translated_buffer .= + $self->translate( $buffer, $self->{ref}, @env ? $env[-1] : "Plain text", "wrap" => $wrap ); + + # Restore spaces at the end of the buffer. + $translated_buffer .= $spaces_end; + } + + # append the translation of the trailing commands + while (@trailing_commands) { + my $command = shift @trailing_commands; + my $variant = shift @trailing_commands; + my $args = shift @trailing_commands; + my $spaces = shift @trailing_commands; + if ( defined( $commands{$command} ) ) { + ( $t, @env ) = &{ $commands{$command} }( $self, $command, $variant, $args, \@env, $no_wrap ); + $translated_buffer .= $t . $spaces; + } else { + die wrap_ref_mod( $self->{ref}, "po4a::tex", dgettext( "po4a", "Unknown command: '%s'" ), $command ); + } + } + + print STDERR "($translated_buffer,@env)\n" + if ( $debug{'translate_buffer'} ); + return ( $translated_buffer, @env ); +} + +################################ +#### EXTERNAL CUSTOMIZATION #### +################################ + +=item B<read> + +Overloads Transtractor's read(). + +=cut + +sub read ($$$$) { + my $self = shift; + my $filename = shift; + my $refname = shift; + my $charset = shift; + + # keep the directory name of the main file. + $my_dirname = dirname($filename); + + push @{ $self->{TT}{doc_in} }, read_file( $self, $filename, $refname, $charset ); +} + +=item B<read_file> + +Recursively read a file, appending included files which are not listed in the +@exclude_include array. Included files are searched using the B<kpsewhich> +command from the Kpathsea library. + +Except from the file inclusion part, it is a cut and paste from +Transtractor's read. + +=cut + +# TODO: fix DOS end of lines +sub read_file { + my $self = shift; + my $filename = shift + or croak wrap_mod( "po4a::tex", dgettext( "po4a", "Cannot read from file without having a filename" ) ); + my $refname = shift // $filename; + my $charset = shift || 'UTF-8'; + my $linenum = 0; + my @entries = (); + + open( my $in, "<:encoding($charset)", $filename ) + or croak wrap_mod( "po4a::tex", dgettext( "po4a", "Cannot read from %s: %s" ), $filename, $! ); + while ( defined( my $textline = <$in> ) ) { + $linenum++; + my $ref = "$refname:$linenum"; + + # TODO: add support for includeonly + # The next regular expression matches \input or \includes that are + # not commented (but can be preceded by a \%. + while ( + $textline =~ /^((?:[^%]|(?<!\\)(?:\\\\)*\\%)*) + \\(include|input) + \{([^\{]*)\}(.*)$/x + ) + { + my ( $begin, $newfilename, $end ) = ( $1, $3, $4 ); + my $tag = $2; + my $include = 1; + foreach my $f (@exclude_include) { + if ( $f eq $newfilename ) { + $include = 0; + $begin .= "\\$tag" . "{$newfilename}"; + $textline = $end; + last; + } + } + if ( $include and ( $tag eq "include" ) ) { + $begin .= "\\clearpage"; + } + if ( $begin !~ /^\s*$/ ) { + push @entries, ( $begin, $ref ); + } + if ($include) { + + # search the file + open( KPSEA, "kpsewhich " . $newfilename . " |" ); + my $newfilepath = <KPSEA>; + + if ( $newfilename ne "" and ( $newfilepath // '' ) eq '' ) { + die wrap_mod( + "po4a::tex", + dgettext( + "po4a", + "Cannot find '%s' with kpsewhich. To prevent this file to be included, add '-o exclude_include=%s' to the options, either on the command line or in your po4a.conf file." + ), + $newfilename, + $newfilename + ); + } + + push @entries, read_file( $self, $newfilepath, $newfilename, $charset ); + if ( $tag eq "include" ) { + $textline = "\\clearpage" . $end; + } else { + $textline = $end; + } + } + } + if ( length($textline) ) { + my @entry = ( $textline, $ref ); + push @entries, @entry; + } + } + close $in + or croak wrap_mod( "po4a::tex", dgettext( "po4a", "Cannot close %s after reading: %s" ), $filename, $! ); + + return @entries; +} + +=back + + +=over 4 + +=item B<parse_definition_file> + +Subroutine for parsing a file with po4a directives (definitions for +new commands). + +=cut + +sub parse_definition_file { + my ( $self, $filename, $only_try ) = @_; + my $filename_org = $filename; + + open( KPSEA, "kpsewhich " . $filename . " |" ); + $filename = <KPSEA>; + + if ( not defined $filename ) { + warn wrap_mod( "po4a::tex", dgettext( "po4a", "kpsewhich cannot find %s" ), $filename_org ); + if ( defined $only_try && $only_try ) { + return; + } else { + exit 1; + } + } + + if ( !open( IN, "<$filename" ) ) { + warn wrap_mod( "po4a::tex", dgettext( "po4a", "Cannot open %s: %s" ), $filename, $! ); + if ( defined $only_try && $only_try ) { + return; + } else { + exit 1; + } + } + while (<IN>) { + if (/^\s*%\s*po4a\s*:/) { + parse_definition_line( $self, $_ ); + } + } +} + +=item B<parse_definition_line> + +Parse a definition line of the form "% po4a: ". + +See the B<INLINE CUSTOMIZATION> section for more details. + +=cut + +sub parse_definition_line { + my ( $self, $line ) = @_; + $line =~ s/^\s*%\s*po4a\s*:\s*//; + + if ( $line =~ /^command\s+([-*+]?)(\w+)\s+(.*)$/ ) { + my $command = $2; + $line = $3; + if ($1) { + $separated_command{$command} = $1; + } + if ( $line =~ /^alias\s+(\w+)\s*$/ ) { + if ( defined( $commands{$1} ) ) { + $commands{$command} = $commands{$1}; + $command_parameters{$command} = $command_parameters{$1}; + } else { + die wrap_mod( "po4a::tex", dgettext( "po4a", "Cannot use an alias to the unknown command '%s'" ), $2 ); + } + } elsif ( $line =~ /^(-1|\d+),(-1|\d+),(-1|[ 0-9]*),(-1|[ 0-9]*?)\s*$/ ) { + die wrap_ref_mod( + $self->{ref}, + "po4a::tex", + dgettext( + "po4a", "You are using the old definitions format (%s). Please update this definition line." + ), + $_[1] + ); + } elsif ( $line =~ m/^((?:\{_?\}|\[_?\])*)\s*$/ ) { + register_generic_command("$command,$1"); + } + } elsif ( $line =~ /^environment\s+([+]?\w+\*?)(.*)$/ ) { + my $env = $1; + $line = $2; + if ( $line =~ m/^\s*((?:\{_?\}|\[_?\])*)\s*$/ ) { + register_generic_environment("$env,$1"); + } + } elsif ( $line =~ /^separator\s+(\w+(?:\[#[0-9]+\])?)\s+\"(.*)\"\s*$/ ) { + my $env = $1; # This is not necessarily an environment. + # It can also be something like 'title[#1]'. + $env_separators{$env} = $2; + } elsif ( $line =~ /^verbatim\s+environment\s+(\w+)\s*$/ ) { + register_verbatim_environment($1); + } +} + +=item B<is_closed> + +=cut + +sub is_closed { + my $paragraph = shift; + + # FIXME: [ and ] are more difficult to handle, because it is not easy to detect if it introduce an optional argument + my $tmp = $paragraph; + my $closing = 0; + my $opening = 0; + + # FIXME: { and } should not be counted in verbatim blocks + # Remove comments + $tmp =~ s/$RE_PRE_COMMENT$RE_COMMENT.*//mg; + while ( $tmp =~ /^.*?$RE_PRE_COMMENT\{(.*)$/s ) { + $opening += 1; + $tmp = $1; + } + $tmp = $paragraph; + + # Remove comments + $tmp =~ s/$RE_PRE_COMMENT$RE_COMMENT.*//mg; + while ( $tmp =~ /^.*?$RE_PRE_COMMENT\}(.*)$/s ) { + $closing += 1; + $tmp = $1; + } + return $opening eq $closing; +} + +sub in_verbatim { + foreach my $e1 (@_) { + foreach my $e2 ( split( ' ', $verbatim_environments ) ) { + if ( $e1 eq $e2 ) { + return 1; + } + } + } + + return 0; +} + +############################# +#### MAIN PARSE FUNCTION #### +############################# + +=item B<parse> + +=cut + +sub parse { + my $self = shift; + my ( $line, $ref ); + my $paragraph = ""; # Buffer where we put the paragraph while building + my @env = (); # environment stack + my $t = ""; + + LINE: + undef $self->{type}; + ( $line, $ref ) = $self->shiftline(); + + while ( defined($line) ) { + chomp($line); + $self->{ref} = "$ref"; + + if ( $line =~ /^\s*%\s*po4a\s*:/ ) { + parse_definition_line( $self, $line ); + goto LINE; + } + + my $closed = is_closed($paragraph); + + #FIXME: what happens if a \begin{verbatim} or \end{verbatim} is in the + # middle of a line. (This is only an issue if the verbatim + # environment contains an un-closed bracket) + if ( + ( + $closed and ( $line =~ /^\s*$/ + or $line =~ /^\s*$RE_VERBATIM\s*$/ ) + ) + or ( in_verbatim(@env) and $line =~ /^\s*\Q$ESCAPE\Eend\{$env[-1]\}\s*$/ ) + ) + { + # An empty line. This indicates the end of the current + # paragraph. + $paragraph .= $line . "\n"; + if ( length($paragraph) ) { + ( $t, @env ) = translate_buffer( $self, $paragraph, undef, @env ); + $self->pushline($t); + $paragraph = ""; + @comments = (); + } + } else { + + # continue the same paragraph + $paragraph .= $line . "\n"; + } + + # Reinit the loop + ( $line, $ref ) = $self->shiftline(); + undef $self->{type}; + } + + if ( length($paragraph) ) { + ( $t, @env ) = translate_buffer( $self, $paragraph, undef, @env ); + $self->pushline($t); + $paragraph = ""; + } +} # end of parse + +=item B<docheader> + +=back + +=cut + +sub docheader { + return "% This file was generated with po4a. Translate the source file.\n" . "%\n"; +} + +#################################### +#### DEFINITION OF THE COMMANDS #### +#################################### + +=head1 INTERNAL FUNCTIONS used to write derivative parsers + +Command and environment functions take the following arguments (in +addition to the $self object): + +=over + +=item A command name + +=item A variant + +=item An array of (type, argument) tuples + +=item The current environment + +=back + +The first 3 arguments are extracted by get_leading_command or +get_trailing_command. + +Command and environment functions return the translation of the command +with its arguments and a new environment. + +Environment functions are called when a \begin command is found. They are +called with the \begin command and its arguments. + +The TeX module only proposes one command function and one environment +function: generic_command and generic_environment. + +generic_command uses the information specified by +register_generic_command or by adding definition to the TeX file: + % po4a: command I<command1> I<parameters> + +generic_environment uses the information specified by +register_generic_environment or by adding definition to the TeX file: + % po4a: environment I<env> I<parameters> + +Both functions will only translate the parameters that were specified as +translatable (with a '_'). +generic_environment will append the name of the environment to the +environment stack and generic_command will append the name of the command +followed by an identifier of the parameter (like {#7} or [#2]). + +=cut + +# definition of environment related commands + +$commands{'begin'} = sub { + my $self = shift; + my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift ); + my $no_wrap = shift || 0; + print "begin($command,$variant,@$args,@$env,$no_wrap)=" + if ( $debug{'commands'} || $debug{'environments'} ); + my ( $t, @e ) = ( "", () ); + + my $envir = $args->[1]; + if ( defined($envir) and $envir =~ /^(.*)\*$/ ) { + $envir = $1; + } + + if ( defined($envir) && defined( $environments{$envir} ) ) { + ( $t, @e ) = &{ $environments{$envir} }( $self, $command, $variant, $args, $env, $no_wrap ); + } else { + die wrap_ref_mod( $self->{ref}, "po4a::tex", dgettext( "po4a", "unknown environment: '%s'" ), $args->[1] ); + } + + print "($t, @e)\n" + if ( $debug{'commands'} || $debug{'environments'} ); + return ( $t, @e ); +}; + +# Use register_generic to set the type of arguments. The function is then +# overwritten: +register_generic_command("*end,{}"); +$commands{'end'} = sub { + my $self = shift; + my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift ); + my $no_wrap = shift || 0; + print "end($command,$variant,@$args,@$env,$no_wrap)=" + if ( $debug{'commands'} || $debug{'environments'} ); + + # verify that this environment was the last pushed environment. + if ( !@$env || @$env[-1] ne $args->[1] ) { + + # a begin may have been hidden in the middle of a translated + # buffer. FIXME: Just warn for now. + warn wrap_ref_mod( $self->{'ref'}, "po4a::tex", dgettext( "po4a", "unmatched end of environment '%s'" ), + $args->[1] ); + } else { + pop @$env; + } + + my ( $t, @e ) = generic_command( $self, $command, $variant, $args, $env, $no_wrap ); + + print "($t, @$env)\n" + if ( $debug{'commands'} || $debug{'environments'} ); + return ( $t, @$env ); +}; +$separated_command{'begin'} = '*'; + +sub generic_command { + my $self = shift; + my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift ); + my $no_wrap = shift || 0; + print "generic_command($command,$variant,@$args,@$env,$no_wrap)=" + if ( $debug{'commands'} || $debug{'environments'} ); + + my ( $t, @e ) = ( "", () ); + my $translated = ""; + + # the number of arguments is checked during the extraction of the + # arguments + + if ( ( not( defined $separated_command{$command} ) ) + or $separated_command{$command} ne '+' ) + { + # Use the information from %command_parameters to only translate + # the needed parameters + $translated = "$ESCAPE$command$variant"; + + # handle arguments + my @arg_types = @{ $command_parameters{$command}{'types'} }; + my @arg_translated = @{ $command_parameters{$command}{'translated'} }; + my ( $type, $opt ); + my @targs = @$args; + my $count = 0; + while (@targs) { + $type = shift @targs; + $opt = shift @targs; + my $have_to_be_translated = 0; + TEST_TYPE: + if ( $count >= scalar @arg_types ) { + + # The number of arguments does not match, + # and a variable number of arguments was not specified + die wrap_ref_mod( $self->{ref}, "po4a::tex", + dgettext( "po4a", "Wrong number of arguments for " . "the '%s' command." ) . "\n", $command ); + } elsif ( $type eq $arg_types[$count] ) { + $have_to_be_translated = $arg_translated[$count]; + $count++; + } elsif ( $type eq '{' and $arg_types[$count] eq '[' ) { + + # an optionnal argument was not provided, + # try with the next argument. + $count++; + goto TEST_TYPE; + } else { + my $reason = + dgettext( "po4a", "An optional argument " . "was provided, but a mandatory one " . "is expected." ); + die wrap_ref_mod( $self->{ref}, "po4a::tex", dgettext( "po4a", "Command '%s': %s" ) . "\n", + $command, $reason ); + } + if ($have_to_be_translated) { + ( $t, @e ) = translate_buffer( $self, $opt, $no_wrap, + ( @$env, $command . $type . "#" . $count . $type_end{$type} ) ); + } else { + $t = $opt; + } + $translated .= $type . $t . $type_end{$type}; + } + } else { + + # Translate the command with all its arguments joined + my $tmp = "$ESCAPE$command$variant"; + my ( $type, $opt ); + while (@$args) { + $type = shift @$args; + $opt = shift @$args; + $tmp .= $type . $opt . $type_end{$type}; + } + @e = @$env; + my $wrap = 1; + $wrap = 0 if ( defined $no_wrap and $no_wrap == 1 ); + $translated = $self->translate( $tmp, $self->{ref}, @e ? $e[-1] : "Plain text", "wrap" => $wrap ); + } + + print "($translated, @$env)\n" + if ( $debug{'commands'} || $debug{'environments'} ); + return ( $translated, @$env ); +} + +sub register_generic_command { + if ( $_[0] =~ m/^(.*),((\{_?\}|\[_?\]| _? )*)$/ ) { + my $command = $1; + my $arg_types = $2; + if ( $command =~ /^([-*+])(.*)$/ ) { + $command = $2; + $separated_command{$command} = $1; + } + my @types = (); + my @translated = (); + while ( defined $arg_types + and length $arg_types + and $arg_types =~ m/^(?:([\{\[ ])(_?)[\}\] ])(.*)$/ ) + { + push @types, $1; + push @translated, ( $2 eq "_" ) ? 1 : 0; + $arg_types = $3; + } + $command_parameters{$command}{'types'} = \@types; + $command_parameters{$command}{'translated'} = \@translated; + $command_parameters{$command}{'nb_args'} = ""; + $commands{$command} = \&generic_command; + } else { + die wrap_mod( "po4a::tex", + dgettext( "po4a", "register_generic_command: unsupported " . "format: '%s'." ) . "\n", $_[0] ); + } +} + +######################################## +#### DEFINITION OF THE ENVIRONMENTS #### +######################################## +sub generic_environment { + my $self = shift; + my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift ); + my $no_wrap = shift; + print "generic_environment($command,$variant,$args,$env,$no_wrap)=" + if ( $debug{'environments'} ); + my ( $t, @e ) = ( "", () ); + my $translated = ""; + + # The first argument (the name of the environment is never translated) + # For the others, @types and @translated are used. + $translated = "$ESCAPE$command$variant"; + my @targs = @$args; + my $type = shift @targs; + my $opt = shift @targs; + my $new_env = $opt; + $translated .= $type . $new_env . $type_end{$type}; + if ( ( not( defined $separated_environment{$new_env} ) ) + or $separated_environment{$new_env} ne '+' ) + { + # Use the information from %command_parameters to only translate + # the needed parameters + my @arg_types = @{ $environment_parameters{$new_env}{'types'} }; + my @arg_translated = @{ $environment_parameters{$new_env}{'translated'} }; + + my $count = 0; + while (@targs) { + $type = shift @targs; + $opt = shift @targs; + my $have_to_be_translated = 0; + TEST_TYPE: + if ( $count >= scalar @arg_types ) { + die wrap_ref_mod( $self->{ref}, "po4a::tex", + dgettext( "po4a", "Wrong number of arguments for " . "the '%s' command." ) . "\n", $command ); + } elsif ( $type eq $arg_types[$count] ) { + $have_to_be_translated = $arg_translated[$count]; + $count++; + } elsif ( $type eq '{' and $arg_types[$count] eq '[' ) { + + # an optionnal argument was not provided, + # try with the next argument. + $count++; + goto TEST_TYPE; + } else { + my $reason = + dgettext( "po4a", "An optional argument " . "was provided, but a mandatory one " . "is expected." ); + die wrap_ref_mod( $self->{ref}, "po4a::tex", dgettext( "po4a", "Command '%s': %s" ) . "\n", + $command, $reason ); + } + + if ($have_to_be_translated) { + ( $t, @e ) = translate_buffer( $self, $opt, $no_wrap, + ( @$env, $new_env . $type . "#" . $count . $type_end{$type} ) ); + } else { + $t = $opt; + } + $translated .= $type . $t . $type_end{$type}; + + } + } else { + + # Translate the \begin command with all its arguments joined + my ( $type, $opt ); + my $buf = $translated; + while (@targs) { + $type = shift @targs; + $opt = shift @targs; + $buf .= $type . $opt . $type_end{$type}; + } + @e = @$env; + my $wrap = 1; + $wrap = 0 if $no_wrap == 1; + $translated = $self->translate( $buf, $self->{ref}, @e ? $e[-1] : "Plain text", "wrap" => $wrap ); + } + @e = ( @$env, $new_env ); + + print "($translated,@e)\n" + if ( $debug{'environments'} ); + return ( $translated, @e ); +} + +sub check_arg_count { + my $self = shift; + my $command = shift; + my $args = shift; + my @targs = @$args; + my $check = 1; + my @remainder = (); + my $reason = ""; + my ( $type, $arg ); + my @arg_types; + + if ( $command eq 'begin' ) { + $type = shift @targs; + + # The name of the environment is mandatory + if ( ( not defined $type ) + or ( $type ne '{' ) ) + { + $reason = dgettext( "po4a", "The first argument of \\begin is mandatory." ); + $check = 0; + } + my $env = shift @targs; + if ( not defined $environment_parameters{$env} ) { + die wrap_ref_mod( $self->{ref}, "po4a::tex", dgettext( "po4a", "unknown environment: '%s'" ), $env ); + } + @arg_types = @{ $environment_parameters{$env}{'types'} }; + } else { + @arg_types = @{ $command_parameters{$command}{'types'} }; + } + + my $count = 0; + while ( $check and @targs ) { + $type = shift @targs; + $arg = shift @targs; + TEST_TYPE: + if ( $count >= scalar @arg_types ) { + + # Too many arguments some will remain + @remainder = ( $type, $arg, @targs ); + last; + } elsif ( $type eq $arg_types[$count] ) { + $count++; + } elsif ( $type eq '{' and $arg_types[$count] eq '[' ) { + + # an optionnal argument was not provided, + # try with the next argument. + $count++; + goto TEST_TYPE; + } else { + $check = 0; + $reason = dgettext( "po4a", "An optional argument was " . "provided, but a mandatory one is expected." ); + } + } + + return ( $check, $reason, \@remainder ); +} + +sub register_generic_environment { + print "register_generic_environment($_[0])\n" + if ( $debug{'environments'} ); + if ( $_[0] =~ m/^(.*),((?:\{_?\}|\[_?\])*)$/ ) { + my $env = $1; + my $arg_types = $2; + if ( $env =~ /^([+])(.*)$/ ) { + $separated_environment{$2} = $1; + $env = $2; + } + my @types = (); + my @translated = (); + while ( defined $arg_types + and length $arg_types + and $arg_types =~ m/^(?:([\{\[])(_?)[\}\]])(.*)$/ ) + { + push @types, $1; + push @translated, ( $2 eq "_" ) ? 1 : 0; + $arg_types = $3; + } + $environment_parameters{$env} = { + 'types' => \@types, + 'translated' => \@translated + }; + $environments{$env} = \&generic_environment; + } +} + +sub register_verbatim_environment { + my $env = shift; + $no_wrap_environments .= " $env"; + $verbatim_environments .= " $env"; + $RE_VERBATIM = "\\\\begin\\{(?:" . join( "|", split( / /, $verbatim_environments ) ) . ")\\*?\\}"; + register_generic_environment("$env,") + unless ( defined $environments{$env} ); +} + +#################################### +### INITIALIZATION OF THE PARSER ### +#################################### +sub initialize { + my $self = shift; + my %options = @_; + + $self->{options}{'definitions'} = ''; + $self->{options}{'exclude_include'} = ''; + $self->{options}{'no_wrap'} = ''; + $self->{options}{'verbatim'} = ''; + $self->{options}{'debug'} = ''; + $self->{options}{'verbose'} = ''; + $self->{options}{'no-warn'} = 0; # TexInfo option to not warn about the state of the module + + %debug = (); + + # FIXME: %commands and %separated_command should also be restored to their + # default values. + + foreach my $opt ( keys %options ) { + if ( $options{$opt} ) { + die wrap_mod( "po4a::tex", dgettext( "po4a", "Unknown option: %s" ), $opt ) + unless exists $self->{options}{$opt}; + $self->{options}{$opt} = $options{$opt}; + } + } + + if ( $options{'debug'} ) { + foreach ( $options{'debug'} ) { + $debug{$_} = 1; + } + } + + if ( $options{'exclude_include'} ) { + foreach ( split( /:/, $options{'exclude_include'} ) ) { + push @exclude_include, $_; + } + } + + if ( $options{'no_wrap'} ) { + foreach ( split( /,/, $options{'no_wrap'} ) ) { + $no_wrap_environments .= " $_"; + register_generic_environment("$_,") + unless ( defined $environments{$_} ); + } + } + + if ( $options{'verbatim'} ) { + foreach ( split( /,/, $options{'verbatim'} ) ) { + register_verbatim_environment($_); + } + } + + if ( $options{'definitions'} ) { + $self->parse_definition_file( $options{'definitions'} ); + } +} + +=head1 STATUS OF THIS MODULE + +This module needs more tests. + +It was tested on a book and with the Python documentation. + +=head1 TODO LIST + +=over 4 + +=item Automatic detection of new commands + +The TeX module could parse the newcommand arguments and try to guess the +number of arguments, their type and whether or not they should be +translated. + +=item Translation of the environment separator + +When \item is used as an environment separator, the item argument is +attached to the following string. + +=item Some commands should be added to the environment stack + +These commands should be specified by couples. +This can be used to specify commands beginning or ending a verbatim +environment. + +=item Others + +Various other points are tagged TODO in the source. + +=back + +=head1 KNOWN BUGS + +Various points are tagged FIXME in the source. + +=head1 SEE ALSO + +L<Locale::Po4a::LaTeX(3pm)|Locale::Po4a::LaTeX>, +L<Locale::Po4a::TransTractor(3pm)|Locale::Po4a::TransTractor>, +L<po4a(7)|po4a.7> + +=head1 AUTHORS + + Nicolas François <nicolas.francois@centraliens.net> + +=head1 COPYRIGHT AND LICENSE + +Copyright © 2004, 2005 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>. + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut + +1; + +__END__ + +# LocalWords: Charset charset po UTF gettext msgid nostrip LaTeX diff --git a/lib/Locale/Po4a/Texinfo.pm b/lib/Locale/Po4a/Texinfo.pm new file mode 100644 index 0000000..ca8b289 --- /dev/null +++ b/lib/Locale/Po4a/Texinfo.pm @@ -0,0 +1,613 @@ +#!/usr/bin/perl -w + +# Copyright © 2004-2007 Nicolas FRANÇOIS <nicolas.francois@centraliens.net> +# +# This file is part of po4a. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with po4a; if not, write to the Free Software +# Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +######################################################################## + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::Texinfo - convert Texinfo documents and derivates from/to PO files + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::Texinfo is a module to help the translation of Texinfo documents into +other [human] languages. + +This module contains the definitions of common Texinfo commands and environments. + +Only the comments starting with 'TRANSLATORS' are added to the PO files to guide the translators. + +=head1 STATUS OF THIS MODULE + +This module is still beta and not ready for production use. +Please send patches to contribute, not bug reports as we don't know how to deal with them. + +=head1 OPTIONS ACCEPTED BY THIS MODULE + +These are this module's particular options: + +=over 4 + +=item no-warn + +Do not warn about the current state of this module. + +=back + +=head1 SEE ALSO + +L<Locale::Po4a::TeX(3pm)|Locale::Po4a::TeX>, +L<Locale::Po4a::TransTractor(3pm)|Locale::Po4a::TransTractor>, +L<po4a(7)|po4a.7> + +=head1 AUTHORS + + Nicolas François <nicolas.francois@centraliens.net> + +=head1 COPYRIGHT AND LICENSE + +Copyright © 2004-2007 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>. + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut + +package Locale::Po4a::Texinfo; + +use 5.16.0; +use strict; +use warnings; + +require Exporter; +use vars qw($VERSION @ISA @EXPORT); +$VERSION = $Locale::Po4a::TeX::VERSION; +@ISA = qw(Locale::Po4a::TeX); +@EXPORT = qw(); + +use Locale::Po4a::Common; +use Locale::Po4a::TeX; +use subs qw( + &parse_definition_file + ®ister_generic_command &is_closed &translate_buffer + ®ister_verbatim_environment + &generic_command + &in_verbatim); +*parse_definition_file = \&Locale::Po4a::TeX::parse_definition_file; +*register_generic_command = \&Locale::Po4a::TeX::register_generic_command; +*register_verbatim_environment = \&Locale::Po4a::TeX::register_verbatim_environment; +*generic_command = \&Locale::Po4a::TeX::generic_command; +*is_closed = \&Locale::Po4a::TeX::is_closed; +*in_verbatim = \&Locale::Po4a::TeX::in_verbatim; +*translate_buffer = \&Locale::Po4a::TeX::translate_buffer; +use vars qw($RE_ESCAPE $ESCAPE + $RE_VERBATIM + $RE_COMMENT $RE_PRE_COMMENT + $no_wrap_environments $separated_commands + %commands %environments + %command_categories %separated + %env_separators %debug + %translate_buffer_env + @exclude_include @comments); +*RE_ESCAPE = \$Locale::Po4a::TeX::RE_ESCAPE; +*ESCAPE = \$Locale::Po4a::TeX::ESCAPE; +*RE_VERBATIM = \$Locale::Po4a::TeX::RE_VERBATIM; +*RE_COMMENT = \$Locale::Po4a::TeX::RE_COMMENT; +*RE_PRE_COMMENT = \$Locale::Po4a::TeX::RE_PRE_COMMENT; +*no_wrap_environments = \$Locale::Po4a::TeX::no_wrap_environments; +*separated_commands = \$Locale::Po4a::TeX::separated_commands; +*commands = \%Locale::Po4a::TeX::commands; +*environments = \%Locale::Po4a::TeX::environments; +*command_categories = \%Locale::Po4a::TeX::command_categories; +*separated = \%Locale::Po4a::TeX::separated; +*env_separators = \%Locale::Po4a::TeX::env_separators; +*debug = \%Locale::Po4a::TeX::debug; +*translate_buffer_env = \%Locale::Po4a::TeX::translate_buffer_env; +*exclude_include = \@Locale::Po4a::TeX::exclude_include; +*comments = \@Locale::Po4a::TeX::comments; + +$ESCAPE = "\@"; +$RE_ESCAPE = "\@"; +$RE_VERBATIM = "\@example"; +$RE_COMMENT = "\\\@(?:c|comment)\\b"; +$RE_PRE_COMMENT = "(?<!\@)(?:\@\@)*"; + +my %break_line = (); + +# translate_line_command indicate if the arguments to the command handled +# by line_command() should be translated: +# undefined: arguments are not translated +# 0: there should be no arguments +# 1: arguments should be translated +my %translate_line_command = (); + +foreach ( + qw/example smallexample tex display smalldisplay verbatim format smallformat + flushleft flushright lisp smalllisp ignore/ + ) +{ + register_verbatim_environment($_); + $commands{$_} = \&environment_line_command; + $translate_line_command{$_} = 0; # There should be no arguments + $break_line{$_} = 1; +} + +my $docheader_pushed = 0; + +# The header shall not be written before the Texinfo header (which include +# the \input command that define the texinfo macros) +sub docheader { + return ""; +} + +sub push_docheader { + return if $docheader_pushed; + my $self = shift; + $self->pushline(<<END); +\@c =========================================================================== +\@c +\@c This file was generated with po4a. Translate the source file. +\@c +\@c =========================================================================== + +END + $docheader_pushed = 1; +} + +sub parse { + my $self = shift; + my ( $line, $ref ); + my $paragraph = ""; # Buffer where we put the paragraph while building + my @env = (); # environment stack + my $t = ""; + $docheader_pushed = 0; + + print STDERR "The TexInfo module of po4a is not ready for production use, and needs a new maintainer.\n" + . "Please contact the po4a team if you want to help: send us patches, not bug reports.\n" + . "(use -o no-warn to remove this message)\n" + unless $self->{options}{'no-warn'}; + LINE: + undef $self->{type}; + ( $line, $ref ) = $self->shiftline(); + + while ( defined($line) ) { + chomp($line); + $self->{ref} = "$ref"; + + if ( $line =~ /^\s*@\s*po4a\s*:/ ) { + parse_definition_line( $self, $line ); + goto LINE; + } + + my $closed = 1; + if ( !in_verbatim(@env) ) { + $closed = is_closed($paragraph); + } + + # if (not $closed) { + # print "not closed. line: '$line'\n para: '$paragraph'\n"; + # } + + if ( $closed and $line =~ /^\s*$/ ) { + + # An empty line. This indicates the end of the current + # paragraph. + $paragraph .= $line . "\n"; + if ( length($paragraph) ) { + ( $t, @env ) = translate_buffer( $self, $paragraph, undef, @env ); + $self->pushline($t); + $paragraph = ""; + } + } elsif ( $line =~ m/^\\input / ) { + if ( length($paragraph) ) { + ( $t, @env ) = translate_buffer( $self, $paragraph, undef, @env ); + $self->pushline($t); + $paragraph = ""; + } + $self->pushline( $line . "\n" ); + $self->push_docheader(); + } elsif ( $line =~ m/^$RE_COMMENT/ ) { + if ( $line =~ m/^\@(?:c|comment).*?TRANSLATORS:(.*)$/ ) { + $self->add_comment($1); + } + $self->push_docheader(); + $self->pushline( $line . "\n" ); + } elsif ( $closed + and ( $line =~ /^@([^ ]*?)(?: +(.*))?$/ ) + and ( defined $commands{$1} ) + and ( $break_line{$1} ) ) + { + if ( length($paragraph) ) { + ( $t, @env ) = translate_buffer( $self, $paragraph, undef, @env ); + $self->pushline($t); + $paragraph = ""; + } + my $arg = $2; + my @args = (); + if ( defined $arg and length $arg ) { + + # FIXME: keep the spaces ? + $arg =~ s/\s*$//s; + @args = ( " ", $arg ); + } + ( $t, @env ) = &{ $commands{$1} }( $self, $1, "", \@args, \@env, 1 ); + $self->pushline( $t . "\n" ); + } else { + + # continue the same paragraph + $paragraph .= $line . "\n"; + } + + # Reinit the loop + ( $line, $ref ) = $self->shiftline(); + undef $self->{type}; + } + + if ( length($paragraph) ) { + ( $t, @env ) = translate_buffer( $self, $paragraph, undef, @env ); + $self->pushline($t); + $paragraph = ""; + } +} # end of parse + +sub line_command { + my $self = shift; + my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift ); + my $no_wrap = shift; + print "line_command($command,$variant,@$args,@$env,$no_wrap)=" + if ( $debug{'commands'} ); + + my $translated = $ESCAPE . $command; + my $line = $args->[1]; + if ( defined $line and length $line ) { + if ( defined $translate_line_command{$command} + and $translate_line_command{$command} ) + { + # $no_wrap could be forced to 1, but it should already be set + my ( $t, $e ) = $self->translate_buffer( $line, $no_wrap, @$env, $command ); + $translated .= " " . $t; + } else { + $translated .= " " . $line; + } + } + print "($translated,@$env)\n" + if ( $debug{'commands'} ); + return ( $translated, @$env ); +} + +sub defindex_line_command { + my $self = shift; + my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift ); + my $no_wrap = shift; + print "line_command($command,$variant,@$args,@$env,$no_wrap)=" + if ( $debug{'commands'} ); + my $idx = $$args[1] . "index"; + $commands{$idx} = \&line_command; + $break_line{$idx} = 1; + $translate_line_command{$idx} = 1; + + return line_command( $self, $command, $variant, $args, $env, $no_wrap ); +} + +sub translate_buffer_menu { + my ( $self, $buffer, $no_wrap, @env ) = ( shift, shift, shift, @_ ); + print STDERR "translate_buffer_menu($buffer,$no_wrap,@env)=" + if ( $debug{'translate_buffer'} ); + + my $translated_buffer = ""; + my $spaces = ""; + if ( $buffer =~ m/(\s*)$/s ) { + $spaces = $1; + } + + while ( $buffer =~ m/^(.*?)((?:\n|^)\* )(.*)$/s ) { + my $sep = $2; + $buffer = $3; + my ( $t, @e ) = $self->translate_buffer_menuentry( $1, $no_wrap, @env, "menuentry" ); + $translated_buffer .= $t . $sep; + } + my ( $t, @e ) = $self->translate_buffer_menuentry( $buffer, $no_wrap, @env, "menuentry" ); + $translated_buffer .= $t; + + $translated_buffer .= $spaces; + + print STDERR "($translated_buffer,@env)\n" + if ( $debug{'translate_buffer'} ); + return ( $translated_buffer, @env ); +} +$translate_buffer_env{"menu"} = \&translate_buffer_menu; +$translate_buffer_env{"detailmenu"} = \&translate_buffer_menu; +$translate_buffer_env{"direntry"} = \&translate_buffer_menu; + +my $menu_width = 78; +my $menu_sep_width = 30; + +sub translate_buffer_menuentry { + my ( $self, $buffer, $no_wrap, @env ) = ( shift, shift, shift, @_ ); + print STDERR "translate_buffer_menuentry($buffer,$no_wrap,@env)=" + if ( $debug{'translate_buffer'} ); + + my $translated_buffer = ""; + + if ( $buffer =~ m/^(.*?)(::)(?:\s+(.*))?$/s + or $buffer =~ m/^(.*?: .*?)(\.)\s+(.*)$/s ) + { + my ( $name, $sep, $description ) = ( $1, $2, $3 ); + my ( $t, @e ) = $self->translate_buffer( $name, $no_wrap, @env ); + $translated_buffer = $t . $sep . " "; + my $l = length($translated_buffer) + 2; + if ( $l < $menu_sep_width - 1 ) { + $translated_buffer .= ' ' x ( $menu_sep_width - 1 - $l ); + $l = $menu_sep_width - 1; + } + if ($description) { + ( $t, @e ) = $self->translate_buffer( $description, $no_wrap, @env ); + } + + # Replace newlines with space for proper wrapping + # See https://github.com/mquinson/po4a/issues/122 + $t =~ s/\n/ /sg; + + # Remove trailing spaces + $t =~ s/\s*$//; + $t = Locale::Po4a::Po::wrap( $t, $menu_width - $l - 2 ); + my $spaces = ' ' x ( $l + 2 ); + $t =~ s/\n/\n$spaces/sg; + $translated_buffer .= $t; + } else { + + # FIXME: no-wrap if a line start by a space + my ( $t, @e ) = $self->translate_buffer( $buffer, $no_wrap, @env ); + $translated_buffer = $t; + } + + print STDERR "($translated_buffer,@env)\n" + if ( $debug{'translate_buffer'} ); + return ( $translated_buffer, @env ); +} + +sub translate_buffer_ignore { + my ( $self, $buffer, $no_wrap, @env ) = ( shift, shift, shift, @_ ); + print STDERR "translate_buffer_ignore($buffer,$no_wrap,@env);\n" + if ( $debug{'translate_buffer'} ); + return ( $buffer, @env ); +} +$translate_buffer_env{"ignore"} = \&translate_buffer_ignore; + +foreach ( + qw(appendix section cindex findex kindex opindex pindex tindex vindex subsection + dircategory subtitle include + exdent center unnumberedsec + heading unnumbered unnumberedsubsec + unnumberedsubsubsec appendixsec appendixsubsec + appendixsubsubsec majorheading chapheading subheading + subsubheading shorttitlepage + subsubsection top item itemx chapter settitle + title author) + ) +{ + $commands{$_} = \&line_command; + $break_line{$_} = 1; + $translate_line_command{$_} = 1; +} +foreach ( + qw(c comment clear set setfilename setchapternewpage vskip synindex + syncodeindex need fonttextsize printindex headings finalout sp + definfoenclose) + ) +{ + $commands{$_} = \&line_command; + $break_line{$_} = 1; +} +foreach (qw(defcodeindex defindex)) { + $commands{$_} = \&defindex_line_command; + $break_line{$_} = 1; +} + +# definfoenclose: command definition => translate? +foreach ( + qw(insertcopying page bye summarycontents shortcontents contents + noindent) + ) +{ + $commands{$_} = \&line_command; + $break_line{$_} = 1; + $translate_line_command{$_} = 0; +} + +foreach ( + qw(defcv deffn + defivar defmac defmethod defop + defopt defspec deftp deftypecv + deftypefn deftypefun + deftypeivar deftypemethod + deftypeop deftypevar deftypevr + defun defvar defvr) + ) +{ + $commands{$_} = \&environment_line_command; + $translate_line_command{$_} = 1; + $break_line{$_} = 1; +} +foreach ( + qw(defcvx deffnx defivarx defmacx defmethodx defopx defoptx + defspecx deftpx deftypecvx deftypefnx deftypefunx deftypeivarx + deftypemethodx deftypeopx deftypevarx deftypevrx defunx + defvarx defvrx) + ) +{ + $commands{$_} = \&line_command; + $translate_line_command{$_} = 1; + $break_line{$_} = 1; +} + +foreach ( + qw(titlefont w i r b sansserif sc slanted strong t cite email + footnote indicateurl emph ref xref pxref inforef kbd key + acronym), + + # The following commands could cause problems since their arguments + # have a semantic and a translator could decide not to translate code but + # still translate theses short words if they appear in another context. + qw(file command dfn dmn option math code samp var) + ) +{ + register_generic_command("-$_,{_}"); +} + +register_generic_command("*anchor,{_}"); +register_generic_command("*refill,"); + +$translate_line_command{'node'} = 1; +$no_wrap_environments .= " node"; +$break_line{'node'} = 1; + +# @node Comments, Minimum, Conventions, Overview +$commands{'node'} = sub { + my $self = shift; + my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift ); + my $no_wrap = shift; + print "node($command,$variant,@$args,@$env,$no_wrap)=" + if ( $debug{'commands'} ); + + my $translated = $ESCAPE . $command; + my $line = $args->[1]; + if ( defined $line and length $line ) { + my @pointers = split( /, */, $line ); + my @t; + foreach (@pointers) { + push @t, $self->translate( $_, $self->{ref}, $command, "wrap" => 0 ); + } + $translated .= " " . join( ", ", @t ); + } + + print "($translated,@$env)\n" + if ( $debug{'commands'} ); + return ( $translated, @$env ); +}; + +sub environment_command { + my $self = shift; + my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift ); + my $no_wrap = shift; + print "environment_command($command,$variant,@$args,@$env,$no_wrap)=" + if ( $debug{'commands'} ); + my ( $t, @e ) = ( "", () ); + + ( $t, @e ) = generic_command( $self, $command, $variant, $args, $env, $no_wrap ); + @e = ( @$env, $command ); + + print "($t,@e)\n" + if ( $debug{'commands'} ); + return ( $t, @e ); +} + +sub environment_line_command { + my $self = shift; + my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift ); + my $no_wrap = shift; + print "environment_command_line($command,$variant,@$args,@$env,$no_wrap)=" + if ( $debug{'commands'} ); + my ( $t, @e ) = ( "", () ); + + ( $t, @e ) = line_command( $self, $command, $variant, $args, $env, $no_wrap ); + @e = ( @$env, $command ); + + print "($t,@e)\n" + if ( $debug{'commands'} ); + return ( $t, @e ); +} + +## push the environment in the environment stack, and do not translate +## the command +#sub push_environment { +# my $self = shift; +# my ($command,$variant,$args,$env) = (shift,shift,shift,shift); +# print "push_environment($command,$variant,@$args,@$env)=" +# if ($debug{'environments'}); +# +# my ($t,@e) = generic_command($self,$command,$variant,$args,$env); +# +# print "($t,@e)\n" +# if ($debug{'environments'}); +# return ($t,@e); +#} +# +foreach ( + qw(detailmenu menu titlepage group copying + documentdescription cartouche + direntry + ifdocbook ifhtml ifinfo ifplaintext iftex ifxml + ifnotdocbook ifnothtml ifnotinfo ifnotplaintext ifnottex ifnotxml) + ) +{ + $commands{$_} = \&environment_line_command; + $translate_line_command{$_} = 0; + $break_line{$_} = 1; +} +foreach (qw(enumerate multitable ifclear ifset)) { + $commands{$_} = \&environment_line_command; + $break_line{$_} = 1; +} +foreach ( + qw(quotation smallquotation + indentedblock smallindentedblock + raggedright) + ) +{ + $commands{$_} = \&environment_line_command; + $translate_line_command{$_} = 1; + $break_line{$_} = 1; +} + +$env_separators{'format'} = "(?:(?:^|\n)\\\*|END-INFO-DIR-ENTRY|START-INFO-DIR-ENTRY)"; +$env_separators{'multitable'} = "(?:\@item|\@tab)"; + +my $end_command = $commands{'end'}; +register_generic_command("*end, "); +$commands{'end'} = $end_command; +$break_line{'end'} = 1; + +register_generic_command("*macro, "); +$commands{'macro'} = \&environment_command; +$break_line{'macro'} = 1; +register_generic_command("*itemize, "); +$commands{'itemize'} = \&environment_command; +$break_line{'itemize'} = 1; +register_generic_command("*table, "); +$commands{'table'} = \&environment_command; +$break_line{'table'} = 1; + +# TODO: is_closed, use a regexp: \ does not escape the closing brace. +# TBC on LaTeX. +# In Texinfo, it appears with the "code" command. Maybe this command should +# be used as verbatim. (Expressions.texi) + +# TODO: @include @ignore + +# TBC: node Indices + +1; diff --git a/lib/Locale/Po4a/Text.pm b/lib/Locale/Po4a/Text.pm new file mode 100644 index 0000000..b6633c2 --- /dev/null +++ b/lib/Locale/Po4a/Text.pm @@ -0,0 +1,1072 @@ +#!/usr/bin/perl -w + +# Po4a::Text.pm +# +# extract and translate translatable strings from a text documents +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +######################################################################## + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::Text - convert text documents from/to PO files + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::Text is a module to help the translation of text documents into +other [human] languages. + +Paragraphs are split on empty lines (or lines containing only spaces or +tabulations). + +If a paragraph contains a line starting by a space (or tabulation), this +paragraph won't be rewrapped. + +=cut + +package Locale::Po4a::Text; + +use 5.16.0; +use strict; +use warnings; + +require Exporter; +use vars qw(@ISA @EXPORT); +@ISA = qw(Locale::Po4a::TransTractor); +@EXPORT = qw(); + +use Locale::Po4a::TransTractor; +use Locale::Po4a::Common; +use YAML::Tiny; +use Syntax::Keyword::Try; + +=head1 OPTIONS ACCEPTED BY THIS MODULE + +These are this module's particular options: + +=over + +=item B<keyvalue> + +Treat paragraphs that look like a colon-separated key-value pair as verbatim +(with the C<no-wrap> flag in the PO file). A key-value pair string is a string +like C<key: value>, containing one or more non-colon and non-space characters +followed by a colon followed by at least one non-space character before the +end of the line. + +=cut + +my $keyvalue = 0; + +=item B<nobullets> + +Deactivate the detection of bullets. + +By default, when a bullet is detected, the bullet paragraph is not considered +as a verbatim paragraph (with the C<no-wrap> flag in the PO file). Instead, the +corresponding paragraph is rewrapped in the translation. + +=cut + +my $bullets = 1; + +=item B<tabs=>I<mode> + +Specify how tabulations shall be handled. The I<mode> can be any of: + +=over + +=item B<split> + +Lines with tabulations introduce breaks in the current paragraph. + +=item B<verbatim> + +Paragraph containing tabulations will not be re-wrapped. + +=back + +By default, tabulations are considered as spaces. + +=cut + +my $tabs = ""; + +=item B<breaks=>I<regex> + +A regular expression matching lines which introduce breaks. +The regular expression will be anchored so that the whole line must match. + +=cut + +my $breaks; + +=item B<debianchangelog> + +Handle the header and footer of +released versions, which only contain non translatable information. + +=cut + +my $debianchangelog = 0; + +=item B<fortunes> + +Handle the fortunes format, which separate fortunes with a line which +consists in '%' or '%%', and use '%%' as the beginning of a comment. + +=cut + +my $fortunes = 0; + +=item B<markdown> + +Handle some special markup in Markdown-formatted texts. + +=cut + +my $markdown = 0; + +=item B<yfm_keys> (markdown-only) + +Comma-separated list of keys to process for translation in the YAML Front Matter +section. All other keys are skipped. Keys are matched with a case-sensitive +match. If B<yfm_paths> and B<yfm_keys> are used together, values are included if +they are matched by at least one of the options. Array values are always translated, +unless the B<yfm_skip_array> option is provided. + +=cut + +my %yfm_keys = (); + +=item B<yfm_lenient> (markdown only) + +Allow the YAML Front Matter parser to fail on malformated headers. This is +particularly helpful when your file starts with a horizontal ruler instead +of a YAML Front Matter, but you insist on using three dashes only for your +ruler. + +=cut + +my $yfm_lenient = 0; + +=item B<yfm_paths> (markdown only) + +=item B<yfm_paths> + +Comma-separated list of hash paths to process for extraction in the YAML +Front Matter section, all other paths are skipped. Paths are matched with a +case-sensitive match. If B<yfm_paths> and B<yfm_keys> are used together, +values are included if they are matched by at least one of the options. +Arrays values are always returned unless the B<yfm_skip_array> option is +provided. + +=cut + +my %yfm_paths = (); + +=item B<yfm_skip_array> (markdown-only) + +Do not translate array values in the YAML Front Matter section. + +=cut + +my $yfm_skip_array = 0; + +=item B<control>[B<=>I<field_list>] + +Handle Debian's control files. +A comma-separated list of fields to be translated can be provided. + +=cut + +my %control = (); + +=item B<neverwrap> + +Prevent po4a from wrapping any lines. This means that every content is handled verbatim, even simple paragraphs. + +=cut + +my $defaultwrap = 1; + +my $parse_func = \&parse_fallback; + +my @comments = (); + +=back + +=cut + +sub initialize { + my $self = shift; + my %options = @_; + + $self->{options}{'control'} = ""; + $self->{options}{'breaks'} = 1; + $self->{options}{'debianchangelog'} = 1; + $self->{options}{'debug'} = 1; + $self->{options}{'fortunes'} = 1; + $self->{options}{'markdown'} = 1; + $self->{options}{'yfm_keys'} = ''; + $self->{options}{'yfm_lenient'} = 0; + $self->{options}{'yfm_paths'} = ''; + $self->{options}{'yfm_skip_array'} = 0; + $self->{options}{'nobullets'} = 0; + $self->{options}{'keyvalue'} = 1; + $self->{options}{'tabs'} = 1; + $self->{options}{'verbose'} = 1; + $self->{options}{'neverwrap'} = 1; + + foreach my $opt ( keys %options ) { + die wrap_mod( "po4a::text", dgettext( "po4a", "Unknown option: %s" ), $opt ) + unless exists $self->{options}{$opt}; + $self->{options}{$opt} = $options{$opt}; + } + + $keyvalue = 1 if ( defined $options{'keyvalue'} ); + $bullets = 0 if ( defined $options{'nobullets'} ); + $tabs = $options{'tabs'} if ( defined $options{'tabs'} ); + $breaks = $options{'breaks'} if ( defined $options{'breaks'} ); + $defaultwrap = 0 if ( defined $options{'neverwrap'} ); + + $parse_func = \&parse_debianchangelog if ( defined $options{'debianchangelog'} ); + $parse_func = \&parse_fortunes if ( defined $options{'fortunes'} ); + + if ( defined $options{'markdown'} ) { + $parse_func = \&parse_markdown; + $markdown = 1; + map { + $_ =~ s/^\s+|\s+$//g; # Trim the keys before using them + $yfm_keys{$_} = 1 + } ( split( ',', $self->{options}{'yfm_keys'} ) ); + map { + $_ =~ s/^\s+|\s+$//g; # Trim the keys before using them + $yfm_paths{$_} = 1 + } ( split( ',', $self->{options}{'yfm_paths'} ) ); + + # map { print STDERR "key $_\n"; } (keys %yfm_keys); + $yfm_skip_array = $self->{options}{'yfm_skip_array'}; + $yfm_lenient = $self->{options}{'yfm_lenient'}; + } else { + foreach my $opt (qw(yfm_keys yfm_lenient yfm_skip_array)) { + die wrap_mod( "po4a::text", dgettext( "po4a", "Option %s is only valid when parsing markdown files." ), + $opt ) + if exists $options{$opt}; + } + } + + if ( defined $options{'control'} ) { + $parse_func = \&parse_control; + if ( $options{'control'} eq "1" ) { + $control{''} = 1; + } else { + foreach my $tag ( split( ',', $options{'control'} ) ) { + $control{$tag} = 1; + } + } + } +} + +sub parse_fallback { + my ( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ) = @_; + if ( + ( $line =~ /^\s*$/ ) + or ( defined $breaks + and $line =~ m/^$breaks$/ ) + ) + { + # Break paragraphs on lines containing only spaces + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $wrapped_mode = $defaultwrap unless defined( $self->{verbatim} ); + $self->pushline( $line . "\n" ); + undef $self->{controlkey}; + } elsif ( $line =~ /^-- $/ ) { + + # Break paragraphs on email signature hint + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $wrapped_mode = $defaultwrap; + $self->pushline( $line . "\n" ); + } elsif ( $line =~ /^=+$/ + or $line =~ /^_+$/ + or $line =~ /^-+$/ ) + { + $wrapped_mode = 0; + $paragraph .= $line . "\n"; + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $wrapped_mode = $defaultwrap; + } elsif ( $tabs eq "split" and $line =~ m/\t/ and $paragraph !~ m/\t/s ) { + $wrapped_mode = 0; + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = "$line\n"; + $wrapped_mode = 0; + } elsif ( $tabs eq "split" and $line !~ m/\t/ and $paragraph =~ m/\t/s ) { + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = "$line\n"; + $wrapped_mode = $defaultwrap; + } else { + if ( $line =~ /^\s/ ) { + + # A line starting by a space indicates a non-wrap + # paragraph + $wrapped_mode = 0; + } + if ( + $markdown + and ( + $line =~ /\S $/ # explicit newline + or $line =~ /"""$/ + ) + ) + { # """ textblock inside macro begin + # Markdown markup needing separation _after_ this line + $end_of_paragraph = 1; + } else { + undef $self->{bullet}; + undef $self->{indent}; + } + + # TODO: comments + $paragraph .= $line . "\n"; + } + return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ); +} + +sub parse_debianchangelog { + my ( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ) = @_; + if ( + $expect_header + and $line =~ /^(\w[-+0-9a-z.]*)\ \(([^\(\) \t]+)\) # src, version + \s+([-+0-9a-z.]+); # distribution + \s*urgency\s*\=\s*(.*\S)\s*$/ix + ) + { # + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $self->pushline("$line\n"); + $expect_header = 0; + } elsif ( $line =~ + m/^ \-\- (.*) <(.*)> ((\w+\,\s*)?\d{1,2}\s+\w+\s+\d{4}\s+\d{1,2}:\d\d:\d\d\s+[-+]\d{4}(\s+\([^\\\(\)]+\)))$/ ) + { + # Found trailer + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $self->pushline("$line\n"); + $expect_header = 1; + } else { + return parse_fallback( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ); + } + return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ); +} + +sub parse_fortunes { + my ( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ) = @_; + + # Always include paragraphs in no-wrap mode, + # because the formatting of the fortunes + # is usually hand-crafted and matters. + $wrapped_mode = 0; + + # Check if there are more lines in the file. + my $last_line_of_file = 0; + my ( $nextline, $nextref ) = $self->shiftline(); + if ( defined $nextline ) { + + # There is a next line, put it back. + $self->unshiftline( $nextline, $nextref ); + } else { + + # Nope, no more lines available. + $last_line_of_file = 1; + } + + # Is the line the end of a fortune or the last line of the file? + if ( $line =~ m/^%%?\s*$/ or $last_line_of_file ) { + + # Add the last line to the paragraph + if ($last_line_of_file) { + $paragraph .= $line; + } + + # Remove the last newline for the translation. + chomp($paragraph); + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + + # Add the last newline again for the output. + $self->pushline("\n"); + + # Also add the separator line, if this is not the end of the file. + if ( !$last_line_of_file ) { + $self->pushline("$line\n"); + } + } else { + $paragraph .= $line . "\n"; + } + return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ); +} + +sub parse_control { + my ( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ) = @_; + if ( $line =~ m/^([^ :]*): *(.*)$/ ) { + warn wrap_mod( "po4a::text", dgettext( "po4a", "Unrecognized section: %s" ), $paragraph ) + unless $paragraph eq ""; + my $tag = $1; + my $val = $2; + my $t; + if ( $control{''} or $control{$tag} ) { + $t = $self->translate( + $val, $self->{ref}, + $tag . ( defined $self->{controlkey} ? ", " . $self->{controlkey} : "" ), + "wrap" => 0 + ); + } else { + $t = $val; + } + if ( not defined $self->{controlkey} ) { + $self->{controlkey} = "$tag: $val"; + } + $self->pushline("$tag: $t\n"); + $paragraph = ""; + $wrapped_mode = $defaultwrap; + $self->{bullet} = ""; + $self->{indent} = " "; + } elsif ( $line eq " ." ) { + do_paragraph( $self, $paragraph, $wrapped_mode, + "Long Description" . ( defined $self->{controlkey} ? ", " . $self->{controlkey} : "" ) ); + $paragraph = ""; + $self->pushline( $line . "\n" ); + $self->{bullet} = ""; + $self->{indent} = " "; + } elsif ( $line =~ m/^ Link: +(.*)$/ ) { + do_paragraph( $self, $paragraph, $wrapped_mode, + "Long Description" . ( defined $self->{controlkey} ? ", " . $self->{controlkey} : "" ) ); + my $link = $1; + my $t1 = $self->translate( "Link: ", $self->{ref}, "Link", "wrap" => 0 ); + my $t2 = $self->translate( + $link, $self->{ref}, + "Link" . ( defined $self->{controlkey} ? ", " . $self->{controlkey} : "" ), + "wrap" => 0 + ); + $self->pushline(" $t1$t2\n"); + $paragraph = ""; + } elsif ( defined $self->{indent} + and $line =~ m/^$self->{indent}\S/ ) + { + $paragraph .= $line . "\n"; + $self->{type} = "Long Description" . ( defined $self->{controlkey} ? ", " . $self->{controlkey} : "" ); + } else { + return parse_fallback( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ); + } + return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ); +} + +# Support pandoc's format of specifying bibliographic information. +# +# If the first line starts with a percent sign, the following +# is considered to be title, author, and date. +# +# If the information spans multiple lines, the following +# lines must be indented with space. +# If information is omitted, it's just a percent sign +# and a blank line. +# +# Examples with missing title resp. missing authors: +# +# % +# % Author +# +# % My title +# % +# % June 14, 2018 +sub parse_markdown_bibliographic_information { + my ( $self, $line, $ref ) = @_; + my ( $nextline, $nextref ); + + # The first match is always the title or an empty string (no title). + if ( $line =~ /^%(.*)$/ ) { + my $title = $1; + + # Remove leading and trailing whitespace + $title =~ s/^\s+|\s+$//g; + + # If there's some text, look for continuation lines + if ( length($title) ) { + ( $nextline, $nextref ) = $self->shiftline(); + while ( $nextline =~ /^\s+(.+)$/ ) { + $nextline = $1; + $nextline =~ s/^\s+|\s+$//g; + $title .= " " . $nextline; + ( $nextline, $nextref ) = $self->shiftline(); + } + + # Now the title should be complete, give it to translation. + my $t = $self->translate( $title, $ref, "Pandoc title block", "wrap" => $defaultwrap ); + $t = Locale::Po4a::Po::wrap($t); + my $first_line = 1; + foreach my $translated_line ( split /\n/, $t ) { + if ($first_line) { + $first_line = 0; + $self->pushline( "% " . $translated_line . "\n" ); + } else { + $self->pushline( " " . $translated_line . "\n" ); + } + } + } else { + + # Title has been empty, fetch the next line + # if that are the authors. + $self->pushline("%\n"); + ( $nextline, $nextref ) = $self->shiftline(); + } + + # The next line can contain the author or an empty string. + if ( $nextline =~ /^%(.*)$/ ) { + my $author_ref = $nextref; + my $authors = $1; + + # If there's some text, look for continuation lines + if ( length($authors) ) { + ( $nextline, $nextref ) = $self->shiftline(); + while ( $nextline =~ /^\s+(.+)$/ ) { + $nextline = $1; + $authors .= ";" . $nextline; + ( $nextline, $nextref ) = $self->shiftline(); + } + + # Now the authors should be complete, split them by semicolon + my $first_line = 1; + foreach my $author ( split /;/, $authors ) { + $author =~ s/^\s+|\s+$//g; + + # Skip empty authors + next unless length($author); + my $t = $self->translate( $author, $author_ref, "Pandoc title block" ); + if ($first_line) { + $first_line = 0; + $self->pushline( "% " . $t . "\n" ); + } else { + $self->pushline( " " . $t . "\n" ); + } + } + } else { + + # Authors has been empty, fetch the next line + # if that is the date. + $self->pushline("%\n"); + ( $nextline, $nextref ) = $self->shiftline(); + } + + # The next line can contain the date. + if ( $nextline =~ /^%(.*)$/ ) { + my $date = $1; + + # Remove leading and trailing whitespace + $date =~ s/^\s+|\s+$//g; + my $t = $self->translate( $date, $nextref, "Pandoc title block" ); + $self->pushline( "% " . $t . "\n" ); + + # Now we're done with the bibliographic information + return; + } + } + + # The line did not start with a percent sign, to stop + # parsing bibliographic information and return the + # line to the normal parsing. + $self->unshiftline( $nextline, $nextref ); + return; + } +} + +# Support YAML Front Matter in Markdown documents +# +# If the text starts with a YAML ---\n separator, the full text until +# the next YAML ---\n separator is considered YAML metadata. The ...\n +# "end of document" separator can be used at the end of the YAML +# block. +# +sub parse_markdown_yaml_front_matter { + my ( $self, $line, $blockref ) = @_; + my $yfm; + my @saved_ctn; + my ( $nextline, $nextref ) = $self->shiftline(); + push @saved_ctn, ( $nextline, $nextref ); + while ( defined($nextline) ) { + last if ( $nextline =~ /^(---|\.\.\.)$/ ); + $yfm .= $nextline; + ( $nextline, $nextref ) = $self->shiftline(); + if ( $nextline =~ /: [\[\{]/ ) { + die wrap_mod( + "po4a::text", + dgettext( + "po4a", + "Inline lists and dictionaries on a single line are not correctly handled the parser we use (YAML::Tiny): they are interpreted as regular strings. " + . "Please use multi-lines definitions instead. Offending line:\n %s" + ), + $nextline + ); + + } + push @saved_ctn, ( $nextline, $nextref ); + } + + my $yamlarray; # the parsed YFM content + my $yamlres; # containing the parse error, if any + try { + $yamlarray = YAML::Tiny->read_string($yfm); + } catch { + $yamlres = $@; + } + + if ( defined($yamlres) ) { + if ($yfm_lenient) { + $yamlres =~ s/ at .*$//; # Remove the error localisation in YAML::Tiny die message, if any (for our test) + warn wrap_mod( + "po4a::text", + dgettext( + "po4a", + "Proceeding even if the YAML Front Matter could not be parsed. Remove the 'yfm_lenient' option for a stricter behavior.\nIgnored error: %s" + ), + $yamlres + ); + my $len = ( scalar @saved_ctn ) - 1; + while ( $len >= 0 ) { + $self->unshiftline( $saved_ctn[ $len - 1 ], $saved_ctn[$len] ); + + # print STDERR "Unshift ".$saved_ctn[ $len - 1] ." | ". $saved_ctn[$len] ."\n"; + $len -= 2; + } + return 0; # Not a valid YAML + } else { + die wrap_mod( + "po4a::text", + dgettext( + "po4a", + "Could not get the YAML Front Matter from the file. If you did not intend to add a YAML front matter " + . "but an horizontal ruler, please use '----' instead, or pass the 'yfm_lenient' option.\nError: %s\nContent of the YFM: %s" + ), + $yamlres, $yfm + ); + } + } + + $self->handle_yaml( 1, $blockref, $yamlarray, \%yfm_keys, $yfm_skip_array, \%yfm_paths ); + $self->pushline("---\n"); + return 1; # Valid YAML +} + +sub parse_markdown { + my ( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ) = @_; + if ($expect_header) { + + # It is only possible to find and parse the bibliographic + # information or the YAML Front Matter from the first line. + # Anyway, stop expecting header information for the next run. + $expect_header = 0; + if ( $line =~ /^%(.*)$/ ) { + parse_markdown_bibliographic_information( $self, $line, $ref ); + return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ); + } elsif ( $line =~ /^---$/ ) { + if ( parse_markdown_yaml_front_matter( $self, $line, $ref ) ) { # successfully parsed + return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ); + } + + # If it wasn't a YFM paragraph after all, stop expecting a header and keep going + } + } + if ( ( $line =~ m/^(={4,}|-{4,})$/ ) + and ( defined($paragraph) ) + and ( $paragraph =~ m/^[^\n]*\n$/s ) + and ( length($paragraph) == ( length($line) + 1 ) ) ) + { + # XXX: There can be any number of underlining according + # to the documentation. This detection, which avoid + # translating the formatting, is only supported if + # the underlining has the same size as the header text. + # Found title + $wrapped_mode = 0; + my $level = $line; + $level =~ s/^(.).*$/$1/; + + # Remove the trailing newline from the title + chomp($paragraph); + my $t = $self->translate( + $paragraph, $self->{ref}, "Title $level", + "wrap" => 0, + "flags" => "markdown-text" + ); + + # Add the newline again for the output + $self->pushline( $t . "\n" ); + $paragraph = ""; + $wrapped_mode = $defaultwrap; + $self->pushline( ( $level x length($t) ) . "\n" ); + } elsif ( $line =~ m/^(#{1,6})( +)(.*?)( +\1)?$/ ) { + my $titlelevel1 = $1; + my $titlespaces = $2; + my $title = $3; + my $titlelevel2 = $4 || ""; + + # Found one line title + do_paragraph( $self, $paragraph, $wrapped_mode ); + $wrapped_mode = 0; + $paragraph = ""; + my $t = $self->translate( + $title, $self->{ref}, "Title $titlelevel1", + "wrap" => 0, + "flags" => "markdown-text" + ); + $self->pushline( $titlelevel1 . $titlespaces . $t . $titlelevel2 . "\n" ); + $wrapped_mode = $defaultwrap; + } elsif ( $line =~ /^[ ]{0,3}([*_-])\s*(?:\1\s*){2,}$/ ) { + + # Horizontal rule + do_paragraph( $self, $paragraph, $wrapped_mode ); + $self->pushline( $line . "\n" ); + $paragraph = ""; + $end_of_paragraph = 1; + } elsif ( $line =~ /^([ ]{0,3})(\[[^\]]+\]:[ \t]?.+)$/ ) { + my $indentation = $1; + my $linkreference = $2; + + # Link reference + # TODO: support multiline link reference definition + # TODO: treat link title properly + # https://spec.commonmark.org/0.30/#link-reference-definitions + do_paragraph( $self, $paragraph, $wrapped_mode ); + $wrapped_mode = 0; + $paragraph = ""; + my $t = $self->translate( + $linkreference, $self->{ref}, "Link reference", + "wrap" => 0, + "flags" => "link-reference" + ); + $self->pushline( $indentation . $t . "\n" ); + $wrapped_mode = $defaultwrap; + } elsif ( $line =~ /^([ ]{0,3})(([~`])\3{2,})(\s*)([^`]*)\s*$/ ) { + my $fence_space_before = $1; + my $fence = $2; + my $fencechar = $3; + my $fence_space_between = $4; + my $info_string = $5; + + # fenced code block + my $type = "Fenced code block" . ( $info_string ? " ($info_string)" : "" ); + do_paragraph( $self, $paragraph, $wrapped_mode ); + $wrapped_mode = 0; + $paragraph = ""; + $self->pushline("$line\n"); + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + my ( $nextline, $nextref ) = $self->shiftline(); + + while ( $nextline !~ /^\s{0,3}$fence$fencechar*\s*$/ ) { + $paragraph .= "$nextline"; + ( $nextline, $nextref ) = $self->shiftline(); + } + do_paragraph( $self, $paragraph, $wrapped_mode, $type ); + $self->pushline($nextline); + $paragraph = ""; + $end_of_paragraph = 1; + } elsif ( $line =~ /^([ ]{0,3})(([:])\3{2,})(\s*)([^`]*)\s*$/ ) { + my $fence_space_before = $1; + my $fence = $2; + my $fencechar = $3; + my $fence_space_between = $4; + my @info_string = ($5); + + # print STDERR "----------------\n"; + # print STDERR "line: $line\n"; + # print STDERR "fence: '$fence'; fencechar: '$fencechar'; info: '$info_string'\n"; + + # fenced div block (fenced with ::: where code blocks are fenced with ` or ~) + # https://pandoc.org/MANUAL.html#divs-and-spans + my $info = join( "|", map { chomp $_; $_ } @info_string ); + my $type = "Fenced div block" . ( $info ? " ($info)" : "" ); + do_paragraph( $self, $paragraph, $wrapped_mode ); + $wrapped_mode = 0; + $paragraph = ""; + $self->pushline("$line\n"); + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + + my $lvl = 1; + while ( $lvl > 0 ) { + my ( $nextline, $nextref ) = $self->shiftline(); + die wrap_mod( + "po4a::text", + dgettext( + "po4a", "Malformed fenced div block: Block starting at %s not closed before the end of the file." + ), + $ref + ) unless ( defined($nextline) ); + + # print STDERR "within $lvl: $nextline"; + if ( $nextline =~ /^\s*:::+\s*$/ ) { + my $info = join( "|", map { chomp $_; $_ } @info_string ); + $type = "Fenced div block" . ( $info ? " ($info)" : "" ); + if ( $paragraph ne "" ) { + do_paragraph( $self, $paragraph, $wrapped_mode, $type ); + $paragraph = ""; + } + $self->pushline($nextline); + $lvl--; + while ( scalar @info_string > $lvl ) { + pop @info_string; + } + } elsif ( $nextline =~ /^([ ]{0,3})(([:])\3{2,})(\s*)([^`]*)\s*$/ ) { + if ( $paragraph ne "" ) { + do_paragraph( $self, $paragraph, $wrapped_mode, $type ); + $paragraph = ""; + } + $self->pushline($nextline); + push @info_string, $5; + $lvl++; + } else { + $paragraph .= $nextline; + } + } + $paragraph = ""; + $end_of_paragraph = 1; + + # print STDERR "Out now ------------\n"; + } elsif ( + $line =~ /^\s*\[\[\!\S+\s*$/ # macro begin + or $line =~ /^\s*"""\s*\]\]\s*$/ + ) + { # """ textblock inside macro end + # Avoid translating Markdown lines containing only markup + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $wrapped_mode = $defaultwrap; + $self->pushline("$line\n"); + } elsif ( $line =~ /^\s*\[\[\!\S[^\]]*\]\]\s*$/ ) { # sole macro + # Preserve some Markdown markup as a single line + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = "$line\n"; + $wrapped_mode = 0; + $end_of_paragraph = 1; + } elsif ( $line =~ /^"""/ ) { # """ textblock inside macro end + # Markdown markup needing separation _before_ this line + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = "$line\n"; + $wrapped_mode = $defaultwrap; + } else { + return parse_fallback( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ); + } + return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ); +} + +sub parse { + my $self = shift; + my ( $line, $ref ); + my $paragraph = ""; + my $wrapped_mode = $defaultwrap; + my $expect_header = 1; + my $end_of_paragraph = 0; + ( $line, $ref ) = $self->shiftline(); + my $file = $ref; + $file =~ s/:[0-9]+$// if defined($line); + + while ( defined($line) ) { + $ref =~ m/^(.*):[0-9]+$/; + if ( $1 ne $file ) { + $file = $1; + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $wrapped_mode = $defaultwrap; + $expect_header = 1; + } + + chomp($line); + $self->{ref} = "$ref"; + ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ) = + &$parse_func( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ); + + # paragraphs starting by a bullet, or numbered + # or paragraphs with a line containing more than 3 consecutive spaces + # are considered as verbatim paragraphs + $wrapped_mode = 0 if ( $paragraph =~ m/^(\*|[0-9]+[.)] )/s + or $paragraph =~ m/[ \t][ \t][ \t]/s ); + + # Paragraphs starting with a table formating (GH extension) are also considered verbatim + $wrapped_mode = 0 if ( $paragraph =~ m/^\|/ ); + + $wrapped_mode = 0 if ( $tabs eq "verbatim" + and $paragraph =~ m/\t/s ); + + # Also consider keyvalue paragraphs verbatim, if requested + $wrapped_mode = 0 if ( $keyvalue == 1 + and $paragraph =~ m/^[^ :]+:.*[^\s].*$/s ); + if ($markdown) { + + # Some Markdown markup can (or might) not survive wrapping + $wrapped_mode = 0 + if ( + $paragraph =~ /^>/ms # blockquote + or $paragraph =~ /^( {8}|\t)/ms # monospaced + or $paragraph =~ /^\$(\S+[{}]\S*\s*)+/ms # Xapian macro + or $paragraph =~ /<(?![a-z]+[:@])/ms # maybe html (tags but not wiki <URI>) + or $paragraph =~ /^[^<]+>/ms # maybe html (tag with vertical space) + or $paragraph =~ /\S $/ms # explicit newline + or $paragraph =~ /\[\[\!\S[^\]]+$/ms # macro begin + ); + } + if ($end_of_paragraph) { + do_paragraph( $self, $paragraph, $wrapped_mode ); + $paragraph = ""; + $wrapped_mode = $defaultwrap; + $end_of_paragraph = 0; + } + ( $line, $ref ) = $self->shiftline(); + } + if ( length $paragraph ) { + do_paragraph( $self, $paragraph, $wrapped_mode ); + } +} + +sub do_paragraph { + my ( $self, $paragraph, $wrap ) = ( shift, shift, shift ); + my $type = shift || $self->{type} || "Plain text"; + my $flags = ""; + if ( $type eq "Plain text" and $markdown ) { + $flags = "markdown-text"; + } + + return if ( $paragraph eq "" ); + + $wrap = 0 unless $defaultwrap; + + # DEBUG + # $type .= " verbatim: '".($self->{verbatim}//"NONE")."' bullet: '$bullets' wrap: '$wrap' indent: '".($self->{indent}//"NONE")."' type: '".($self->{type}//"NONE")."'"; + # print STDERR "$type\n"; + + if ( $bullets and not defined $self->{verbatim} ) { + + # Detect bullets + # | * blah blah + # |<spaces> blah + # | ^-- aligned + # <empty line> + # + # The leading spaces are optional, and other bullets are supported: + # - blah o blah + blah + # 1. blah 1) blah (1) blah + TEST_BULLET: + if ( $paragraph =~ m/^(\s*)((?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+)([^\n]*\n)(.*)$/s ) { + my $para = $5; + my $bullet = $2; + my $indent1 = $1; + my $indent2 = "$1" . ( ' ' x length $bullet ); + my $text = $4; + while ( $para !~ m/^$indent2(?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+/ + and $para =~ s/^$indent2(\S[^\n]*\n)//s ) + { + $text .= $1; + } + + # TODO: detect if a line starts with the same bullet + if ( $text !~ m/\S[ \t][ \t][ \t]+\S/s ) { + my $bullet_regex = quotemeta( $indent1 . $bullet ); + $bullet_regex =~ s/[0-9]+/\\d\+/; + if ( $para eq '' + or $para =~ m/^(\s*)((?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+)([^\n]*\n)(.*)$/s + or $para =~ m/^$bullet_regex\S/s ) + { + my $trans = $self->translate( + $text, + $self->{ref}, + "Bullet: '$indent1$bullet'", + "flags" => "markdown-text", + "wrap" => $defaultwrap, + "wrapcol" => -( length $indent2 ) + ); + $trans =~ s/^/$indent1$bullet/s; + $trans =~ s/\n(.)/\n$indent2$1/sg; + $self->pushline( $trans . "\n" ); + if ( $para eq '' ) { + return; + } else { + + # Another bullet + $paragraph = $para; + goto TEST_BULLET; + } + } + } + } + } + + my $end = ""; + if ($wrap) { + $paragraph =~ s/^(.*?)(\n*)$/$1/s; + $end = $2 || ""; + } + my $t = $self->translate( + $paragraph, + $self->{ref}, + $type, + "comment" => join( "\n", @comments ), + "flags" => $flags, + "wrap" => $wrap + ); + @comments = (); + if ( defined $self->{bullet} ) { + my $bullet = $self->{bullet}; + my $indent1 = $self->{indent}; + my $indent2 = $indent1 . ( ' ' x length($bullet) ); + $t =~ s/^/$indent1$bullet/s; + $t =~ s/\n(.)/\n$indent2$1/sg; + } + $self->pushline( $t . $end ); +} + +1; + +=head1 STATUS OF THIS MODULE + +Tested successfully on simple text files and NEWS.Debian files. + +=head1 AUTHORS + + Nicolas François <nicolas.francois@centraliens.net> + +=head1 COPYRIGHT AND LICENSE + + Copyright © 2005-2008 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>. + + Copyright © 2008-2009, 2018 Jonas Smedegaard <dr@jones.dk>. + Copyright © 2020 Martin Quinson <mquinson#debian.org>. + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut + +__END__ + +# LocalWords: Charset charset po UTF gettext msgid nostrip GPL diff --git a/lib/Locale/Po4a/TransTractor.pm b/lib/Locale/Po4a/TransTractor.pm new file mode 100644 index 0000000..0bc5b8f --- /dev/null +++ b/lib/Locale/Po4a/TransTractor.pm @@ -0,0 +1,1337 @@ +#!/usr/bin/perl -w + +require Exporter; + +package Locale::Po4a::TransTractor; +use DynaLoader; + +use 5.16.0; +use strict; +use warnings; + +use subs qw(makespace); +use vars qw($VERSION @ISA @EXPORT); +$VERSION = "0.72"; +@ISA = qw(DynaLoader); +@EXPORT = qw(new process translate + read write readpo writepo + getpoout setpoout get_in_charset get_out_charset handle_yaml); + +# Try to use a C extension if present. +eval("bootstrap Locale::Po4a::TransTractor $VERSION"); + +use Carp qw(croak confess); +use Locale::Po4a::Po; +use Locale::Po4a::Common; + +use File::Path; # mkdir before write +use File::Spec; + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::TransTractor - generic trans(lator ex)tractor. + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +This class is the ancestor of every po4a parser used to parse a document, to +search translatable strings, to extract them to a PO file and to replace them by +their translation in the output document. + +More formally, it takes the following arguments as input: + +=over 2 + +=item - + +a document to translate; + +=item - + +a PO file containing the translations to use. + +=back + +As output, it produces: + +=over 2 + +=item - + +another PO file, resulting of the extraction of translatable strings from +the input document; + +=item - + +a translated document, with the same structure than the one in input, but +with all translatable strings replaced with the translations found in the +PO file provided in input. + +=back + +Here is a graphical representation of this: + + Input document --\ /---> Output document + \ / (translated) + +-> parse() function -----+ + / \ + Input PO --------/ \---> Output PO + (extracted) + +=head1 FUNCTIONS YOUR PARSER SHOULD OVERRIDE + +=over 4 + +=item parse() + +This is where all the work takes place: the parsing of input documents, the +generation of output, and the extraction of the translatable strings. This +is pretty simple using the provided functions presented in the section +B<INTERNAL FUNCTIONS> below. See also the B<SYNOPSIS>, which presents an +example. + +This function is called by the process() function below, but if you choose +to use the new() function, and to add content manually to your document, +you will have to call this function yourself. + +=item docheader() + +This function returns the header we should add to the produced document, +quoted properly to be a comment in the target language. See the section +B<Educating developers about translations>, from L<po4a(7)|po4a.7>, for what +it is good for. + +=back + +=cut + +sub docheader { } + +sub parse { } + +=head1 SYNOPSIS + +The following example parses a list of paragraphs beginning with "<p>". For the sake +of simplicity, we assume that the document is well formatted, i.e. that '<p>' +tags are the only tags present, and that this tag is at the very beginning +of each paragraph. + + sub parse { + my $self = shift; + + PARAGRAPH: while (1) { + my ($paragraph,$pararef)=("",""); + my $first=1; + my ($line,$lref)=$self->shiftline(); + while (defined($line)) { + if ($line =~ m/<p>/ && !$first--; ) { + # Not the first time we see <p>. + # Reput the current line in input, + # and put the built paragraph to output + $self->unshiftline($line,$lref); + + # Now that the document is formed, translate it: + # - Remove the leading tag + $paragraph =~ s/^<p>//s; + + # - push to output the leading tag (untranslated) and the + # rest of the paragraph (translated) + $self->pushline( "<p>" + . $self->translate($paragraph,$pararef) + ); + + next PARAGRAPH; + } else { + # Append to the paragraph + $paragraph .= $line; + $pararef = $lref unless(length($pararef)); + } + + # Reinit the loop + ($line,$lref)=$self->shiftline(); + } + # Did not get a defined line? End of input file. + return; + } + } + +Once you've implemented the parse function, you can use your document +class, using the public interface presented in the next section. + +=head1 PUBLIC INTERFACE for scripts using your parser + +=head2 Constructor + +=over 4 + +=item process(%) + +This function can do all you need to do with a po4a document in one +invocation. Its arguments must be packed as a hash. ACTIONS: + +=over 3 + +=item a. + +Reads all the PO files specified in po_in_name + +=item b. + +Reads all original documents specified in file_in_name + +=item c. + +Parses the document + +=item d. + +Reads and applies all the addenda specified + +=item e. + +Writes the translated document to file_out_name (if given) + +=item f. + +Writes the extracted PO file to po_out_name (if given) + +=back + +ARGUMENTS, beside the ones accepted by new() (with expected type): + +=over 4 + +=item file_in_name (@) + +List of filenames where we should read the input document. + +=item file_in_charset ($) + +Charset used in the input document (if it isn't specified, use UTF-8). + +=item file_out_name ($) + +Filename where we should write the output document. + +=item file_out_charset ($) + +Charset used in the output document (if it isn't specified, use UTF-8). + +=item po_in_name (@) + +List of filenames where we should read the input PO files from, containing +the translation which will be used to translate the document. + +=item po_out_name ($) + +Filename where we should write the output PO file, containing the strings +extracted from the input document. + +=item addendum (@) + +List of filenames where we should read the addenda from. + +=item addendum_charset ($) + +Charset for the addenda. + +=back + +=item new(%) + +Create a new po4a document. Accepted options (in the hash passed as a parameter): + +=over 4 + +=item verbose ($) + +Sets the verbosity. + +=item debug ($) + +Sets the debugging. + +=item wrapcol ($) + +The column at which we should wrap text in output document (default: 76). + +The negative value means not to wrap lines at all. + +=back + +Also it accepts next options for underlying Po-files: B<porefs>, +B<copyright-holder>, B<msgid-bugs-address>, B<package-name>, +B<package-version>, B<wrap-po>. + +=cut + +sub process { + my $self = shift; + ## Parameters are passed as an hash to avoid long and error-prone parameter lists + my %params = @_; + + # Parameter checking + foreach ( keys %params ) { + confess "Unexpected parameter to process(): $_. Please report that bug." + unless ( $_ eq 'po_in_name' + || $_ eq 'po_out_name' + || $_ eq 'file_in_name' + || $_ eq 'file_in_charset' + || $_ eq 'file_out_name' + || $_ eq 'file_out_charset' + || $_ eq 'addendum' + || $_ eq 'addendum_charset' + || $_ eq 'srcdir' + || $_ eq 'destdir' + || $_ eq 'calldir' ); + } + + $self->{TT}{'file_in_charset'} = $params{'file_in_charset'} // 'UTF-8'; + $self->{TT}{'file_out_charset'} = $params{'file_out_charset'} // 'UTF-8'; + $self->{TT}{'addendum_charset'} = $params{'addendum_charset'}; + + our ( $destdir, $srcdir, $calldir ) = ( $params{'destdir'}, $params{'srcdir'}, $params{'calldir'} ); + + sub _input_file { + my $filename = $_[0]; + return $filename if ( File::Spec->file_name_is_absolute($filename) ); + foreach ( ( $destdir, $srcdir, $calldir ) ) { + next unless defined $_; + my $p = File::Spec->catfile( $_, $filename ); + return $p if -e $p; + } + return $filename; + } + + sub _output_file { + my $filename = $_[0]; + return $filename if ( File::Spec->file_name_is_absolute($filename) ); + foreach ( ( $destdir, $calldir ) ) { + next unless defined $_; + return File::Spec->catfile( $_, $filename ) if -d $_ and -w $_; + } + return $filename; + } + + foreach my $file ( @{ $params{'po_in_name'} } ) { + my $infile = _input_file($file); + print STDERR wrap_mod( "po4a::transtractor::process", "Read PO file $infile" ) + if $self->debug(); + $self->readpo($infile); + } + foreach my $file ( @{ $params{'file_in_name'} } ) { + my $infile = _input_file($file); + print STDERR wrap_mod( "po4a::transtractor::process", "Read document $infile" ) + if $self->debug(); + $self->read( $infile, $file, $params{'file_in_charset'} ); + } + print STDERR wrap_mod( "po4a::transtractor::process", "Call parse()" ) if $self->debug(); + $self->parse(); + print STDERR wrap_mod( "po4a::transtractor::process", "Done parse()" ) if $self->debug(); + foreach my $file ( @{ $params{'addendum'} } ) { + my $infile = _input_file($file); + print STDERR wrap_mod( "po4a::transtractor::process", "Apply addendum $infile" ) + if $self->debug(); + $self->addendum($file) || die "An addendum failed\n"; + } + + if ( defined $params{'file_out_name'} ) { + my $outfile = _output_file( $params{'file_out_name'} ); + print STDERR wrap_mod( "po4a::transtractor::process", "Write document $outfile" ) + if $self->debug(); + $self->write( $outfile, $self->{TT}{'file_out_charset'} ); + } + if ( defined $params{'po_out_name'} ) { + my $outfile = _output_file( $params{'po_out_name'} ); + print STDERR wrap_mod( "po4a::transtractor::process", "Write PO file $outfile" ) + if $self->debug(); + $self->writepo($outfile); + } + return $self; +} + +sub new { + ## Determine if we were called via an object-ref or a classname + my $this = shift; + my $class = ref($this) || $this; + my $self = {}; + my %options = @_; + ## Bless ourselves into the desired class and perform any initialization + bless $self, $class; + + ## initialize the plugin + # prevent the plugin from croaking on the options intended for Po.pm or ourself + $self->{options}{'porefs'} = ''; + $self->{options}{'copyright-holder'} = ''; + $self->{options}{'msgid-bugs-address'} = ''; + $self->{options}{'package-name'} = ''; + $self->{options}{'package-version'} = ''; + $self->{options}{'wrap-po'} = ''; + $self->{options}{'wrapcol'} = ''; + + # let the plugin parse the options and such + $self->initialize(%options); + + ## Create our private data + my %po_options; + $po_options{'porefs'} = $options{'porefs'}; + $po_options{'copyright-holder'} = $options{'copyright-holder'}; + $po_options{'msgid-bugs-address'} = $options{'msgid-bugs-address'}; + $po_options{'package-name'} = $options{'package-name'}; + $po_options{'package-version'} = $options{'package-version'}; + $po_options{'wrap-po'} = $options{'wrap-po'}; + + # private data + $self->{TT} = (); + $self->{TT}{po_in} = Locale::Po4a::Po->new( \%po_options ); + $self->{TT}{po_out} = Locale::Po4a::Po->new( \%po_options ); + + # Warning, $self->{TT}{doc_in} is an array of array: + # The document is split on lines, and for each array in array + # [0] is the line content, [1] is the reference $filename:$linenum + $self->{TT}{doc_in} = (); + $self->{TT}{doc_out} = (); + if ( defined $options{'verbose'} ) { + $self->{TT}{verbose} = $options{'verbose'}; + } + if ( defined $options{'debug'} ) { + $self->{TT}{debug} = $options{'debug'}; + } + if ( defined $options{'wrapcol'} ) { + if ( $options{'wrapcol'} < 0) { + $self->{TT}{wrapcol} = 'Inf'; + } else { + $self->{TT}{wrapcol} = $options{'wrapcol'}; + } + } else { + $self->{TT}{wrapcol} = 76; + } + + return $self; +} + +=back + +=head2 Manipulating document files + +=over 4 + +=item read($$) + +Add another input document data at the end of the existing array +C<< @{$self->{TT}{doc_in}} >>. The argument is the filename to read. If a second +argument is provided, it is the filename to use in the references. + +This array C<< @{$self->{TT}{doc_in}} >> holds this input document data as an +array of strings with alternating meanings. + * The string C<$textline> holding each line of the input text data. + * The string C<< $filename:$linenum >> holding its location and called as + "reference" (C<linenum> starts with 1). + +Please note that it does not parse anything. You should use the parse() +function when you're done with packing input files into the document. + +=cut + +sub read() { + my $self = shift; + my $filename = shift or confess "Cannot write to a file without filename"; + my $refname = shift or confess "Cannot write to a file without refname"; + my $charset = shift || 'UTF-8'; + my $linenum = 0; + + use warnings FATAL => 'utf8'; + use Encode qw(:fallbacks); + use PerlIO::encoding; + $PerlIO::encoding::fallback = FB_CROAK; + + my $fh; + open( $fh, "<:encoding($charset)", $filename ) + or croak wrap_msg( dgettext( "po4a", "Cannot read from %s: %s" ), $filename, $! ); + + # If we see a BOM while not in UTF-8, we want to croak. But this code is in an eval to deal with + # encoding issues. So save the BOM error until after the eval block + my $BOM_detected = 0; + + eval { + while ( defined( my $textline = <$fh> ) ) { + $linenum++; + if ( $linenum == 1 && $textline =~ m/^\N{BOM}/ ) { # UTF-8 BOM detected + $BOM_detected = 1 if ( uc($charset) ne 'UTF-8' ); # Save the error message for after the eval{} bloc + $textline =~ s/^\N{BOM}//; + } + my $ref = "$refname:$linenum"; + $textline =~ s/\r$//; + my @entry = ( $textline, $ref ); + push @{ $self->{TT}{doc_in} }, @entry; + } + }; + my $error = $@; + if ( length($error) ) { + chomp $error; + die wrap_msg( dgettext( "po4a", "Malformed encoding while reading from file %s with charset %s: %s" ), + $filename, $charset, $error ); + } + + # Croak if we need to + if ($BOM_detected) { + croak wrap_msg( + dgettext( + "po4a", + "The file %s starts with a BOM char indicating that its encoding is UTF-8, but you specified %s instead." + ), + $filename, + $charset + ); + } + + close $fh + or croak wrap_msg( dgettext( "po4a", "Cannot close %s after reading: %s" ), $filename, $! ); +} + +=item write($) + +Write the translated document to the given filename. + +This translated document data are provided by: + * C<< $self->docheader() >> holding the header text for the plugin, and + * C<< @{$self->{TT}{doc_out}} >> holding each line of the main translated text in the array. + +=cut + +sub write { + my $self = shift; + my $filename = shift or confess "Cannot write to a file without filename"; + my $charset = shift || 'UTF-8'; + + use warnings FATAL => 'utf8'; + + my $fh; + if ( $filename eq '-' ) { + $fh = \*STDOUT; + } else { + + # make sure the directory in which we should write the localized file exists + my $dir = $filename; + if ( $dir =~ m|/| ) { + $dir =~ s|/[^/]*$||; + + File::Path::mkpath( $dir, 0, 0755 ) # Croaks on error + if ( length($dir) && !-e $dir ); + } + open( $fh, ">:encoding($charset)", $filename ) + or croak wrap_msg( dgettext( "po4a", "Cannot write to %s: %s" ), $filename, $! ); + } + + map { print $fh $_ } $self->docheader(); + eval { + map { print $fh $_ } @{ $self->{TT}{doc_out} }; + + # we use the "eval {} or do {}" approach to deal with exceptions, cf https://perlmaven.com/fatal-errors-in-external-modules + # but we want it to fail only if there is an error. It seems to be some cases where "map" returns false even if there is no error. + # Thus this final 1 to evaluate to true in absence of error. + 1; + } or do { + my $error = $@ || 'Unknown failure'; + chomp $error; + if ( $charset ne 'UTF-8' && $error =~ /^"\\x\{([^"}]*)\}"/ ) { + + # Attempt to write the char that cannot be written. Very fragile code + binmode STDERR, ':encoding(UTF-8)'; + my $char = chr( hex($1) ); + die wrap_msg( + dgettext( "po4a", "Malformed encoding while writing char '%s' to file %s with charset %s: %s" ), + $char, $filename, $charset, $error ); + } else { + die wrap_msg( dgettext( "po4a", "Malformed encoding while writing to file %s with charset %s: %s" ), + $filename, $charset, $error ); + } + }; + + if ( $filename ne '-' ) { + close $fh or croak wrap_msg( dgettext( "po4a", "Cannot close %s after writing: %s" ), $filename, $! ); + } + +} + +=back + +=head2 Manipulating PO files + +=over 4 + +=item readpo($) + +Add the content of a file (which name is passed as argument) to the +existing input PO. The old content is not discarded. + +=item writepo($) + +Write the extracted PO file to the given filename. + +=item stats() + +Returns some statistics about the translation done so far. Please note that +it's not the same statistics than the one printed by msgfmt +--statistic. Here, it's stats about recent usage of the PO file, while +msgfmt reports the status of the file. It is a wrapper to the +Locale::Po4a::Po::stats_get function applied to the input PO file. Example +of use: + + [normal use of the po4a document...] + + ($percent,$hit,$queries) = $document->stats(); + print "We found translations for $percent\% ($hit from $queries) of strings.\n"; + +=back + +=cut + +sub getpoout { + return $_[0]->{TT}{po_out}; +} + +sub setpoout { + $_[0]->{TT}{po_out} = $_[1]; +} + +sub readpo { + $_[0]->{TT}{po_in}->read( $_[1] ); +} + +sub writepo { + $_[0]->{TT}{po_out}->write( $_[1] ); +} + +sub stats { + return $_[0]->{TT}{po_in}->stats_get(); +} + +=head2 Manipulating addenda + +=over 4 + +=item addendum($) + +Please refer to L<po4a(7)|po4a.7> for more information on what addenda are, +and how translators should write them. To apply an addendum to the translated +document, simply pass its filename to this function and you are done ;) + +This function returns a non-null integer on error. + +=cut + +# Internal function to read the header. +sub addendum_parse { + my $filename = shift; + my $charset = shift || 'UTF-8'; + my $header; + + my ( $errcode, $mode, $position, $boundary, $bmode, $content ) = ( 1, "", "", "", "", "" ); + + unless ( open( INS, "<:encoding($charset)", $filename ) ) { + warn wrap_msg( dgettext( "po4a", "Cannot read from %s: %s" ), $filename, $! ); + goto END_PARSE_ADDFILE; + } + + $PerlIO::encoding::fallback = FB_CROAK; + eval { + unless ( defined( $header = <INS> ) && $header ) { + warn wrap_msg( dgettext( "po4a", "Cannot read po4a header from %s." ), $filename ); + goto END_PARSE_ADDFILE; + } + } or do { + my $error = $@ || 'Unknown failure'; + chomp $error; + die wrap_msg( dgettext( "po4a", "Malformed encoding while reading from file %s with charset %s: %s" ), + $filename, $charset, $error ); + }; + + unless ( $header =~ s/PO4A-HEADER://i ) { + warn wrap_msg( dgettext( "po4a", "First line of %s does not look like a po4a header." ), $filename ); + goto END_PARSE_ADDFILE; + } + foreach my $part ( split( /;/, $header ) ) { + unless ( $part =~ m/^\s*([^=]*)=(.*)$/ ) { + warn wrap_msg( dgettext( "po4a", "Syntax error in po4a header of %s, near \"%s\"" ), $filename, $part ); + goto END_PARSE_ADDFILE; + } + my ( $key, $value ) = ( $1, $2 ); + $key = lc($key); + if ( $key eq 'mode' ) { + $mode = lc($value); + } elsif ( $key eq 'position' ) { + $position = $value; + } elsif ( $key eq 'endboundary' ) { + $boundary = $value; + $bmode = 'after'; + } elsif ( $key eq 'beginboundary' ) { + $boundary = $value; + $bmode = 'before'; + } else { + warn wrap_msg( dgettext( "po4a", "Invalid argument in the po4a header of %s: %s" ), $filename, $key ); + goto END_PARSE_ADDFILE; + } + } + + unless ( length($mode) ) { + warn wrap_msg( dgettext( "po4a", "The po4a header of %s does not define the mode." ), $filename ); + goto END_PARSE_ADDFILE; + } + unless ( $mode eq "before" || $mode eq "after" || $mode eq "eof" ) { + warn wrap_msg( + dgettext( + "po4a", + "Mode invalid in the po4a header of %s: should be 'before', 'after' or 'eof'. Instead, it is '%s'." + ), + $filename, + $mode + ); + goto END_PARSE_ADDFILE; + } + + unless ( length($position) || $mode eq "eof" ) { + warn wrap_msg( dgettext( "po4a", "The po4a header of %s does not define the position." ), $filename ); + goto END_PARSE_ADDFILE; + } + if ( $mode eq "after" && length($boundary) == 0 ) { + warn wrap_msg( dgettext( "po4a", "No ending boundary given in the po4a header, but mode=after." ) ); + goto END_PARSE_ADDFILE; + } + if ( $mode eq "eof" && length($position) ) { + warn wrap_msg( dgettext( "po4a", "No position needed when mode=eof." ) ); + goto END_PARSE_ADDFILE; + } + if ( $mode eq "eof" && length($boundary) ) { + warn wrap_msg( dgettext( "po4a", "No ending boundary needed when mode=eof." ) ); + goto END_PARSE_ADDFILE; + } + + eval { + while ( defined( my $line = <INS> ) ) { + $content .= $line; + } + }; + my $error = $@; + if ( length($error) ) { + chomp $error; + die wrap_msg( dgettext( "po4a", "Malformed encoding while reading from file %s with charset %s: %s" ), + $filename, $charset, $error ); + } + close INS; + + $errcode = 0; + END_PARSE_ADDFILE: + return ( $errcode, $mode, $position, $boundary, $bmode, $content ); +} + +sub mychomp { + my ($str) = shift; + chomp($str); + return $str; +} + +sub addendum { + my ( $self, $filename ) = @_; + + print STDERR wrap_mod( "po4a::transtractor::addendum", "Apply addendum %s", $filename ) + if $self->debug(); + unless ($filename) { + warn wrap_msg( dgettext( "po4a", "Cannot apply addendum when not given the filename" ) ); + return 0; + } + die wrap_msg( dgettext( "po4a", "Addendum %s does not exist." ), $filename ) + unless -e $filename; + + my ( $errcode, $mode, $position, $boundary, $bmode, $content ) = + addendum_parse( $filename, $self->{TT}{'addendum_charset'} ); + return 0 if ($errcode); + + # In order to make addendum more intuitive, each array item of + # @{$self->{TT}{doc_out}} must not have internal "\n". But previous parser + # code may put multiple internal "\n" to address things like placeholder + # tag handling. Let's normalize array content. + # Use internal "\n" as delimiter but keep it by using the lookbehind trick. + @{ $self->{TT}{doc_out} } = map { split /(?<=\n)/, $_ } @{ $self->{TT}{doc_out} }; + + # Bugs around addendum is hard to understand. So let's print involved data explicitly. + if ( $self->debug() ) { + print STDERR "Addendum position regex=$position\n"; + print STDERR "Addendum mode=$mode\n"; + if ( $mode eq "after" ) { + print STDERR "Addendum boundary regex=$boundary\n"; + print STDERR "Addendum boundary mode=$bmode\n"; + } + print STDERR "Addendum content (begin):\n"; + print STDERR "$content"; + print STDERR "Addendum content (end)\n"; + print STDERR "Output items searched for the addendum insertion position:\n"; + foreach my $item ( @{ $self->{TT}{doc_out} } ) { + print STDERR $item; + print STDERR "\n----- [ search item end marker with a preceding newline ] -----\n"; + } + print STDERR "Start searching addendum insertion position...\n"; + } + + unless ( $mode eq 'eof' ) { + my $found = scalar grep { /$position/ } @{ $self->{TT}{doc_out} }; + if ( $found == 0 ) { + warn wrap_msg( dgettext( "po4a", "No candidate position for the addendum %s." ), $filename ); + return 0; + } + if ( $found > 1 ) { + warn wrap_msg( dgettext( "po4a", "More than one candidate position found for the addendum %s." ), + $filename ); + return 0; + } + } + + if ( $mode eq "eof" ) { + push @{ $self->{TT}{doc_out} }, $content; + } elsif ( $mode eq "before" ) { + if ( $self->verbose() > 1 || $self->debug() ) { + map { + print STDERR wrap_msg( dgettext( "po4a", "Addendum '%s' applied before this line: %s" ), $filename, $_ ) + if (/$position/); + } @{ $self->{TT}{doc_out} }; + } + @{ $self->{TT}{doc_out} } = map { /$position/ ? ( $content, $_ ) : $_ } @{ $self->{TT}{doc_out} }; + } else { + my @newres = (); + + do { + # make sure it doesn't whine on empty document + my $line = scalar @{ $self->{TT}{doc_out} } ? shift @{ $self->{TT}{doc_out} } : ""; + push @newres, $line; + my $outline = mychomp($line); + $outline =~ s/^[ \t]*//; + + if ( $line =~ m/$position/ ) { + while ( $line = shift @{ $self->{TT}{doc_out} } ) { + last if ( $line =~ /$boundary/ ); + push @newres, $line; + } + if ( defined $line ) { + if ( $bmode eq 'before' ) { + print wrap_msg( dgettext( "po4a", "Addendum '%s' applied before this line: %s" ), + $filename, $outline ) + if ( $self->verbose() > 1 || $self->debug() ); + push @newres, $content; + push @newres, $line; + } else { + print wrap_msg( dgettext( "po4a", "Addendum '%s' applied after the line: %s." ), + $filename, $outline ) + if ( $self->verbose() > 1 || $self->debug() ); + push @newres, $line; + push @newres, $content; + } + } else { + print wrap_msg( dgettext( "po4a", "Addendum '%s' applied at the end of the file." ), $filename ) + if ( $self->verbose() > 1 || $self->debug() ); + push @newres, $content; + } + } + } while ( scalar @{ $self->{TT}{doc_out} } ); + @{ $self->{TT}{doc_out} } = @newres; + } + print STDERR wrap_mod( "po4a::transtractor::addendum", "Done with addendum %s", $filename ) + if $self->debug(); + return 1; +} + +=back + +=head1 INTERNAL FUNCTIONS used to write derivative parsers + +=head2 Getting input, providing output + +Four functions are provided to get input and return output. They are very +similar to shift/unshift and push/pop of Perl. + + * Perl shift returns the first array item and drop it from the array. + * Perl unshift prepends an item to the array as the first array item. + * Perl pop returns the last array item and drop it from the array. + * Perl push appends an item to the array as the last array item. + +The first pair is about input, while the second is about output. Mnemonic: in +input, you are interested in the first line, what shift gives, and in output +you want to add your result at the end, like push does. + +=over 4 + +=item shiftline() + +This function returns the first line to be parsed and its corresponding +reference (packed as an array) from the array C<< @{$self->{TT}{doc_in}} >> and +drop these first 2 array items. Here, the reference is provided by a string +C<< $filename:$linenum >>. + +=item unshiftline($$) + +Unshifts the last shifted line of the input document and its corresponding +reference back to the head of C<< {$self->{TT}{doc_in}} >>. + +=item pushline($) + +Push a new line to the end of C<< {$self->{TT}{doc_out}} >>. + +=item popline() + +Pop the last pushed line from the end of C<< {$self->{TT}{doc_out}} >>. + +=back + +=cut + +sub shiftline { + my ( $line, $ref ) = ( shift @{ $_[0]->{TT}{doc_in} }, shift @{ $_[0]->{TT}{doc_in} } ); + return ( $line, $ref ); +} + +sub unshiftline { + my $self = shift; + unshift @{ $self->{TT}{doc_in} }, @_; +} + +sub pushline { push @{ $_[0]->{TT}{doc_out} }, $_[1] if defined $_[1]; } +sub popline { return pop @{ $_[0]->{TT}{doc_out} }; } + +=head2 Marking strings as translatable + +One function is provided to handle the text which should be translated. + +=over 4 + +=item translate($$$) + +Mandatory arguments: + +=over 2 + +=item - + +A string to translate + +=item - + +The reference of this string (i.e. position in inputfile) + +=item - + +The type of this string (i.e. the textual description of its structural role; +used in Locale::Po4a::Po::gettextization(); see also L<po4a(7)|po4a.7>, +section B<Gettextization: how does it work?>) + +=back + +This function can also take some extra arguments. They must be organized as +a hash. For example: + + $self->translate("string","ref","type", + 'wrap' => 1); + +=over + +=item B<wrap> + +boolean indicating whether we can consider that whitespaces in string are +not important. If yes, the function canonizes the string before looking for +a translation or extracting it, and wraps the translation. + +=item B<wrapcol> + +the column at which we should wrap (default: the value of B<wrapcol> specified +during creation of the TransTractor or 76). + +The negative value will be substracted from the default. + +=item B<comment> + +an extra comment to add to the entry. + +=back + +Actions: + +=over 2 + +=item - + +Pushes the string, reference and type to po_out. + +=item - + +Returns the translation of the string (as found in po_in) so that the +parser can build the doc_out. + +=item - + +Handles the charsets to recode the strings before sending them to +po_out and before returning the translations. + +=back + +=back + +=cut + +sub translate { + my $self = shift; + my ( $string, $ref, $type ) = ( shift, shift, shift ); + my (%options) = @_; + + return "" unless length($string); + + # my %validoption; + # map { $validoption{$_}=1 } (qw(wrap wrapcoll)); + # foreach (keys %options) { + # Carp::confess "internal error: translate() called with unknown arg $_. Valid options: $validoption" + # unless $validoption{$_}; + # } + + if ( !defined $options{'wrapcol'} ) { + $options{'wrapcol'} = $self->{TT}{wrapcol} + } elsif ( $options{'wrapcol'} < 0 ) { + $options{'wrapcol'} = $self->{TT}{wrapcol} + $options{'wrapcol'}; + } + my $transstring = $self->{TT}{po_in}->gettext( + $string, + 'wrap' => $options{'wrap'} || 0, + 'wrapcol' => $options{'wrapcol'} + ); + + # the comments provided by the modules are automatic comments from the PO point of view + $self->{TT}{po_out}->push( + 'msgid' => $string, + 'reference' => $ref, + 'type' => $type, + 'automatic' => $options{'comment'}, + 'flags' => $options{'flags'}, + 'wrap' => $options{'wrap'} || 0, + ); + + if ( $options{'wrap'} || 0 ) { + $transstring =~ s/( *)$//s; + my $trailing_spaces = $1 || ""; + $transstring =~ s/(?<!\\) +$//gm; + $transstring .= $trailing_spaces; + } + + return $transstring; +} + +=head2 Misc functions + +=over 4 + +=item verbose() + +Returns if the verbose option was passed during the creation of the +TransTractor. + +=cut + +sub verbose { + if ( defined $_[1] ) { + $_[0]->{TT}{verbose} = $_[1]; + } else { + return $_[0]->{TT}{verbose} || 0; # undef and 0 have the same meaning, but one generates warnings + } +} + +=item debug() + +Returns if the debug option was passed during the creation of the +TransTractor. + +=cut + +sub debug { + return $_[0]->{TT}{debug}; +} + +=item get_in_charset() + +This function return the charset that was provided as master charset + +=cut + +sub get_in_charset() { + return $_[0]->{TT}{'file_in_charset'}; +} + +=item get_out_charset() + +This function will return the charset that should be used in the output +document (usually useful to substitute the input document's detected charset +where it has been found). + +It will use the output charset specified in the command line. If it wasn't +specified, it will use the input PO's charset, and if the input PO has the +default "CHARSET", it will return the input document's charset, so that no +encoding is performed. + +=cut + +sub get_out_charset { + my $self = shift; + + # Prefer the value specified on the command line + return $self->{TT}{'file_out_charset'} + if length( $self->{TT}{'file_out_charset'} ); + + return $self->{TT}{po_in}->get_charset if $self->{TT}{po_in}->get_charset ne 'CHARSET'; + + return $self->{TT}{'file_in_charset'} if length( $self->{TT}{'file_in_charset'} ); + + return 'UTF-8'; +} + +# Push the translation of a Yaml document or Yaml Front-Matter header, parsed by YAML::Tiny in any case +# $is_yfm is a boolean indicating whether we are dealing with a Front Matter (true value) or whole document (false value) +sub handle_yaml { + my ( $self, $is_yfm, $blockref, $yamlarray, $yfm_keys, $yfm_skip_array, $yfm_paths ) = @_; + + die "Empty YAML " . ( $is_yfm ? "Front Matter" : "document" ) unless ( length($yamlarray) > 0 ); + + my ( $indent, $ctx ) = ( 0, "" ); + foreach my $cursor (@$yamlarray) { + + # An empty document + if ( !defined $cursor ) { + $self->pushline("---\n"); + + # Do nothing + + # A scalar document + } elsif ( !ref $cursor ) { + $self->pushline("---\n"); + $self->pushline( + format_scalar( + $self->translate( + $cursor, $blockref, + "YAML " . ( $is_yfm ? "Front Matter " : "" ) . "(scalar)", + "wrap" => 0 + ) + ) + ); + + # A list at the root + } elsif ( ref $cursor eq 'ARRAY' ) { + if (@$cursor) { + $self->pushline("---\n"); + do_array( $self, $is_yfm, $blockref, $cursor, $indent, $ctx, $yfm_keys, $yfm_skip_array, $yfm_paths ); + } else { + $self->pushline("---[]\n"); + } + + # A hash at the root + } elsif ( ref $cursor eq 'HASH' ) { + if (%$cursor) { + $self->pushline("---\n"); + do_hash( $self, $is_yfm, $blockref, $cursor, $indent, $ctx, $yfm_keys, $yfm_skip_array, $yfm_paths ); + } else { + $self->pushline("--- {}\n"); + } + + } else { + die( "Cannot serialize " . ref($cursor) ); + } + } + + # Escape the string to make it valid in YAML. + # This is very similar to YAML::Tiny::_dump_scalar but does not do the internal->UTF-8 decoding, + # as the translations that we feed into this function are already in UTF-8 + sub format_scalar { + my $string = $_[0]; + my $is_key = $_[1]; + + return '~' unless defined $string; + return "''" unless length $string; + if ( Scalar::Util::looks_like_number($string) ) { + + # keys and values that have been used as strings get quoted + if ($is_key) { + return qq['$string']; + } else { + return $string; + } + } + if ( $string =~ /[\\\'\n]/ ) { + $string =~ s/\\/\\\\/g; + $string =~ s/"/\\"/g; + $string =~ s/\n/\\n/g; + return qq|"$string"|; + } + if ( $string =~ /(?:^[~!@#%&*|>?:,'"`{}\[\]]|^-+$|\s|:\z)/ ) { + return "'$string'"; + } + return $string; + } + + sub do_array { + my ( $self, $is_yfm, $blockref, $array, $indent, $ctx, $yfm_keys, $yfm_skip_array, $yfm_paths ) = @_; + foreach my $el (@$array) { + my $header = ( ' ' x $indent ) . '- '; + my $type = ref $el; + if ( !$type ) { + if ($yfm_skip_array) { + $self->pushline( $header . YAML::Tiny::_dump_scalar( "dummy", $el, 0 ) . "\n" ); + } else { + $self->pushline( + $header + . format_scalar( + $self->translate( + $el, $blockref, + ( $is_yfm ? "Yaml Front Matter " : "" ) . "Array Element:$ctx", "wrap" => 0 + ) + ) + . "\n" + ); + } + + } elsif ( $type eq 'ARRAY' ) { + if (@$el) { + $self->pushline( $header . "\n" ); + do_array( $self, $is_yfm, $blockref, $el, $indent + 1, + $ctx, $yfm_keys, $yfm_skip_array, $yfm_paths ); + } else { + $self->pushline( $header . " []\n" ); + } + + } elsif ( $type eq 'HASH' ) { + if ( keys %$el ) { + $self->pushline( $header . "\n" ); + do_hash( $self, $is_yfm, $blockref, $el, $indent + 1, $ctx, $yfm_keys, $yfm_skip_array, + $yfm_paths ); + } else { + $self->pushline( $header . " {}\n" ); + } + + } else { + die "YAML $type references not supported"; + } + } + } + + sub do_hash { + my ( $self, $is_yfm, $blockref, $hash, $indent, $ctx, $yfm_keys, $yfm_skip_array, $yfm_paths ) = @_; + + foreach my $name ( sort keys %$hash ) { + my $el = $hash->{$name} // ""; + my $header = ( ' ' x $indent ) . YAML::Tiny::_dump_scalar( "dummy", $name, 1 ) . ":"; + + unless ( length($el) > 0 ) { # empty element, as in "tags: " with nothing after the column + $self->pushline( $header . "\n" ); + next; + } + + my $type = ref $el; + if ( !$type ) { + my %keys = %{$yfm_keys}; + my %paths = %{$yfm_paths}; + my $path = "$ctx $name" =~ s/^\s+|\s+$//gr; # Need to trim the path, at least when there is no ctx yet + + if ( ( $el eq 'false' ) or ( $el eq 'true' ) ) { # Do not translate nor quote booleans + $self->pushline("$header $el\n"); + } elsif ( + ( scalar %keys > 0 && exists $keys{$name} ) or # the key we need is provided + ( scalar %paths > 0 && exists $paths{$path} ) or # that path is provided + ( scalar %keys == 0 && scalar %paths == 0 ) # no key and no path provided + ) + { + my $translation = $self->translate( + $el, $blockref, + ( $is_yfm ? "Yaml Front Matter " : "" ) . "Hash Value:$ctx $name", + "wrap" => 0 + ); + + # add extra quotes to the parameter, as a protection to the extra chars that the translator could add + $self->pushline( $header . ' ' . format_scalar($translation) . "\n" ); + } else { + + # Work around a bug in YAML::Tiny that quotes numbers + # See https://github.com/Perl-Toolchain-Gang/YAML-Tiny#additional-perl-specific-notes + if ( Scalar::Util::looks_like_number($el) ) { + $self->pushline("$header $el\n"); + } else { + $self->pushline( $header . ' ' . YAML::Tiny::_dump_scalar( "dummy", $el ) . "\n" ); + } + } + + } elsif ( $type eq 'ARRAY' ) { + if (@$el) { + $self->pushline( $header . "\n" ); + do_array( + $self, $is_yfm, $blockref, $el, $indent + 1, "$ctx $name", + $yfm_keys, $yfm_skip_array, $yfm_paths + ); + } else { + $self->pushline( $header . " []\n" ); + } + + } elsif ( $type eq 'HASH' ) { + if ( keys %$el ) { + $self->pushline( $header . "\n" ); + do_hash( + $self, $is_yfm, $blockref, $el, $indent + 1, "$ctx $name", + $yfm_keys, $yfm_skip_array, $yfm_paths + ); + } else { + $self->pushline( $header . " {}\n" ); + } + + } else { + die "YAML $type references not supported"; + } + } + } +} + +=back + +=head1 FUTURE DIRECTIONS + +One shortcoming of the current TransTractor is that it can't handle +translated document containing all languages, like debconf templates, or +.desktop files. + +To address this problem, the only interface changes needed are: + +=over 2 + +=item - + +take a hash as po_in_name (a list per language) + +=item - + +add an argument to translate to indicate the target language + +=item - + +make a pushline_all function, which would make pushline of its content for +all languages, using a map-like syntax: + + $self->pushline_all({ "Description[".$langcode."]=". + $self->translate($line,$ref,$langcode) + }); + +=back + +Will see if it's enough ;) + +=head1 AUTHORS + + Denis Barbier <barbier@linuxfr.org> + Martin Quinson (mquinson#debian.org) + Jordi Vilalta <jvprat@gmail.com> + +=cut + +1; diff --git a/lib/Locale/Po4a/Wml.pm b/lib/Locale/Po4a/Wml.pm new file mode 100644 index 0000000..8c93d3b --- /dev/null +++ b/lib/Locale/Po4a/Wml.pm @@ -0,0 +1,210 @@ +#!/usr/bin/perl -w + +# Po4a::Wml.pm +# +# extract and translate translatable strings from a WML (web markup language) documents +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +######################################################################## + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::Wml - convert WML (web markup language) documents from/to PO files + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::Wml is a module to help the translation of WML documents into +other [human] languages. Do not mixup the WML we are speaking about here +(web markup language) and the WAP crap used on cell phones. + +Please note that this module relies upon the Locale::Po4a::Xhtml +module, which also relies upon the Locale::Po4a::Xml module. This +means that all tags for web page expressions are assumed to be written +in the XHTML syntax. + +=head1 OPTIONS ACCEPTED BY THIS MODULE + +NONE. + +=head1 STATUS OF THIS MODULE + +This module works for some simple documents, but is still young. +Currently, the biggest issue of the module is probably that it cannot +handle documents that contain non-XML inline tags such as <email +"foo@example.org">, which are often defined in the WML. Improvements +will be added in the future releases. + +=cut + +package Locale::Po4a::Wml; + +use 5.16.0; +use strict; +use warnings; + +require Exporter; +use vars qw(@ISA @EXPORT); +@ISA = qw(Locale::Po4a::Xhtml); +@EXPORT = qw(); + +use Locale::Po4a::Common; +use Locale::Po4a::Xhtml; +use File::Temp; + +sub initialize { + my $self = shift; + my %options = @_; + + $self->SUPER::initialize(%options); + + print "Call treat_options\n" if $self->{options}{'debug'}; + $self->treat_options; +} + +sub read { + my ( $self, $filename, $refname, $charset ) = @_; + my $tmp_filename; + ( undef, $tmp_filename ) = File::Temp::tempfile( + "po4aXXXX", + DIR => File::Spec->tmpdir(), + SUFFIX => ".xml", + OPEN => 0, + UNLINK => 0 + ) or die wrap_msg( gettext("Cannot create a temporary XML file: %s"), $! ); + my $file; + open FILEIN, "$filename" or die "Cannot read $filename: $!\n"; + { + $/ = undef; + $file = <FILEIN>; + } + $/ = "\n"; + + # Mask perl cruft out of XML sight + while (( $file =~ m|^(.*?)<perl>(.*?)</perl>(.*?)$|ms ) + or ( $file =~ m|^(.*?)<:(.*?):>(.*)$|ms ) ) + { + my ( $pre, $in, $post ) = ( $1, $2, $3 ); + $in =~ s/</PO4ALT/g; + $in =~ s/>/PO4AGT/g; + $file = "${pre}<!--PO4ABEGINPERL${in}PO4AENDPERL-->$post"; + } + + # Mask mp4h cruft + while ( $file =~ s|^#(.*)$|<!--PO4ASHARPBEGIN$1PO4ASHARPEND-->|m ) { + my $line = $1; + print STDERR "PROTECT HEADER: $line\n" + if $self->{options}{'debug'}; + + # If the wml tag has a title attribute, use a fake + # <title> xml tag to enable the extraction + # for translation in the xml parser. + if ( $line =~ m/title="([^"]*)"/ ) { + $file = "<title>$1</title>\n" . $file; + } + } + + # Validate define-tag tag's argument + $file =~ s|(<define-tag\s+)([^\s>]+)|$1PO4ADUMMYATTR="$2"|g; + + # Flush the result to disk + open OUTFILE, ">$tmp_filename"; + print OUTFILE $file; + close INFILE; + close OUTFILE or die "Cannot write $tmp_filename: $!\n"; + + push @{ $self->{DOCXML}{infile} }, $tmp_filename; + $self->{DOCWML}{$tmp_filename} = $filename; + $self->Locale::Po4a::TransTractor::read( $tmp_filename, $refname, $charset ); + unlink "$tmp_filename"; +} + +sub parse { + my $self = shift; + + foreach my $filename ( @{ $self->{DOCXML}{infile} } ) { + $self->Locale::Po4a::Xml::parse_file($filename); + my $org_filename = $self->{DOCWML}{$filename}; + + # Fix the references + foreach my $msgid ( keys %{ $self->{TT}{po_out}{po} } ) { + $self->{TT}{po_out}{po}{$msgid}{'reference'} =~ s|$filename(:\d+)|$org_filename$1|o; + } + + # Get the document back (undoing our WML masking) + # FIXME: need to join the file first, and then split? + my @doc_out; + my $cnt = 0; + my $title_node; + my $title; + + foreach my $line ( @{ $self->{TT}{doc_out} } ) { + if ( !$cnt ) { + if ( !$title_node && $line =~ m/<title>/ ) { + $title_node = $line; + } elsif ($title_node) { + $title_node .= $line; + if ( $title_node =~ m/<title>(.*?)<\/title>/ ) { + $title = $1; + $cnt = 1; + } + } else { + $cnt = 1; + } + } else { + if ( $line =~ s/^<!--PO4ASHARPBEGIN(.*?)PO4ASHARPEND-->/#$1/mg && $title ) { + $line =~ s/title="[^"]*"$/title="$title"/mg; + } + $line =~ s/<!--PO4ABEGINPERL(.*?)PO4AENDPERL-->/<:$1:>/sg; + $line =~ s/(<define-tag\s+)PO4ADUMMYATTR="([^"]*)"/$1$2/g; + $line =~ s/PO4ALT/</sg; + $line =~ s/PO4AGT/>/sg; + push @doc_out, $line; + } + } + + # Do a simple left trim + foreach my $line (@doc_out) { + if ( $line =~ m/\s+/ ) { + shift @doc_out; + } else { + last; + } + } + + $self->{TT}{doc_out} = \@doc_out; + } +} + +1; + +=head1 AUTHORS + + Martin Quinson (mquinson#debian.org) + Noriada Kobayashi <nori1@dolphin.c.u-tokyo.ac.jp> + +=head1 COPYRIGHT AND LICENSE + + Copyright © 2005 SPI, Inc. + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). diff --git a/lib/Locale/Po4a/Xhtml.pm b/lib/Locale/Po4a/Xhtml.pm new file mode 100644 index 0000000..7d11db3 --- /dev/null +++ b/lib/Locale/Po4a/Xhtml.pm @@ -0,0 +1,245 @@ +#!/usr/bin/perl + +# Po4a::Xhtml.pm +# +# extract and translate translatable strings from XHTML documents. +# +# This code extracts plain text from tags and attributes from strict XHTML +# documents. +# +# Copyright © 2005 Yves Rütschlé <po4a@rutschle.net> +# Copyright © 2007-2008 Nicolas François <nicolas.francois@centraliens.net> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +######################################################################## + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::Xhtml - convert XHTML documents from/to PO files + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::Xhtml is a module to help the translation of XHTML documents into +other [human] languages. + +=head1 OPTIONS ACCEPTED BY THIS MODULE + +These are this module's particular options: + +=over 4 + +=item B<includessi>[B<=>I<rootpath>] + +Include files specified by an include SSI (Server Side Includes) element +(e.g. <!--#include virtual="/foo/bar.html" -->). + +B<Note:> You should use it only for static files. + +An additional I<rootpath> parameter can be specified. It specifies the root +path to find files included by a B<virtual> attribute. + +=back + +=head1 STATUS OF THIS MODULE + +This module is fully functional, as it relies in the L<Locale::Po4a::Xml> +module. This only defines the translatable tags and attributes. + +"It works for me", which means I use it successfully on my personal Web site. +However, YMMV: please let me know if something doesn't work for you. + +=head1 SEE ALSO + +L<Locale::Po4a::TransTractor(3pm)>, L<Locale::Po4a::Xml(3pm)>, L<po4a(7)|po4a.7> + +=head1 AUTHORS + + Yves Rütschlé <po4a@rutschle.net> + Nicolas François <nicolas.francois@centraliens.net> + +=head1 COPYRIGHT AND LICENSE + + Copyright © 2004 Yves Rütschlé <po4a@rutschle.net> + Copyright © 2007-2008 Nicolas François <nicolas.francois@centraliens.net> + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut + +package Locale::Po4a::Xhtml; + +use 5.16.0; +use strict; +use warnings; + +use Locale::Po4a::Xml; +use vars qw(@tag_types); +*tag_types = \@Locale::Po4a::Xml::tag_types; + +use Locale::Po4a::Common; +use Carp qw(croak); + +use vars qw(@ISA); +@ISA = qw(Locale::Po4a::Xml); + +sub tag_extract_SSI { + my ( $self, $remove ) = ( shift, shift ); + my ( $eof, @tag ) = $self->get_string_until( + "-->", + { + include => 1, + remove => $remove, + unquoted => 1 + } + ); + my ( $t, $r ) = @tag; + if ( $t =~ m/<!--#include (file|virtual)="(.*?)"\s-->/s ) { + my $includefile; + if ( $1 eq "file" ) { + $includefile = "."; + } else { + $includefile = $self->{options}{'includessi'}; + } + $includefile .= $2; + if ( !$remove ) { + $self->get_string_until( + "-->", + { + include => 1, + remove => 1, + unquoted => 1 + } + ); + } + my $linenum = 0; + my @include; + + open( my $in, $includefile ) + or croak wrap_mod( "po4a::xml", dgettext( "po4a", "Cannot read from %s: %s" ), $includefile, $! ); + while ( defined( my $includeline = <$in> ) ) { + $linenum++; + my $includeref = $includefile . ":$linenum"; + push @include, ( $includeline, $includeref ); + } + close $in + or croak wrap_mod( "po4a::xml", dgettext( "po4a", "Cannot close %s after reading: %s" ), $includefile, $! ); + + while (@include) { + my ( $ir, $il ) = ( pop @include, pop @include ); + $self->unshiftline( $il, $ir ); + } + $t =~ s/<!--#include/<!-- SSI included by po4a: /; + $self->unshiftline( $t, $r ); + } + return ( $eof, @tag ); +} + +sub initialize { + my $self = shift; + my %options = @_; + + $self->{options}{'includessi'} = ''; + + $self->SUPER::initialize(%options); + + $self->{options}{'wrap'} = 1; + $self->{options}{'doctype'} = $self->{options}{'doctype'} || 'html'; + + # Default tags are translated (text rewrapped), and introduce a break. + # The following list indicates the list of tags which should be + # translated without rewrapping. + $self->{options}{'_default_translated'} .= ' + W<pre> + '; + + # The following list indicates the list of tags which should be + # translated inside the current block, without introducing a break. + $self->{options}{'_default_inline'} .= ' + <a> + <abbr> + <acronym> + <b> + <big> + <bdo> + <button> + <cite> + <code> + <del> + <dfn> + <em> + <i> + <ins> + <input> + <kbd> + <label> + <object> + <q> + <samp> + <select> + <small> + <span> + <strong> + <sub> + <sup> + <textarea> + <tt> + <u> + <var> + '; + + # Ignored tags: <img> + # Technically, <img> is an inline tag, but setting it as such is + # annoying, and not usually useful, unless you use images to + # write text (in which case you have bigger problems than this + # program not inlining img: you now have to translate all your + # images. That'll teach you). + # If you choose to translate images, you may also want to set + # <map> as placeholder and <area> as inline. + + $self->{options}{'_default_attributes'} .= ' + alt + lang + title + '; + + $self->{options}{'optionalclosingtag'} = 1; + + print "Call treat_options\n" if $self->{options}{'debug'}; + $self->treat_options; + + if ( defined $self->{options}{'includessi'} + and length $self->{options}{'includessi'} ) + { + foreach (@tag_types) { + if ( $_->{beginning} eq "!--#" ) { + $_->{f_extract} = \&tag_extract_SSI; + } + } + + # FIXME: the directory may be named "1" ;( + if ( $self->{options}{'includessi'} eq "1" ) { + $self->{options}{'includessi'} = "."; + } + } +} diff --git a/lib/Locale/Po4a/Xml.pm b/lib/Locale/Po4a/Xml.pm new file mode 100644 index 0000000..3666ac7 --- /dev/null +++ b/lib/Locale/Po4a/Xml.pm @@ -0,0 +1,2539 @@ +#!/usr/bin/perl + +# Po4a::Xml.pm +# +# extract and translate translatable strings from XML documents. +# +# This code extracts plain text from tags and attributes from generic +# XML documents, and it can be used as a base to build modules for +# XML-based documents. +# +# Copyright © 2004 Jordi Vilalta <jvprat@gmail.com> +# Copyright © 2008-2009 Nicolas François <nicolas.francois@centraliens.net> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +######################################################################## + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::Xml - convert XML documents and derivates from/to PO files + +=head1 DESCRIPTION + +The po4a (PO for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::Xml is a module to help the translation of XML documents into +other [human] languages. It can also be used as a base to build modules for +XML-based documents. + +=cut + +package Locale::Po4a::Xml; + +use 5.16.0; +use strict; +use warnings; + +require Exporter; +use vars qw(@ISA @EXPORT); +@ISA = qw(Locale::Po4a::TransTractor); +@EXPORT = qw(new initialize @tag_types); + +use Locale::Po4a::TransTractor; +use Locale::Po4a::Common; +use Carp qw(croak); +use File::Basename; +use File::Spec; + +#It will maintain the path from the root tag to the current one +my @path; + +#It will contain a list of external entities and their attached paths +my %entities; + +my @comments; +my %translate_options_cache; +my $input_charset; + +# This shiftline function returns the next line of the document being parsed +# (and its reference). +# For XML, it overloads the Transtractor shiftline to handle: +# - text file inclusion if includeexternal option is set +# - dropping of the text in the XML comment <!--... --> +my $_shiftline_in_comment = 0; + +sub shiftline { + my $self = shift; + + # call Transtractor's shiftline + my ( $line, $ref ) = $self->SUPER::shiftline(); + return ( $line, $ref ) if ( not defined $line ); + + if ( $self->{options}{'includeexternal'} ) { + my $tmp; + + for my $k ( keys %entities ) { + if ( $line =~ m/^(.*?)&$k;(.*)$/s ) { + my ( $before, $after ) = ( $1, $2 ); + my $linenum = 0; + my @textentries; + + $tmp = $before; + my $tmp_in_comment = 0; + if ($_shiftline_in_comment) { + if ( $before =~ m/^.*?-->(.*)$/s ) { + $tmp = $1; + $tmp_in_comment = 0; + } else { + $tmp_in_comment = 1; + } + } + if ( $tmp_in_comment == 0 ) { + while ( $tmp =~ m/^.*?<!--.*?-->(.*)$/s ) { + $tmp = $1; + } + if ( $tmp =~ m/<!--/s ) { + $tmp_in_comment = 1; + } + } + next if ($tmp_in_comment); + + open( my $in, '<:encoding(' . ( $input_charset // 'UTF-8' ) . ')', $entities{$k} ) + or croak wrap_mod( "po4a::xml::shiftline", dgettext( "po4a", "%s: Cannot read from %s: %s" ), + $ref, $entities{$k}, $! ); + while ( defined( my $textline = <$in> ) ) { + $linenum++; + my $textref = $entities{$k} . ":$linenum"; + push @textentries, ( $textline, $textref ); + } + close $in + or + croak wrap_mod( "po4a::xml::shiftline", dgettext( "po4a", "%s: Cannot close %s after reading: %s" ), + $ref, $entities{$k}, $! ); + + push @textentries, ( $after, $ref ); + $line = $before . ( shift @textentries ); + $ref .= " " . ( shift @textentries ); + $self->unshiftline(@textentries); + } + } + + $tmp = $line; + if ($_shiftline_in_comment) { + if ( $line =~ m/^.*?-->(.*)$/s ) { + $tmp = $1; + $_shiftline_in_comment = 0; + } else { + $_shiftline_in_comment = 1; + } + } + if ( $_shiftline_in_comment == 0 ) { + while ( $tmp =~ m/^.*?<!--.*?-->(.*)$/s ) { + $tmp = $1; + } + if ( $tmp =~ m/<!--/s ) { + $_shiftline_in_comment = 1; + } + } + } + + return ( $line, $ref ); +} + +sub read { + my ( $self, $filename, $refname, $charset ) = @_; + croak wrap_mod( "po4a::xml", dgettext( "po4a", "Cannot have more than one input charset in XML files (%s and %s)" ), + $input_charset, $charset ) + if ( defined $input_charset && $input_charset ne $charset ); + $input_charset = $charset; + push @{ $self->{DOCPOD}{infile} }, $filename; + $self->Locale::Po4a::TransTractor::read( $filename, $refname, $charset ); +} + +sub parse { + my $self = shift; + map { $self->{'current_file'} = $_; $self->parse_file($_) } @{ $self->{DOCPOD}{infile} }; +} + +# @save_holders is a stack of references to ('paragraph', 'translation', +# 'sub_translations', 'open', 'close', 'folded_attributes') hashes, where: +# paragraph is a reference to an array (see paragraph in the +# treat_content() subroutine) of strings followed by +# references. It contains the @paragraph array as it was +# before the processing was interrupted by a tag introducing +# a placeholder. +# translation is the translation of this level up to now +# sub_translations is a reference to an array of strings containing the +# translations which must replace the placeholders. +# open is the tag which opened the placeholder. +# close is the tag which closed the placeholder. +# folded_attributes is a hash of tags with their attributes (<tag attrs=...> +# strings), referenced by the folded tag id, which should +# replace the <tag po4a-id=id> strings in the current +# translation. +# +# If @save_holders only has 1 holder, then we are not processing the +# content of a holder, we are translating the document. +my @save_holders; + +# If we are at the bottom of the stack and there is no <placeholder ...> in +# the current translation, we can push the translation in the translated +# document. +# Otherwise, we keep the translation in the current holder. + +# This pushline function outputs a next translated text string. +# For XML, it overloads the Transtractor pushline to handle: +# - placeholder tag replacement with translated text with tags +# +# This twist causes the string pushed into @{$self->{TT}{doc_out}} to have +# multi-line contents with internal "\n" for placeholder tags. +sub pushline { + my ( $self, $line ) = ( shift, shift ); + + my $holder = $save_holders[$#save_holders]; + my $translation = $holder->{'translation'}; + $translation .= $line; + + while ( %{ $holder->{folded_attributes} } + and $translation =~ m/^(.*)<([^>]+?)\s+po4a-id=([0-9]+)>(.*)$/s ) + { + my $begin = $1; + my $tag = $2; + my $id = $3; + my $end = $4; + if ( defined $holder->{folded_attributes}->{$id} ) { + + # TODO: check if the tag is the same + $translation = $begin . $holder->{folded_attributes}->{$id} . $end; + delete $holder->{folded_attributes}->{$id}; + } else { + + # TODO: It will be hard to identify the location. + # => find a way to retrieve the reference. + die wrap_mod( + "po4a::xml::pushline", + dgettext( + "po4a", + "'po4a-id=%d' in the translation does not exist in the original string (or 'po4a-id=%d' used twice in the translation)." + ), + $id, $id + ); + } + } + + # TODO: check that %folded_attributes is empty at some time + # => in translate_paragraph? + + if ( ( $#save_holders > 0 ) + or ( $translation =~ m/<placeholder\s/s ) ) + { + $holder->{'translation'} = $translation; + } else { + $self->SUPER::pushline($translation); + $holder->{'translation'} = ''; + } +} + +=head1 TRANSLATING WITH PO4A::XML + +This module can be used directly to handle generic XML documents. This will +extract all tag's content, and no attributes, since it's where the text is +written in most XML based documents. + +There are some options (described in the next section) that can customize +this behavior. If this doesn't fit to your document format you're encouraged +to write your own module derived from this, to describe your format's details. +See the section B<WRITING DERIVATE MODULES> below, for the process description. + +=cut + +# +# Parse file and translate it +# +sub parse_file { + my ( $self, $filename ) = @_; + my $eof = 0; + + print wrap_mod( "po4a::xml::parse_file", dgettext( "po4a", ">>> filename = '%s'" ), $filename ) + if $self->{options}{'debug'}; + while ( !$eof ) { + + # We get all the text until the next breaking tag (not + # inline) and translate it + $eof = $self->treat_content; + if ( !$eof ) { + + # And then we treat the following breaking tag + $eof = $self->treat_tag; + } + } +} + +=head1 OPTIONS ACCEPTED BY THIS MODULE + +The global debug option causes this module to show the excluded strings, in +order to see if it skips something important. + +These are this module's particular options: + +=over 4 + +=item B<nostrip> + +Prevents it to strip the spaces around the extracted strings. + +=item B<wrap> + +Canonicalizes the string to translate, considering that whitespaces are not +important, and wraps the translated document. This option can be overridden +by custom tag options. See the B<translated> option below. + +=item B<unwrap_attributes> + +Attributes are wrapped by default. This option disables wrapping. + +=item B<caseinsensitive> + +It makes the tags and attributes searching to work in a case insensitive +way. If it's defined, it will treat E<lt>BooKE<gt>laNG and E<lt>BOOKE<gt>Lang as E<lt>bookE<gt>lang. + +=item B<escapequotes> + +Escape quotes in output strings. Necessary, for example, for creating +string resources for use by Android build tools. + +See also: https://developer.android.com/guide/topics/resources/string-resource.html + +=item B<includeexternal> + +When defined, external entities are included in the generated (translated) +document, and for the extraction of strings. If it's not defined, you +will have to translate external entities separately as independent +documents. + +=item B<ontagerror> + +This option defines the behavior of the module when it encounters invalid +XML syntax (a closing tag which does not match the last opening tag). +It can take the following values: + +=over + +=item I<fail> + +This is the default value. +The module will exit with an error. + +=item I<warn> + +The module will continue, and will issue a warning. + +=item I<silent> + +The module will continue without any warnings. + +=back + +Be careful when using this option. +It is generally recommended to fix the input file. + +=item B<tagsonly> + +Note: This option is deprecated. + +Extracts only the specified tags in the B<tags> option. Otherwise, it +will extract all the tags except the ones specified. + +=item B<doctype> + +String that will try to match with the first line of the document's doctype +(if defined). If it doesn't, a warning will indicate that the document +might be of a bad type. + +=item B<addlang> + +String indicating the path (e.g. E<lt>bbbE<gt>E<lt>aaaE<gt>) of a tag +where a lang="..." attribute shall be added. The language will be defined +as the basename of the PO file without any .po extension. + +=item B<optionalclosingtag> + +Boolean indicating whether closing tags are optional (as in HTML). By default, +missing closing tags raise an error handled according to B<ontagerror>. + +=item B<tags> + +Note: This option is deprecated. +You should use the B<translated> and B<untranslated> options instead. + +Space-separated list of tags you want to translate or skip. By default, +the specified tags will be excluded, but if you use the "tagsonly" option, +the specified tags will be the only ones included. The tags must be in the +form E<lt>aaaE<gt>, but you can join some (E<lt>bbbE<gt>E<lt>aaaE<gt>) to say that the content of +the tag E<lt>aaaE<gt> will only be translated when it's into a E<lt>bbbE<gt> tag. + +You can also specify some tag options by putting some characters in front of +the tag hierarchy. For example, you can put I<w> (wrap) or I<W> (don't wrap) +to override the default behavior specified by the global B<wrap> option. + +Example: WE<lt>chapterE<gt>E<lt>titleE<gt> + +=item B<attributes> + +Space-separated list of tag's attributes you want to translate. You can +specify the attributes by their name (for example, "lang"), but you can +prefix it with a tag hierarchy, to specify that this attribute will only be +translated when it's in the specified tag. For example: E<lt>bbbE<gt>E<lt>aaaE<gt>lang +specifies that the lang attribute will only be translated if it's in an +E<lt>aaaE<gt> tag, and it's in a E<lt>bbbE<gt> tag. + +=item B<foldattributes> + +Do not translate attributes in inline tags. +Instead, replace all attributes of a tag by po4a-id=<id>. + +This is useful when attributes shall not be translated, as this simplifies the +strings for translators, and avoids typos. + +=item B<customtag> + +Space-separated list of tags which should not be treated as tags. +These tags are treated as inline, and do not need to be closed. + +=item B<break> + +Space-separated list of tags which should break the sequence. +By default, all tags break the sequence. + +The tags must be in the form <aaa>, but you can join some +(<bbb><aaa>), if a tag (<aaa>) should only be considered +when it's within another tag (<bbb>). + +Please note a tag should be listed in only one of the B<break>, B<inline> +B<placeholder>, or B<customtag> setting string. + +=item B<inline> + +Space-separated list of tags which should be treated as inline. +By default, all tags break the sequence. + +The tags must be in the form <aaa>, but you can join some +(<bbb><aaa>), if a tag (<aaa>) should only be considered +when it's within another tag (<bbb>). + +=item B<placeholder> + +Space-separated list of tags which should be treated as placeholders. +Placeholders do not break the sequence, but the content of placeholders is +translated separately. + +The location of the placeholder in its block will be marked with a string +similar to: + + <placeholder type=\"footnote\" id=\"0\"/> + +The tags must be in the form <aaa>, but you can join some +(<bbb><aaa>), if a tag (<aaa>) should only be considered +when it's within another tag (<bbb>). + +=item B<break-pi> + +By default, Processing Instructions (i.e., C<E<lt>? ... ?E<gt>> tags) are handled as inline tags. +Pass this option if you want the PI to be handled as breaking tag. +Note that unprocessed PHP tags are handled as Processing Instructions by the parser. + +=item B<nodefault> + +Space separated list of tags that the module should not try to set by +default in any category. + +If you have a tag which has its default setting by the subclass of this module +but you want to set alternative setting, you need to list that tag as a part of +the B<nodefault> setting string. + +=item B<cpp> + +Support C preprocessor directives. +When this option is set, po4a will consider preprocessor directives as +paragraph separators. +This is important if the XML file must be preprocessed because otherwise +the directives may be inserted in the middle of lines if po4a consider it +belong to the current paragraph, and they won't be recognized by the +preprocessor. +Note: the preprocessor directives must only appear between tags +(they must not break a tag). + +=item B<translated> + +Space-separated list of tags you want to translate. + +The tags must be in the form <aaa>, but you can join some +(<bbb><aaa>), if a tag (<aaa>) should only be considered +when it's within another tag (<bbb>). + +You can also specify some tag options by putting some characters in front of +the tag hierarchy. This overrides the default behavior specified by the global +B<wrap> and B<defaulttranslateoption> option. + +=over + +=item I<w> + +Tags should be translated and content can be re-wrapped. + +=item I<W> + +Tags should be translated and content should not be re-wrapped. + +=item I<i> + +Tags should be translated inline. + +=item I<p> + +Tags should be translated as placeholders. + +=back + +Internally, the XML parser only cares about these four options: I<w> I<W> I<i> I<p>. + +* Tags listed in B<break> are set to I<w> or I<W> depending on the B<wrap> option. + +* Tags listed in B<inline> are set to I<i>. + +* Tags listed in B<placeholder> are set to I<p>. + +* Tags listed in B<untranslated> are without any of these options set. + +You can verify actual internal parameter behavior by invoking B<po4a> with +B<--debug> option. + +Example: WE<lt>chapterE<gt>E<lt>titleE<gt> + +Please note a tag should be listed in either B<translated> or B<untranslated> +setting string. + +=item B<untranslated> + +Space-separated list of tags you do not want to translate. + +The tags must be in the form <aaa>, but you can join some +(<bbb><aaa>), if a tag (<aaa>) should only be considered +when it's within another tag (<bbb>). + +Please note a translatable inline tag in an untranslated tag is treated as a +translatable breaking tag, I<i> setting is dropped and I<w> or I<W> is set +depending on the B<wrap> option. + +=item B<defaulttranslateoption> + +The default categories for tags that are not in any of the translated, +untranslated, break, inline, or placeholder. + +This is a set of letters as defined in B<translated> and this setting is only +valid for translatable tags. + +=back + +=cut + +sub initialize { + my $self = shift; + my %options = @_; + + # Reset the path + @path = (); + + # Initialize the stack of holders + my @paragraph = (); + my @sub_translations = (); + my %folded_attributes; + my %holder = ( + 'paragraph' => \@paragraph, + 'translation' => "", + 'sub_translations' => \@sub_translations, + 'folded_attributes' => \%folded_attributes + ); + @save_holders = ( \%holder ); + + $self->{options}{'addlang'} = 0; + $self->{options}{'nostrip'} = 0; + $self->{options}{'wrap'} = 0; + $self->{options}{'unwrap_attributes'} = 0; + $self->{options}{'caseinsensitive'} = 0; + $self->{options}{'escapequotes'} = 0; + $self->{options}{'tagsonly'} = 0; + $self->{options}{'tags'} = ''; + $self->{options}{'break'} = ''; + $self->{options}{'translated'} = ''; + $self->{options}{'untranslated'} = ''; + $self->{options}{'defaulttranslateoption'} = ''; + $self->{options}{'attributes'} = ''; + $self->{options}{'foldattributes'} = 0; + $self->{options}{'inline'} = ''; + $self->{options}{'break-pi'} = 0; + $self->{options}{'placeholder'} = ''; + $self->{options}{'customtag'} = ''; + $self->{options}{'doctype'} = ''; + $self->{options}{'nodefault'} = ''; + $self->{options}{'includeexternal'} = 0; + $self->{options}{'ontagerror'} = "fail"; + $self->{options}{'cpp'} = 0; + + $self->{options}{'verbose'} = ''; + $self->{options}{'debug'} = ''; + + foreach my $opt ( keys %options ) { + if ( $options{$opt} ) { + die wrap_mod( "po4a::xml::initialize", dgettext( "po4a", "Unknown option: %s" ), $opt ) + unless exists $self->{options}{$opt}; + $self->{options}{$opt} = $options{$opt}; + } + } + + # Default options set by modules. Forbidden for users. + $self->{options}{'_default_translated'} = ''; + $self->{options}{'_default_untranslated'} = ''; + $self->{options}{'_default_break'} = ''; + $self->{options}{'_default_inline'} = ''; + $self->{options}{'_default_placeholder'} = ''; + $self->{options}{'_default_attributes'} = ''; + $self->{options}{'_default_customtag'} = ''; + + # It will maintain the list of the translatable tags + $self->{tags} = (); + $self->{translated} = (); + $self->{untranslated} = (); + + # It will maintain the list of the translatable attributes + $self->{attributes} = (); + + # It will maintain the list of the breaking tags + $self->{break} = (); + + # It will maintain the list of the inline tags + $self->{inline} = (); + + # It will maintain the list of the placeholder tags + $self->{placeholder} = (); + + # It will maintain the list of the customtag tags + $self->{customtag} = (); + + # list of the tags that must not be set in the tags or inline category + # by this module or sub-module (unless specified in an option) + $self->{nodefault} = (); + + print "Call treat_options\n" if $self->{options}{'debug'}; + $self->treat_options; + + # Clear cache + %translate_options_cache = (); +} + +=head1 WRITING DERIVATIVE MODULES + +=head2 DEFINE WHAT TAGS AND ATTRIBUTES TO TRANSLATE + +The simplest customization is to define which tags and attributes you want +the parser to translate. This should be done in the initialize function. +First you should call the main initialize, to get the command-line options, +and then, append your custom definitions to the options hash. If you want +to treat some new options from command line, you should define them before +calling the main initialize: + + $self->{options}{'new_option'}=''; + $self->SUPER::initialize(%options); + $self->{options}{'_default_translated'}.=' <p> <head><title>'; + $self->{options}{'attributes'}.=' <p>lang id'; + $self->{options}{'_default_inline'}.=' <br>'; + $self->treat_options; + +You should use the B<_default_inline>, B<_default_break>, +B<_default_placeholder>, B<_default_translated>, B<_default_untranslated>, +and B<_default_attributes> options in derivative modules. This allow users +to override the default behavior defined in your module with command line +options. + +=head2 OVERRIDE THE DEFAULT BEHAVIOR WITH COMMAND LINE OPTIONS + +If you don't like the default behavior of this xml module and its derivative +modules, you can provide command line options to change their behavior. + +See L<Locale::Po4a::Docbook(3pm)|Locale::Po4a::Docbook>, + + +=head2 OVERRIDING THE found_string FUNCTION + +Another simple step is to override the function "found_string", which +receives the extracted strings from the parser, in order to translate them. +There you can control which strings you want to translate, and perform +transformations to them before or after the translation itself. + +It receives the extracted text, the reference on where it was, and a hash +that contains extra information to control what strings to translate, how +to translate them and to generate the comment. + +The content of these options depends on the kind of string it is (specified in an +entry of this hash): + +=over + +=item type="tag" + +The found string is the content of a translatable tag. The entry "tag_options" +contains the option characters in front of the tag hierarchy in the module +"tags" option. + +=item type="attribute" + +Means that the found string is the value of a translatable attribute. The +entry "attribute" has the name of the attribute. + +=back + +It must return the text that will replace the original in the translated +document. Here's a basic example of this function: + + sub found_string { + my ($self,$text,$ref,$options)=@_; + $text = $self->translate($text,$ref,"type ".$options->{'type'}, + 'wrap'=>$self->{options}{'wrap'}); + return $text; + } + +There's another simple example in the new Dia module, which only filters +some strings. + +=cut + +sub found_string { + my ( $self, $text, $ref, $options ) = @_; + + if ( $text =~ m/^\s*$/s ) { + return $text; + } + + my $comment; + my $wrap = $self->{options}{'wrap'}; + + if ( $options->{'type'} eq "tag" ) { + $comment = "Content of: " . $self->get_path; + + if ( $options->{'tag_options'} =~ /w/ ) { + $wrap = 1; + } + if ( $options->{'tag_options'} =~ /W/ ) { + $wrap = 0; + } + } elsif ( $options->{'type'} eq "attribute" ) { + $comment = "Attribute '" . $options->{'attribute'} . "' of: " . $self->get_path; + if ( $self->{options}{'unwrap_attributes'} == 0 ) { + $wrap = 1; + } else { + $wrap = 0; + } + } elsif ( $options->{'type'} eq "CDATA" ) { + $comment = "CDATA"; + $wrap = 0; + } else { + die wrap_ref_mod( $ref, "po4a::xml", dgettext( "po4a", "Internal error: unknown type identifier '%s'." ), + $options->{'type'} ); + } + $text = $self->translate( $text, $ref, $comment, 'wrap' => $wrap, comment => $options->{'comments'} ); + if ( $self->{options}{'escapequotes'} ) { + $text =~ s/'/\\'/g; + $text =~ s/"/\\"/g; + } + return $text; +} + +=head2 MODIFYING TAG TYPES (TODO) + +This is a more complex one, but it enables a (almost) total customization. +It's based on a list of hashes, each one defining a tag type's behavior. The +list should be sorted so that the most general tags are after the most +concrete ones (sorted first by the beginning and then by the end keys). To +define a tag type you'll have to make a hash with the following keys: + +=over 4 + +=item B<beginning> + +Specifies the beginning of the tag, after the "E<lt>". + +=item B<end> + +Specifies the end of the tag, before the "E<gt>". + +=item B<breaking> + +It says if this is a breaking tag class. A non-breaking (inline) tag is one +that can be taken as part of the content of another tag. It can take the +values false (0), true (1) or undefined. If you leave this undefined, you'll +have to define the f_breaking function that will say whether a concrete tag of +this class is a breaking tag or not. + +=item B<f_breaking> + +It's a function that will tell if the next tag is a breaking one or not. It +should be defined if the B<breaking> option is not. + +=item B<f_extract> + +If you leave this key undefined, the generic extraction function will have to +extract the tag itself. It's useful for tags that can have other tags or +special structures in them, so that the main parser doesn't get mad. This +function receives a boolean that says if the tag should be removed from the +input stream or not. + +=item B<f_translate> + +This function receives the tag (in the get_string_until() format) and returns +the translated tag (translated attributes or all needed transformations) as a +single string. + +=back + +=cut + +##### Generic XML tag types #####' + +our @tag_types = ( + { + beginning => "!--#", + end => "--", + breaking => 0, + f_extract => \&tag_extract_comment, + f_translate => \&tag_trans_comment + }, + { + beginning => "!--", + end => "--", + breaking => 0, + f_extract => \&tag_extract_comment, + f_translate => \&tag_trans_comment + }, + { + beginning => "?xml", + end => "?", + breaking => 1, + f_translate => \&tag_trans_xmlhead + }, + { + beginning => "?", + end => "?", + breaking => 0, # Can be changed with option break-pi + f_translate => \&tag_trans_procins + }, + { + beginning => "!DOCTYPE", + end => "", + breaking => 1, + f_extract => \&tag_extract_doctype, + f_translate => \&tag_trans_doctype + }, + { + beginning => "![CDATA[", + end => "]]", + breaking => 1, + f_extract => \&CDATA_extract, + f_translate => \&CDATA_trans + }, + { + beginning => "/", + end => "", + f_breaking => \&tag_break_close, + f_translate => \&tag_trans_close + }, + { + beginning => "", + end => "/", + f_breaking => \&tag_break_alone, + f_translate => \&tag_trans_alone + }, + { + beginning => "", + end => "", + f_breaking => \&tag_break_open, + f_translate => \&tag_trans_open + } +); + +sub tag_extract_comment { + my ( $self, $remove ) = ( shift, shift ); + my ( $eof, @tag ) = $self->get_string_until( '-->', { include => 1, remove => $remove } ); + return ( $eof, @tag ); +} + +sub tag_trans_comment { + my ( $self, @tag ) = @_; + return $self->join_lines(@tag); +} + +sub tag_trans_xmlhead { + my ( $self, @tag ) = @_; + + # We don't have to translate anything from here: throw away references + my $tag = $self->join_lines(@tag); + $tag =~ /encoding=(("|')|)(.*?)(\s|\2)/s; + my $in_charset = $3; + my $out_charset = $self->get_out_charset; + + if ( defined $in_charset ) { + croak wrap_mod( + "po4a::xml", + dgettext( + "po4a", + "The file %s declares %s as encoding, but you provided %s as master charset. Please change either setting." + ), + $self->{'current_file'}, + $in_charset, + $input_charset + ) if ( length( $input_charset // '' ) > 0 && uc($input_charset) ne uc($in_charset) ); + + $tag =~ s/$in_charset/$out_charset/; + } else { + if ( $tag =~ m/standalone/ ) { + $tag =~ s/(standalone)/encoding="$out_charset" $1/; + } else { + $tag .= " encoding=\"$out_charset\""; + } + } + + return $tag; +} + +sub tag_trans_procins { + my ( $self, @tag ) = @_; + return $self->join_lines(@tag); +} + +sub tag_extract_doctype { + my ( $self, $remove ) = ( shift, shift ); + + # Check if there is an internal subset (between []). + my ( $eof, @tag ) = $self->get_string_until( '>', { include => 1, unquoted => 1 } ); + my $parity = 0; + my $paragraph = ""; + map { $parity = 1 - $parity; $paragraph .= $parity ? $_ : ""; } @tag; + my $found = 0; + if ( $paragraph =~ m/<.*\[.*</s ) { + $found = 1; + } + + if ( not $found ) { + ( $eof, @tag ) = $self->get_string_until( '>', { include => 1, remove => $remove, unquoted => 1 } ); + } else { + ( $eof, @tag ) = + $self->get_string_until( ']\s*>', { include => 1, remove => $remove, unquoted => 1, regex => 1 } ); + } + return ( $eof, @tag ); +} + +sub tag_trans_doctype { + + # This check is not really reliable. There are system and public + # identifiers. Only the public one could be checked reliably. + my ( $self, @tag ) = @_; + if ( defined $self->{options}{'doctype'} ) { + my $doctype = $self->{options}{'doctype'}; + if ( $tag[0] !~ /\Q$doctype\E/i ) { + warn wrap_ref_mod( + $tag[1], + "po4a::xml", + dgettext( + "po4a", + "Bad document type. '%s' expected. You can fix this warning with a -o doctype option, or ignore this check with -o doctype=\"\"." + ), + $doctype + ); + } + } + my $i = 0; + my $basedir = $tag[1]; + $basedir =~ s/:[0-9]+$//; + $basedir = dirname($basedir); + + while ( $i < $#tag ) { + my $t = $tag[$i]; + my $ref = $tag[ $i + 1 ]; + if ( $t =~ /^(\s*<!ENTITY\s+)(.*)$/is ) { + my $part1 = $1; + my $part2 = $2; + my $includenow = 0; + my $file = 0; + my $name = ""; + if ( $part2 =~ /^(%\s+)(.*)$/s ) { + $part1 .= $1; + $part2 = $2; + $includenow = 1; + } + $part2 =~ /^(\S+)(\s+)(.*)$/s; + $name = $1; + $part1 .= $1 . $2; + $part2 = $3; + if ( $part2 =~ /^(SYSTEM\s+)(.*)$/is ) { + $part1 .= $1; + $part2 = $2; + $file = 1; + if ( $self->{options}{'includeexternal'} ) { + $entities{$name} = $part2; + $entities{$name} =~ s/^"?(.*?)".*$/$1/s; + $entities{$name} = File::Spec->catfile( $basedir, $entities{$name} ); + } + } + if ( ( not $file ) and ( not $includenow ) ) { + if ( $part2 =~ m/^\s*(["'])(.*)\1(\s*>.*)$/s ) { + my $comment = "Content of the $name entity"; + my $quote = $1; + my $text = $2; + $part2 = $3; + $text = $self->translate( $text, $ref, $comment, 'wrap' => 1 ); + $t = $part1 . "$quote$text$quote$part2"; + } + } + + # print $part1."\n"; + # print $name."\n"; + # print $part2."\n"; + } + $tag[$i] = $t; + $i += 2; + } + return $self->join_lines(@tag); +} + +sub tag_break_close { + my ( $self, @tag ) = @_; + my $struct = $self->get_path; + my $options = $self->get_translate_options($struct); + if ( $options =~ m/[ip]/ ) { + return 0; + } else { + return 1; + } +} + +sub tag_trans_close { + my ( $self, @tag ) = @_; + my $name = $self->get_tag_name(@tag); + + my $test = pop @path; + if ( !defined($test) || $test ne $name ) { + + # Check whether it's simply a missing closing tag that I could survive + if ( ( $self->{options}{'optionalclosingtag'} // 0 ) == 1 ) { + my $found = 0; + map { $found = 1 if $_ eq $name } @path; + if ($found) + { # The opening tag corresponding to the one closed now exists in the path. Pop everything in between + while ( $test ne $name ) { + $test = pop @path; + } + return $self->join_lines(@tag); + } + } + + my $ontagerror = $self->{options}{'ontagerror'}; + if ( $ontagerror eq "warn" ) { + warn wrap_ref_mod( + $tag[1], + "po4a::xml", + dgettext( "po4a", "Unexpected closing tag </%s> found. The main document may be wrong. Continuing…" ), + $name + ); + } elsif ( $ontagerror ne "silent" ) { + die wrap_ref_mod( $tag[1], "po4a::xml", + dgettext( "po4a", "Unexpected closing tag </%s> found. The main document may be wrong." ), $name ); + } + } + return $self->join_lines(@tag); +} + +sub CDATA_extract { + my ( $self, $remove ) = ( shift, shift ); + my ( $eof, @tag ) = $self->get_string_until( ']]>', { include => 1, unquoted => 0, remove => $remove } ); + + return ( $eof, @tag ); +} + +sub CDATA_trans { + my ( $self, @tag ) = @_; + return $self->found_string( $self->join_lines(@tag), $tag[1], { 'type' => "CDATA" } ); +} + +sub tag_break_alone { + my ( $self, @tag ) = @_; + my $struct = $self->get_path( $self->get_tag_name(@tag) ); + if ( $self->get_translate_options($struct) =~ m/[ip]/ ) { + return 0; + } else { + return 1; + } +} + +sub tag_trans_alone { + my ( $self, @tag ) = @_; + my $name = $self->get_tag_name(@tag); + push @path, $name; + + $name = $self->treat_attributes(@tag); + + pop @path; + return $name; +} + +sub tag_break_open { + my ( $self, @tag ) = @_; + my $struct = $self->get_path( $self->get_tag_name(@tag) ); + my $options = $self->get_translate_options($struct); + if ( $options =~ m/[ip]/ ) { + return 0; + } else { + return 1; + } +} + +sub tag_trans_open { + my ( $self, @tag ) = @_; + my $name = $self->get_tag_name(@tag); + push @path, $name; + + $name = $self->treat_attributes(@tag); + + if ( defined $self->{options}{'addlang'} ) { + my $struct = $self->get_path(); + if ( $struct eq $self->{options}{'addlang'} ) { + $name .= ' lang="' . $self->{TT}{po_in}->{lang} . '"'; + } + } + + return $name; +} + +##### END of Generic XML tag types ##### + +=head1 INTERNAL FUNCTIONS used to write derivative parsers + +=head2 WORKING WITH TAGS + +=over 4 + +=item get_path() + +This function returns the path to the current tag from the document's root, +in the form E<lt>htmlE<gt>E<lt>bodyE<gt>E<lt>pE<gt>. + +An additional array of tags (without brackets) can be passed as argument. +These path elements are added to the end of the current path. + +=cut + +sub get_path { + my $self = shift; + my @add = @_; + if ( @path > 0 or @add > 0 ) { + return "<" . join( "><", @path, @add ) . ">"; + } else { + return "outside any tag (error?)"; + } +} + +=item tag_type() + +This function returns the index from the tag_types list that fits to the next +tag in the input stream, or -1 if it's at the end of the input file. + +Here, the tag has structure started by E<lt> and end by E<gt> and it can +contain multiple lines. + +This works on the array C<< @{$self->{TT}{doc_in}} >> holding input document +data and reference indirectly via C<< $self->shiftline() >> and C<< +$self->unshiftline($$) >>. + +=cut + +sub tag_type { + my $self = shift; + my ( $line, $ref ) = $self->shiftline(); + my ( $match1, $match2 ); + my $found = 0; + my $i = 0; + + if ( !defined($line) ) { return -1; } + + $self->unshiftline( $line, $ref ); + my ( $eof, @lines ) = $self->get_string_until( ">", { include => 1, unquoted => 1 } ); + my $line2 = $self->join_lines(@lines); + while ( !$found && $i < @tag_types ) { + ( $match1, $match2 ) = ( $tag_types[$i]->{beginning}, $tag_types[$i]->{end} ); + if ( $line =~ /^<\Q$match1\E/ ) { + if ( !defined( $tag_types[$i]->{f_extract} ) ) { + + #print substr($line2,length($line2)-1-length($match2),1+length($match2))."\n"; + if ( defined($line2) and $line2 =~ /\Q$match2\E>$/ ) { + $found = 1; + + #print "YES: <".$match1." ".$match2.">\n"; + } else { + + #print "NO: <".$match1." ".$match2.">\n"; + $i++; + } + } else { + $found = 1; + } + } else { + $i++; + } + } + if ( !$found ) { + + #It should never enter here, unless you undefine the most + #general tags (as <...>) + chomp $line; + die $ref . ": Unknown tag type: " . $line . "\n"; + } else { + return $i; + } +} + +=item extract_tag($$) + +This function returns the next tag from the input stream without the beginning +and end, in an array form, to maintain the references from the input file. It +has two parameters: the type of the tag (as returned by tag_type) and a +boolean, that indicates if it should be removed from the input stream. + +This works on the array C<< @{$self->{TT}{doc_in}} >> holding input document +data and reference indirectly via C<< $self->shiftline() >> and C<< +$self->unshiftline($$) >>. + +=cut + +sub extract_tag { + my ( $self, $type, $remove ) = ( shift, shift, shift ); + my ( $match1, $match2 ) = ( $tag_types[$type]->{beginning}, $tag_types[$type]->{end} ); + my ( $eof, @tag ); + if ( defined( $tag_types[$type]->{f_extract} ) ) { + + # <!--# ... -->, <!-- ... -->, <!DOCTYPE ... >, or <![CDATA[ ... ]]> + ( $eof, @tag ) = &{ $tag_types[$type]->{f_extract} }( $self, $remove ); + } else { + + # <?xml ?>, <? ... ?>, </ tag>, <tag />, or <tag >. + ( $eof, @tag ) = $self->get_string_until( $match2 . ">", { include => 1, remove => $remove, unquoted => 1 } ); + } + + # Please note even index of array @tag holds actual text of input line + # Please note odd index of array @tag holds its reference = $filename:$flinenum + $tag[0] =~ /^<\Q$match1\E(.*)$/s; + $tag[0] = $1; + $tag[ $#tag - 1 ] =~ /^(.*)\Q$match2\E>$/s; + $tag[ $#tag - 1 ] = $1; + + # Please note even index of array @tag holds tag string + return ( $eof, @tag ); +} + +=item get_tag_name(@) + +This function returns the name of the tag passed as an argument, in the array +form returned by extract_tag. + +=cut + +sub get_tag_name { + my ( $self, @tag ) = @_; + $tag[0] =~ /^(\S*)/; + return $1; +} + +=item breaking_tag() + +This function returns a boolean that says if the next tag in the input stream +is a breaking tag or not (inline tag). It leaves the input stream intact. + +=cut + +sub breaking_tag { + my $self = shift; + my $break; + + my $type = $self->tag_type; + if ( $type == -1 ) { return 0; } + + $break = $tag_types[$type]->{breaking}; + if ( !defined($break) ) { + + # This tag's breaking depends on its content + my ( $eof, @lines ) = $self->extract_tag( $type, 0 ); + $break = &{ $tag_types[$type]->{f_breaking} }( $self, @lines ); + } + + # print "TAG TYPE = ".$type." (<".$tag_types[$type]->{beginning}.") break:$break\n"; + return $break; +} + +=item treat_tag() + +This function translates the next tag from the input stream. Using each +tag type's custom translation functions. + +This works on the array C<< @{$self->{TT}{doc_in}} >> holding input document +data and reference indirectly via C<< $self->shiftline() >> and C<< +$self->unshiftline($$) >>. + +=cut + +sub treat_tag { + my $self = shift; + my $type = $self->tag_type; + + my ( $match1, $match2 ) = ( $tag_types[$type]->{beginning}, $tag_types[$type]->{end} ); + my ( $eof, @lines ) = $self->extract_tag( $type, 1 ); + + # Please note even index of array @lines holds actual text of input line + # Please note odd index of array @lines holds its reference = $filename:$flinenum + + $lines[0] =~ /^(\s*)(.*)$/s; + my $space1 = $1; + $lines[0] = $2; + $lines[ $#lines - 1 ] =~ /^(.*?)(\s*)$/s; + my $space2 = $2; + $lines[ $#lines - 1 ] = $1; + + # Calling this tag type's specific handling (translation of attributes...) + my $line = &{ $tag_types[$type]->{f_translate} }( $self, @lines ); + print wrap_mod( "po4a::xml::treat_tag", "%s: type=%s <%s%s%s%s%s>", + $lines[1], $type, $match1, $space1, $line, $space2, $match2 ) + if $self->{options}{'debug'}; + $self->pushline( "<" . $match1 . $space1 . $line . $space2 . $match2 . ">" ); + return $eof; +} + +=item tag_in_list($@) + +This function returns a string value that says if the first argument (a tag +hierarchy) matches any of the tags from the second argument (a list of tags +or tag hierarchies). If it doesn't match, it returns 0. Else, it returns the +matched tag's options (the characters in front of the tag) or 1 (if that tag +doesn't have options). + +=back + +=cut + +sub tag_in_list ($$$) { + my ( $self, $path, $list ) = @_; + if ( $self->{options}{'caseinsensitive'} ) { + $path = lc $path; + } + + while (1) { + if ( defined $list->{$path} ) { + if ( length $list->{$path} ) { + return $list->{$path}; + } else { + return 1; + } + } + last unless ( $path =~ m/</ ); + $path =~ s/^<.*?>//; + } + + return 0; +} + +=head2 WORKING WITH ATTRIBUTES + +=over 4 + +=item treat_attributes(@) + +This function handles the translation of the tags' attributes. It receives the tag +without the beginning / end marks, and then it finds the attributes, and it +translates the translatable ones (specified by the module option B<attributes>). +This returns a plain string with the translated tag. + +=back + +=cut + +sub treat_attributes { + my ( $self, @tag ) = @_; + + $tag[0] =~ /^(\S*)(.*)/s; + my $text = $1; + $tag[0] = $2; + + while (@tag) { + my $complete = 1; + + $text .= $self->skip_spaces( \@tag ); + if (@tag) { + + # Get the attribute's name + $complete = 0; + + $tag[0] =~ /^([^\s=]+)(.*)/s; + my $name = $1; + my $ref = $tag[1]; + $tag[0] = $2; + $text .= $name; + $text .= $self->skip_spaces( \@tag ); + if (@tag) { + + # Get the '=' + if ( $tag[0] =~ /^=(.*)/s ) { + $tag[0] = $1; + $text .= "="; + $text .= $self->skip_spaces( \@tag ); + if (@tag) { + + # Get the value + my $value = ""; + $ref = $tag[1]; + my $quot = substr( $tag[0], 0, 1 ); + if ( $quot ne "\"" and $quot ne "'" ) { + + # Unquoted value + $quot = ""; + $tag[0] =~ /^(\S+)(.*)/s; + $value = $1; + $tag[0] = $2; + } else { + + # Quoted value + $text .= $quot; + $tag[0] =~ /^\Q$quot\E(.*)/s; + $tag[0] = $1; + while ( $tag[0] !~ /\Q$quot\E/ ) { + $value .= $tag[0]; + shift @tag; + shift @tag; + } + $tag[0] =~ /^(.*?)\Q$quot\E(.*)/s; + $value .= $1; + $tag[0] = $2; + } + $complete = 1; + if ( $self->tag_in_list( $self->get_path . $name, $self->{attributes} ) ) { + $text .= $self->found_string( $value, $ref, { type => "attribute", attribute => $name } ); + } else { + print wrap_mod( + "po4a::xml::treat_attributes", + dgettext( + "po4a", + "%s: attribute '%s' is not defined in module option 'attributes' and\n" + . ".... is not translated for the attribute path '%s'" + ), + $ref, $value, + $self->get_path . $name + ) if $self->{options}{'debug'}; + $text .= $value; + } + $text .= $quot; + } + } else { # This is an attribute with no '=' sign, nothing to translate + $complete = 1; + } + } + + unless ($complete) { + my $ontagerror = $self->{options}{'ontagerror'}; + if ( $ontagerror eq "warn" ) { + warn wrap_mod( "po4a::xml::treat_attributes", + dgettext( "po4a", "%s: Bad attribute syntax. Continuing…" ), $ref ); + } elsif ( $ontagerror ne "silent" ) { + die wrap_mod( "po4a::xml::treat_attributes", dgettext( "po4a", "%s: Bad attribute syntax" ), $ref ); + } + } + } + } + return $text; +} + +# Returns an empty string if the content in the $path should not be +# translated. +# +# Otherwise, returns the set of options for translation: +# w: the content shall be re-wrapped +# W: the content shall not be re-wrapped +# i: the tag shall be inlined +# p: a placeholder shall replace the tag (and its content) +# n: a custom tag +# f: fold attribute +# +# A translatable inline tag in an untranslated tag is treated as a translatable breaking tag. +sub get_translate_options { + my $self = shift; + my $path = shift; + + if ( defined $translate_options_cache{$path} ) { + + # print "option($path)=".$translate_options_cache{$path}." (cached)\n"; + return $translate_options_cache{$path}; + } + + my $options = ""; + my $translate = 0; + my $usedefault = 1; + + my $inlist = 0; + my $tag = $self->get_tag_from_list( $path, $self->{tags} ); + if ( defined $tag ) { + $inlist = 1; + } + + # Note: tags option is deprecated. --> $inlist should be 0 now + + if ( $self->{options}{'tagsonly'} eq $inlist ) { + + # Note: tags option is deprecated. --> $inlist should be 0 now + # Default is not to use tagsonly --> You are here. + $usedefault = 0; + if ( defined $tag ) { + $options = $tag; + $options =~ s/<.*$//; + } else { + + # Note: tags option is deprecated. --> $tag is undefined + # $self->{options}{'wrap'} = 0 ... xml inherent default + # $self->{options}{'wrap'} = 1 ... docbook overridden default + # This sets all tags unlisted in translated nor untranslated to become translated tag normally + if ( $self->{options}{'wrap'} ) { + $options = "w"; + } else { + $options = "W"; + } + } + $translate = 1; + } + + # TODO: a less precise set of tags should not override a more precise one + # The tags and tagsonly options are deprecated. + # The translated and untranslated options have a higher priority. + $tag = $self->get_tag_from_list( $path, $self->{translated} ); + if ( defined $tag ) { + $usedefault = 0; + $options = $tag; + $options =~ s/<.*$//; + $translate = 1; + } + + if ( $translate and $options !~ m/w/i ) { + $options .= ( $self->{options}{'wrap'} ) ? "w" : "W"; + } + + if ( not defined $tag ) { + $tag = $self->get_tag_from_list( $path, $self->{untranslated} ); + if ( defined $tag ) { + $usedefault = 0; + $options = ""; + $translate = 0; + } + } + + $tag = $self->get_tag_from_list( $path, $self->{inline} ); + if ( defined $tag ) { + $usedefault = 0; + $options .= "i"; + } else { + $tag = $self->get_tag_from_list( $path, $self->{placeholder} ); + if ( defined $tag ) { + $usedefault = 0; + $options .= "p"; + } + } + + $tag = $self->get_tag_from_list( $path, $self->{customtag} ); + if ( defined $tag ) { + $usedefault = 0; + $options = "in"; # This erases any other setting + } + + if ($usedefault) { + $options = $self->{options}{'defaulttranslateoption'}; + } + + # A translatable inline tag in an untranslated tag is treated as a + # translatable breaking tag. + if ( $options =~ m/i/ ) { + my $ppath = $path; + $ppath =~ s/<[^>]*>$//; + my $poptions = $self->get_translate_options($ppath); + if ( $poptions eq "" ) { + $options =~ s/i//; + print wrap_mod( + "po4a::xml::get_translate_options", + dgettext( + "po4a", + "%s: translation option='%s'.\n *** the original translation option is overridden here since parent path='%s' is untranslated," + ), + $path, $options, $ppath + ) if $self->{options}{'debug'}; + } + } + + if ( $options =~ m/i/ and $self->{options}{'foldattributes'} ) { + $options .= "f"; + } + + if ( $options !~ m/i/ and $self->{options}{'foldattributes'} ) { + print wrap_mod( + "po4a::xml::get_translate_options", + dgettext( "po4a", "%s: foldattributes setting ignored since '%s' is not inline tag" ), + $path, $tag + ) if $self->{options}{'debug'}; + } + + $translate_options_cache{$path} = $options; + + # print "option($path)=".$translate_options_cache{$path}." (new)\n"; + + #print wrap_mod("po4a::xml::get_translate_options", dgettext ("po4a", "%s: options: '%s'"), $path, $options) if $self->{options}{'debug'}; + return $options; +} + +# Return the tag (or biggest set of tags) of a list which matches with the +# given path. +# +# The tag (or set of tags) is returned with its options. +# +# If no tags could match the path, undef is returned. +sub get_tag_from_list ($$$) { + my ( $self, $path, $list ) = @_; + if ( $self->{options}{'caseinsensitive'} ) { + $path = lc $path; + } + + while (1) { + if ( defined $list->{$path} ) { + return $list->{$path} . $path; + } + last unless ( $path =~ m/</ ); + $path =~ s/^<.*?>//; + } + + return undef; +} + +=head2 WORKING WITH TAGGED CONTENTS + +=over 4 + + +=item treat_content() + +This function gets the text until the next breaking tag (not inline) from the +input stream. Translate it using each tag type's custom translation functions. + +This works on the array C<< @{$self->{TT}{doc_in}} >> holding input document +data and reference indirectly via C<< $self->shiftline() >> and C<< +$self->unshiftline($$) >>. + +=back + +=cut + +sub treat_content { + my $self = shift; + my $blank = ""; + + # Indicates if the paragraph will have to be translated + my $translate = ""; + + my ( $eof, @paragraph ) = $self->get_string_until( '<', { remove => 1 } ); + + # Please note even index of array @paragraph holds actual text of input line + # Please note odd index of array @paragraph holds its reference = $filename:$flinenum + + while ( !$eof and !$self->breaking_tag ) { + NEXT_TAG: + + # Loop if tag is <!--# ... -->, <!-- ... -->, </tag>, <tag />, or <tag > + my @text; + my $type = $self->tag_type; + my $f_extract = $tag_types[$type]->{'f_extract'}; + if ( defined($f_extract) + and $f_extract eq \&tag_extract_comment ) + { + # if tag is <!--# ... --> or <!-- ... -->, remove this tag from the + # input stream and save its content to @comments for use by + # translate_paragraph. + print wrap_mod( "po4a::xml::treat_content", "%s: type='%s'", $paragraph[1], $type ) + if $self->{options}{'debug'}; + ( $eof, @text ) = $self->extract_tag( $type, 1 ); + + # Add "\0" to mark end of each separate comment + $text[ $#text - 1 ] .= "\0"; + if ( $tag_types[$type]->{'beginning'} eq "!--#" ) { + $text[0] = "#" . $text[0]; + } + push @comments, @text; + } else { + + # if tag is </tag>, <tag />, or <tag >, get its tag name + # alone in @tag without touching the input stream, then get this + # whole tag with attributes in @text while removing this whole tag + # from the input stream for use by translate_paragraph. + my ( $tmpeof, @tag ) = $self->extract_tag( $type, 0 ); + + # Append the found inline tag + ( $eof, @text ) = $self->get_string_until( + '>', + { + include => 1, + remove => 1, + unquoted => 1 + } + ); + + # print "cur: ".$self->get_tag_name(@tag)."\n"; + + # Append or remove the opening/closing tag from the tag path + if ( $tag_types[$type]->{'end'} eq "" ) { + if ( $tag_types[$type]->{'beginning'} eq "" ) { + $self->treat_content_open_tag( \@tag, \@paragraph, \@text ); + } elsif ( $tag_types[$type]->{'beginning'} eq "/" ) { + $self->treat_content_close_tag( \@tag, \@paragraph, \@text ); + } + } elsif ( $tag_types[$type]->{'beginning'} eq "" + && $tag_types[$type]->{'end'} eq "/" ) + { + # As for empty-element tag, + # treat as if both open and close tags exist + $self->treat_content_open_tag( \@tag, \@paragraph, \@text ); + $self->treat_content_close_tag( \@tag, \@paragraph, \@text ); + } + push @paragraph, @text; + } + + # Next tag + ( $eof, @text ) = $self->get_string_until( '<', { remove => 1 } ); + if ( $#text > 0 ) { + + # Check if text (extracted after the inline tag) + # has to be translated + push @paragraph, @text; + } + } + + # This strips the extracted strings + # (only if you don't specify the 'nostrip' option, and if the + # paragraph can be re-wrapped) + $translate = $self->get_translate_options( $self->get_path ); + if ( !$self->{options}{'nostrip'} and $translate !~ m/W/ ) { + my $clean = 0; + + # Clean the beginning + while ( !$clean and $#paragraph > 0 ) { + $paragraph[0] =~ /^(\s*)(.*)/s; + my $match = $1; + if ( $paragraph[0] eq $match ) { + if ( $match ne "" ) { + $self->pushline($match); + } + shift @paragraph; + shift @paragraph; + } else { + $paragraph[0] = $2; + if ( $match ne "" ) { + $self->pushline($match); + } + $clean = 1; + } + } + $clean = 0; + + # Clean the end + while ( !$clean and $#paragraph > 0 ) { + $paragraph[ $#paragraph - 1 ] =~ /^(.*?)(\s*)$/s; + my $match = $2; + if ( $paragraph[ $#paragraph - 1 ] eq $match ) { + if ( $match ne "" ) { + $blank = $match . $blank; + } + pop @paragraph; + pop @paragraph; + } else { + $paragraph[ $#paragraph - 1 ] = $1; + if ( $match ne "" ) { + $blank = $match . $blank; + } + $clean = 1; + } + } + } + + # Translate the string when needed + # This will either push the translation in the translated document or + # in the current holder translation. + $self->translate_paragraph(@paragraph); + + # Push the trailing blanks + if ( $blank ne "" ) { + $self->pushline($blank); + } + return $eof; +} + +# Processes open tags during getting texts. +# Performs special process for placeholder and attribute folding. +sub treat_content_open_tag { + my $self = shift; + my ( $tag, $paragraph, $text ) = @_; + + # tag is <tag > + my $cur_tag_name = $self->get_tag_name(@$tag); + my $t_opts = $self->get_translate_options( $self->get_path($cur_tag_name) ); + if ( $t_opts =~ m/p/ ) { + + # tag has a placeholder option, append a "<placeholder + # type=cur_tag_name id =id_index>" tag to @$paragraph. + # using $self->get_tag_name(@$tag) as cur_tag_name and + # using $#{$save_holders[$#save_holders]->{'sub_translations'}} + 1 + # as id_index + my $last_holder = $save_holders[$#save_holders]; + my $placeholder_str = + "<placeholder type=\"" + . $cur_tag_name + . "\" id=\"" + . ( $#{ $last_holder->{'sub_translations'} } + 1 ) . "\"/>"; + push @$paragraph, ( $placeholder_str, $text->[1] ); + my @saved_paragraph = @$paragraph; + + $last_holder->{'paragraph'} = \@saved_paragraph; + + # make attributes be able to be translated + my $open_tag = $self->join_lines(@$text); + if ( $open_tag =~ m/^<(\s*)(\S+\s+\S.*)>$/s ) { + my ( $ws, $tag_inner ) = ( $1, $2 ); + $tag_inner =~ s|(\s*/)$||; + my $postfix = $1; + push @path, $cur_tag_name; + $open_tag = "<" . $ws . $self->treat_attributes($tag_inner) . $postfix . ">"; + pop @path; + } + + # Then we must push a new holder into @save_holders + my @new_paragraph = (); + my @sub_translations = (); + my %folded_attributes; + my %new_holder = ( + 'paragraph' => \@new_paragraph, + 'open' => $open_tag, + 'translation' => "", + 'close' => undef, + 'sub_translations' => \@sub_translations, + 'folded_attributes' => \%folded_attributes + ); + push @save_holders, \%new_holder; + + # reset @$text holding the whole tag with attributes + # to empty + @$text = (); + + # reset the current @$paragraph (for the current holder) + # to empty. + @$paragraph = (); + + } elsif ( $t_opts =~ m/f/ ) { + + # tag has a "f" option for folded attributes + my $tag_full = $self->join_lines(@$text); + my $tag_ref = $text->[1]; + if ( $tag_full =~ m/^<(\s*)(\S+\s+\S.*)>$/s ) { + my ( $ws, $tag_inner ) = ( $1, $2 ); + my $holder = $save_holders[$#save_holders]; + my $id = 0; + foreach ( keys %{ $holder->{folded_attributes} } ) { + $id = $_ + 1 if ( $_ >= $id ); + } + + # make attributes be able to be translated + $tag_inner =~ s|(\s*/)$||; + my $postfix = $1; + push @path, $cur_tag_name; + $holder->{folded_attributes}->{$id} = "<" . $ws . $self->treat_attributes($tag_inner) . $postfix . ">"; + pop @path; + + @$text = ( "<$cur_tag_name po4a-id=$id>", $tag_ref ); + } + } + unless ( $t_opts =~ m/n/ ) { + + # unless "n" for custom (such as non-XML HTML) tag, update @path + push @path, $cur_tag_name; + } +} + +# Processes close tags during getting texts. +# Performs special process for placeholder. +sub treat_content_close_tag { + my $self = shift; + my ( $tag, $paragraph, $text ) = @_; + + # tag is </tag> + + # Verify this closing tag matches with the last opening tag + # while removing the last opening tag in @path + my $test = pop @path; + my $name = $self->get_tag_name(@$tag); + if ( !defined($test) + || $test ne $name ) + { + my $ontagerror = $self->{options}{'ontagerror'}; + if ( $ontagerror eq "warn" ) { + warn wrap_ref_mod( + $tag->[1], + "po4a::xml", + dgettext( "po4a", "Unexpected closing tag </%s> found. The main document may be wrong. Continuing…" ), + $name + ); + } elsif ( $ontagerror ne "silent" ) { + die wrap_ref_mod( $tag->[1], "po4a::xml", + dgettext( "po4a", "Unexpected closing tag </%s> found. The main document may be wrong." ), $name ); + } + } + + if ( $self->get_translate_options( $self->get_path( $self->get_tag_name(@$tag) ) ) =~ m/p/ ) { + + # this closing tag has a placeholder option + + # revert @path to include this tag for translate_paragraph + push @path, $self->get_tag_name(@$tag); + + # Now translate this paragraph if needed. + # This will call pushline and append the + # translation to the current holder's translation. + $self->translate_paragraph(@$paragraph); + + # remove this tag from @path + pop @path; + + # Now that this holder is closed, we can remove + # the holder from the stack. + my $holder = pop @save_holders; + + # We need to keep the translation of this holder + my $translation = $holder->{'open'} . $holder->{'translation'}; + $translation .= $self->join_lines(@$text); + + @$text = (); + + # Then we store the translation in the previous + # holder's sub_translations array + my $previous_holder = $save_holders[$#save_holders]; + push @{ $previous_holder->{'sub_translations'} }, $translation; + + # We also need to restore the @$paragraph array, as + # it was before we encountered the holder. + @$paragraph = @{ $previous_holder->{'paragraph'} }; + } +} + +# Translate a @paragraph array of (string, reference). +# The $translate argument indicates if the strings must be translated or +# just pushed +sub translate_paragraph { + my $self = shift; + my @paragraph = @_; + my $translate = $self->get_translate_options( $self->get_path ); + + while ( ( scalar @paragraph ) + and ( $paragraph[0] =~ m/^\s*\n/s ) ) + { + $self->pushline( $paragraph[0] ); + shift @paragraph; + shift @paragraph; + } + + my $comments; + while (@comments) { + my ( $comment, $eoc ); + do { + my ( $t, $l ) = ( shift @comments, shift @comments ); + $t =~ s/\n?(\0)?$//; + $eoc = $1; + $comment .= "\n" if defined $comment; + $comment .= $t; + } until ($eoc); + $comments .= "\n" if defined $comments; + $comments .= $comment; + $self->pushline( "<!--" . $comment . "-->\n" ) if defined $comment; + } + @comments = (); + + if ( $self->{options}{'cpp'} ) { + my @tmp = @paragraph; + @paragraph = (); + while (@tmp) { + my ( $t, $l ) = ( shift @tmp, shift @tmp ); + + # #include can be followed by a filename between + # <> brackets. In that case, the argument won't be + # handled in the same call to translate_paragraph. + # Thus do not try to match "include ". + if ( $t =~ m/^#[ \t]*(if |endif|undef |include|else|ifdef |ifndef |define )/si ) { + if (@paragraph) { + $self->translate_paragraph(@paragraph); + @paragraph = (); + $self->pushline("\n"); + } + $self->pushline($t); + } else { + push @paragraph, ( $t, $l ); + } + } + } + + my $para = $self->join_lines(@paragraph); + if ( length($para) > 0 ) { + if ( $translate ne "" ) { + + # This tag should be translated + print wrap_mod( + "po4a::xml::translate_paragraph", + "%s: path='%s', translation option='%s'", + $paragraph[1], $self->get_path, $translate + ) if $self->{options}{'debug'}; + $self->pushline( + $self->found_string( + $para, + $paragraph[1], + { + type => "tag", + tag_options => $translate, + comments => $comments + } + ) + ); + } else { + + # Inform that this tag isn't translated in debug mode + print wrap_mod( + "po4a::xml::translate_paragraph", + "%s: path='%s', translation option='%s' (no translation)", + $paragraph[1], $self->get_path, $translate + ) if $self->{options}{'debug'}; + $self->pushline($para); + } + } + + # Now the paragraph is fully translated. + # If we have all the holders' translation, we can replace the + # placeholders by their translations. + # We must wait to have all the translations because the holders are + # numbered. + { + my $holder = $save_holders[$#save_holders]; + my $translation = $holder->{'translation'}; + + # Count the number of <placeholder ...> in $translation + my $count = 0; + my $str = $translation; + while ( ( defined $str ) + and ( $str =~ m/^.*?<placeholder\s+[^>]*>(.*)$/s ) ) + { + die wrap_mod( + "po4a::xml", + dgettext( + "po4a", + "Invalid placeholder in the translation (the 'type' and 'id' must be present, in this order).\n%s\n\nPlease fix your translation." + ), + $str + ) unless ( $str =~ m/^.*?<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>(.*)$/s ); + + $count += 1; + $str = $2; + if ( $holder->{'sub_translations'}->[$1] =~ m/<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>/s ) { + $count = -1; + last; + } + } + + if ( ( defined $translation ) + and ( scalar( @{ $holder->{'sub_translations'} } ) == $count ) ) + { + # OK, all the holders of the current paragraph are + # closed (and translated). + # Replace them by their translation. + while ( $translation =~ m/^(.*?)<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>(.*)$/s ) { + + # FIXME: we could also check that + # * the holder exists + # * all the holders are used + $translation = $1 . $holder->{'sub_translations'}->[$2] . $3; + } + + # We have our translation + $holder->{'translation'} = $translation; + + # And there is no need for any holder in it. + my @sub_translations = (); + $holder->{'sub_translations'} = \@sub_translations; + } + } + +} + +=head2 WORKING WITH THE MODULE OPTIONS + +=over 4 + +=item treat_options() + +This function fills the internal structures that contain the tags, attributes +and inline data with the options of the module (specified in the command-line +or in the initialize function). + +=back + +=cut + +sub treat_options { + my $self = shift; + + if ( $self->{options}{'caseinsensitive'} ) { + $self->{options}{'nodefault'} = lc $self->{options}{'nodefault'}; + $self->{options}{'tags'} = lc $self->{options}{'tags'}; + $self->{options}{'break'} = lc $self->{options}{'break'}; + $self->{options}{'_default_break'} = lc $self->{options}{'_default_break'}; + $self->{options}{'translated'} = lc $self->{options}{'translated'}; + $self->{options}{'_default_translated'} = lc $self->{options}{'_default_translated'}; + $self->{options}{'untranslated'} = lc $self->{options}{'untranslated'}; + $self->{options}{'_default_untranslated'} = lc $self->{options}{'_default_untranslated'}; + $self->{options}{'attributes'} = lc $self->{options}{'attributes'}; + $self->{options}{'_default_attributes'} = lc $self->{options}{'_default_attributes'}; + $self->{options}{'inline'} = lc $self->{options}{'inline'}; + $self->{options}{'_default_inline'} = lc $self->{options}{'_default_inline'}; + $self->{options}{'placeholder'} = lc $self->{options}{'placeholder'}; + $self->{options}{'_default_placeholder'} = lc $self->{options}{'_default_placeholder'}; + $self->{options}{'customtag'} = lc $self->{options}{'customtag'}; + $self->{options}{'_default_customtag'} = lc $self->{options}{'_default_customtag'}; + } + + $self->{options}{'nodefault'} =~ /^\s*(.*)\s*$/s; + my %list_nodefault; + foreach ( split( /\s+/s, $1 ) ) { + $list_nodefault{$_} = 1; + } + $self->{nodefault} = \%list_nodefault; + + $self->{options}{'tags'} =~ /^\s*(.*)\s*$/s; + if ( length $self->{options}{'tags'} ) { + warn wrap_mod( + "po4a::xml::treat_options", + dgettext( + "po4a", + "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories." + ), + "tags" + ); + } + foreach ( split( /\s+/s, $1 ) ) { + $_ =~ m/^(.*?)(<.*)$/; + $self->{tags}->{$2} = $1 || ""; + } + + if ( $self->{options}{'tagsonly'} ) { + warn wrap_mod( + "po4a::xml::treat_options", + dgettext( + "po4a", + "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories." + ), + "tagsonly" + ); + } + + $self->{options}{'break'} =~ /^\s*(.*)\s*$/s; + foreach my $tag ( split( /\s+/s, $1 ) ) { + $tag =~ m/^(.*?)(<.*)$/; + $self->{break}->{$2} = $1 || ""; + } + $self->{options}{'_default_break'} =~ /^\s*(.*)\s*$/s; + foreach my $tag ( split( /\s+/s, $1 ) ) { + $tag =~ m/^(.*?)(<.*)$/; + $self->{break}->{$2} = $1 || "" + unless $list_nodefault{$2} + or defined $self->{break}->{$2}; + } + + $self->{options}{'translated'} =~ /^\s*(.*)\s*$/s; + foreach my $tag ( split( /\s+/s, $1 ) ) { + $tag =~ m/^(.*?)(<.*)$/; + $self->{translated}->{$2} = $1 || ""; + } + $self->{options}{'_default_translated'} =~ /^\s*(.*)\s*$/s; + foreach my $tag ( split( /\s+/s, $1 ) ) { + $tag =~ m/^(.*?)(<.*)$/; + $self->{translated}->{$2} = $1 || "" + unless $list_nodefault{$2} + or defined $self->{translated}->{$2}; + } + + $self->{options}{'untranslated'} =~ /^\s*(.*)\s*$/s; + foreach my $tag ( split( /\s+/s, $1 ) ) { + $tag =~ m/^(.*?)(<.*)$/; + $self->{untranslated}->{$2} = $1 || ""; + } + $self->{options}{'_default_untranslated'} =~ /^\s*(.*)\s*$/s; + foreach my $tag ( split( /\s+/s, $1 ) ) { + $tag =~ m/^(.*?)(<.*)$/; + $self->{untranslated}->{$2} = $1 || "" + unless $list_nodefault{$2} + or defined $self->{untranslated}->{$2}; + } + + $self->{options}{'attributes'} =~ /^\s*(.*)\s*$/s; + foreach my $tag ( split( /\s+/s, $1 ) ) { + if ( $tag =~ m/^(.*?)(<.*)$/ ) { + $self->{attributes}->{$2} = $1 || ""; + } else { + $self->{attributes}->{$tag} = ""; + } + } + $self->{options}{'_default_attributes'} =~ /^\s*(.*)\s*$/s; + foreach my $tag ( split( /\s+/s, $1 ) ) { + if ( $tag =~ m/^(.*?)(<.*)$/ ) { + $self->{attributes}->{$2} = $1 || "" + unless $list_nodefault{$2} + or defined $self->{attributes}->{$2}; + } else { + $self->{attributes}->{$tag} = "" + unless $list_nodefault{$tag} + or defined $self->{attributes}->{$tag}; + } + } + + $self->{options}{'inline'} =~ /^\s*(.*)\s*$/s; + foreach my $tag ( split( /\s+/s, $1 ) ) { + $tag =~ m/^(.*?)(<.*)$/; + $self->{inline}->{$2} = $1 || ""; + } + $self->{options}{'_default_inline'} =~ /^\s*(.*)\s*$/s; + foreach my $tag ( split( /\s+/s, $1 ) ) { + $tag =~ m/^(.*?)(<.*)$/; + $self->{inline}->{$2} = $1 || "" + unless $list_nodefault{$2} + or defined $self->{inline}->{$2}; + } + + $self->{options}{'placeholder'} =~ /^\s*(.*)\s*$/s; + foreach my $tag ( split( /\s+/s, $1 ) ) { + $tag =~ m/^(.*?)(<.*)$/; + $self->{placeholder}->{$2} = $1 || ""; + } + $self->{options}{'_default_placeholder'} =~ /^\s*(.*)\s*$/s; + foreach my $tag ( split( /\s+/s, $1 ) ) { + $tag =~ m/^(.*?)(<.*)$/; + $self->{placeholder}->{$2} = $1 || "" + unless $list_nodefault{$2} + or defined $self->{placeholder}->{$2}; + } + + $self->{options}{'customtag'} =~ /^\s*(.*)\s*$/s; + foreach my $tag ( split( /\s+/s, $1 ) ) { + $tag =~ m/^(.*?)(<.*)$/; + $self->{customtag}->{$2} = $1 || ""; + } + $self->{options}{'_default_customtag'} =~ /^\s*(.*)\s*$/s; + foreach my $tag ( split( /\s+/s, $1 ) ) { + $tag =~ m/^(.*?)(<.*)$/; + $self->{customtag}->{$2} = $1 || "" + unless $list_nodefault{$2} + or defined $self->{customtag}->{$2}; + } + + # If break-pi is provided, we should ensure that: $tag_types[the one of PI]->breaking is 1 + if ( $self->{options}{'break-pi'} ) { + for ( my $i = 0 ; $i < @tag_types ; $i++ ) { + if ( $tag_types[$i]->{beginning} eq '?' && $tag_types[$i]->{end} eq '?' ) { + $tag_types[$i]->{breaking} = 1; + } + } + } + + foreach my $tagtype (qw(untranslated)) { + foreach my $tag ( sort keys %{ $self->{$tagtype} } ) { + warn + "po4a::xml::treat_options: WARN: tag='$tag' is %s tag, translation option='$self->{$tagtype}->{$tag}' is ignores wW.\n" + if $self->{$tagtype}->{$tag} =~ m/wW/; + } + } + foreach my $tagtype (qw(inline break placeholder customtag)) { + foreach my $tag ( sort keys %{ $self->{$tagtype} } ) { + die + "po4a::xml::treat_options: WARN: tag='$tag' is %s tag, translation option='$self->{$tagtype}->{$tag}' is ignored.\n" + if $self->{$tagtype}->{$tag} ne ""; + } + } + foreach my $tagtype (qw(attributes)) { + foreach my $tag ( sort keys %{ $self->{$tagtype} } ) { + warn + "po4a::xml::treat_options: WARN: tag='$tag' is %s tag, translation option='$self->{$tagtype}->{$tag}' is ignored.\n" + if $self->{$tagtype}->{$tag} ne ""; + } + } + + # Debug output of internal parameters for generic XML parser + # Marked content of a XML tag can be either "translated" or "untranslated". + # -- XML tags in these may specify options: wWip + # Extraction of XML content can be one of "inline", "break", "placeholder", or "customtag". + # -- XML tags in these must not specify options + if ( $self->{options}{'debug'} ) { + foreach my $tagtype (qw(translated untranslated)) { + foreach my $tag ( sort keys %{ $self->{$tagtype} } ) { + print + "po4a::xml::treat_options: $tag: translation option='$self->{$tagtype}->{$tag}' (original), but listed in '$tagtype'"; + foreach my $tagtype1 (qw(inline break placeholder customtag)) { + if ( exists $self->{$tagtype1}->{$tag} ) { + print " / '$tagtype1'"; + } + } + print "\n"; + print wrap_mod( "po4a::xml::treat_options", "%s: translation option='%s' (valid)", + $tag, $self->get_translate_options($tag) ); + } + } + foreach my $tag ( sort keys %{ $self->{'attributes'} } ) { + print "po4a::xml::treat_options: $tag: translated attributes.\n"; + } + } + + # There should be no translated and untranslated tags + foreach my $tag ( keys %{ $self->{translated} } ) { + die wrap_mod( "po4a::xml::treat_options", dgettext( "po4a", "Tag '%s' both in the %s and %s categories." ), + $tag, "translated", "untranslated" ) + if defined $self->{untranslated}->{$tag}; + } + + # There should be no inline, break, placeholder, and customtag tags + foreach my $tag ( keys %{ $self->{inline} } ) { + die wrap_mod( "po4a::xml::treat_options", dgettext( "po4a", "Tag '%s' both in the %s and %s categories." ), + $tag, "inline", "break" ) + if defined $self->{break}->{$tag}; + die wrap_mod( "po4a::xml::treat_options", dgettext( "po4a", "Tag '%s' both in the %s and %s categories." ), + $tag, "inline", "placeholder" ) + if defined $self->{placeholder}->{$tag}; + die wrap_mod( "po4a::xml::treat_options", dgettext( "po4a", "Tag '%s' both in the %s and %s categories." ), + $tag, "inline", "customtag" ) + if defined $self->{customtag}->{$tag}; + } + foreach my $tag ( keys %{ $self->{break} } ) { + die wrap_mod( "po4a::xml::treat_options", dgettext( "po4a", "Tag '%s' both in the %s and %s categories." ), + $tag, "break", "placeholder" ) + if defined $self->{placeholder}->{$tag}; + die wrap_mod( "po4a::xml::treat_options", dgettext( "po4a", "Tag '%s' both in the %s and %s categories." ), + $tag, "break", "customtag" ) + if defined $self->{customtag}->{$tag}; + } + foreach my $tag ( keys %{ $self->{placeholder} } ) { + die wrap_mod( "po4a::xml::treat_options", dgettext( "po4a", "Tag '%s' both in the %s and %s categories." ), + $tag, "placeholder", "customtag" ) + if defined $self->{customtag}->{$tag}; + } +} + +=head2 GETTING TEXT FROM THE INPUT DOCUMENT + +=over + +=item get_string_until($%) + +This function returns an array with the lines (and references) from the input +document until it finds the first argument. The second argument is an options +hash. Value 0 means disabled (the default) and 1, enabled. + +The valid options are: + +=over 4 + +=item B<include> + +This makes the returned array to contain the searched text + +=item B<remove> + +This removes the returned stream from the input + +=item B<unquoted> + +This ensures that the searched text is outside any quotes + +=item B<regex> + +This denotes that the first argument is a regular expression rather than +an plain string + +=back + +=cut + +sub get_string_until { + my ( $self, $search ) = ( shift, shift ); + my $options = shift; + my ( $include, $remove, $unquoted, $regex ) = ( 0, 0, 0, 0 ); + + if ( defined( $options->{include} ) ) { $include = $options->{include}; } + if ( defined( $options->{remove} ) ) { $remove = $options->{remove}; } + if ( defined( $options->{unquoted} ) ) { $unquoted = $options->{unquoted}; } + if ( defined( $options->{regex} ) ) { $regex = $options->{regex}; } + + my ( $line, $ref ) = $self->shiftline(); + my ( @text, $paragraph ); + my ( $eof, $found ) = ( 0, 0 ); + + $search = "\Q$search\E" unless $regex; + while ( defined($line) and !$found ) { + push @text, ( $line, $ref ); + $paragraph .= $line; + if ($unquoted) { + if ( $paragraph =~ /^((\".*?\")|(\'.*?\')|[^\"\'])*$search/s ) { + $found = 1; + } + } else { + if ( $paragraph =~ /$search/s ) { + $found = 1; + } + } + if ( !$found ) { + ( $line, $ref ) = $self->shiftline(); + } + } + + if ( !defined($line) ) { $eof = 1; } + + if ($found) { + $line = ""; + if ($unquoted) { + $paragraph =~ /^(?:(?:\".*?\")|(?:\'.*?\')|[^\"\'])*?$search(.*)$/s; + $line = $1; + $text[ $#text - 1 ] =~ s/\Q$line\E$//s; + } else { + $paragraph =~ /$search(.*)$/s; + $line = $1; + $text[ $#text - 1 ] =~ s/\Q$line\E$//s; + } + if ( !$include ) { + $text[ $#text - 1 ] =~ /^(.*)($search.*)$/s; + $text[ $#text - 1 ] = $1; + $line = $2 . $line; + } + if ( defined($line) and ( $line ne "" ) ) { + $self->unshiftline( $line, $text[$#text] ); + } + } + if ( !$remove ) { + $self->unshiftline(@text); + } + + #If we get to the end of the file, we return the whole paragraph + return ( $eof, @text ); +} + +=item skip_spaces(\@) + +This function receives as argument the reference to a paragraph (in the format +returned by get_string_until), skips his heading spaces and returns them as +a simple string. + +=cut + +sub skip_spaces { + my ( $self, $pstring ) = @_; + my $space = ""; + + while ( @$pstring and ( @$pstring[0] =~ /^(\s+)(.*)$/s or @$pstring[0] eq "" ) ) { + if ( @$pstring[0] ne "" ) { + $space .= $1; + @$pstring[0] = $2; + } + + if ( @$pstring[0] eq "" ) { + shift @$pstring; + shift @$pstring; + } + } + return $space; +} + +=item join_lines(@) + +This function returns a simple string with the text from the argument array +(discarding the references). + +=cut + +sub join_lines { + my ( $self, @lines ) = @_; + my ( $line, $ref ); + my $text = ""; + while ( $#lines > 0 ) { + ( $line, $ref ) = ( shift @lines, shift @lines ); + $text .= $line; + } + return $text; +} + +=back + +=head1 STATUS OF THIS MODULE + +This module can translate tags and attributes. + +=head1 TODO LIST + +DOCTYPE (ENTITIES) + +There is a minimal support for the translation of entities. They are +translated as a whole, and tags are not taken into account. Multilines +entities are not supported and entities are always rewrapped during the +translation. + +MODIFY TAG TYPES FROM INHERITED MODULES +(move the tag_types structure inside the $self hash?) + +=head1 SEE ALSO + +L<Locale::Po4a::TransTractor(3pm)|Locale::Po4a::TransTractor>, +L<po4a(7)|po4a.7> + +=head1 AUTHORS + + Jordi Vilalta <jvprat@gmail.com> + Nicolas François <nicolas.francois@centraliens.net> + +=head1 COPYRIGHT AND LICENSE + + Copyright © 2004 Jordi Vilalta <jvprat@gmail.com> + Copyright © 2008-2009 Nicolas François <nicolas.francois@centraliens.net> + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut + +1; diff --git a/lib/Locale/Po4a/Yaml.pm b/lib/Locale/Po4a/Yaml.pm new file mode 100644 index 0000000..919c40c --- /dev/null +++ b/lib/Locale/Po4a/Yaml.pm @@ -0,0 +1,160 @@ +# Locale::Po4a::Yaml -- Convert yaml files to PO file, for translation. +# +# This program is free software; you may redistribute it and/or modify it +# under the terms of GPL v2.0 or later (see COPYING). +# + +=encoding UTF-8 + +=head1 NAME + +Locale::Po4a::Yaml - convert YAML files from/to PO files + +=head1 DESCRIPTION + +Locale::Po4a::Yaml is a module to help the translation of Yaml files into other +[human] languages. + +The module extracts the value of YAML hashes and arrays. Hash keys are +not extracted. + +NOTE: This module parses the YAML file with YAML::Tiny. + +=head1 OPTIONS ACCEPTED BY THIS MODULE + +These are this module's particular options: + +=over + +=item B<keys> + +Space-separated list of hash keys to process for extraction, all +other keys are skipped. Keys are matched with a case-sensitive match. +If B<paths> and B<keys> are used together, values are included if they are +matched by at least one of the options. +Arrays values are always returned unless the B<skip_array> option is +provided. + +=item B<paths> + +Comma-separated list of hash paths to process for extraction, all +other paths are skipped. Paths are matched with a case-sensitive match. +If B<paths> and B<keys> are used together, values are included if they are +matched by at least one of the options. +Arrays values are always returned unless the B<skip_array> option is +provided. + +=item B<skip_array> + +Do not translate array values. + +=back + +=head1 SEE ALSO + +L<Locale::Po4a::TransTractor(3pm)>, L<po4a(7)|po4a.7> + +=head1 AUTHORS + + Brian Exelbierd <bex@pobox.com> + +=head1 COPYRIGHT AND LICENSE + + Copyright © 2017 Brian Exelbierd. + Copyright © 2022 Martin Quinson <mquinson#debian.org>. + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL v2.0 or later (see the COPYING file). + +=cut + +############################################################################ +# Modules and declarations +############################################################################ + +package Locale::Po4a::Yaml; + +use Locale::Po4a::TransTractor; +use Locale::Po4a::Common; +use YAML::Tiny; +use Scalar::Util; +use Encode; + +use 5.16.0; +use strict; +use warnings; + +require Exporter; + +use vars qw(@ISA @EXPORT $AUTOLOAD); +@ISA = qw(Locale::Po4a::TransTractor); +@EXPORT = qw(); + +my %yfm_keys = (); +my %yfm_paths = (); + +sub initialize { + my $self = shift; + my %options = @_; + + $self->{options}{'keys'} = ''; + $self->{options}{'paths'} = ''; + $self->{options}{'debug'} = 0; + $self->{options}{'verbose'} = 1; + $self->{options}{'skip_array'} = 0; + + foreach my $opt ( keys %options ) { + die wrap_mod( "po4a::yaml", dgettext( "po4a", "Unknown option: %s" ), $opt ) + unless exists $self->{options}{$opt}; + $self->{options}{$opt} = $options{$opt}; + } + + map { + $_ =~ s/^\s+|\s+$//g; # Trim the keys before using them + $yfm_keys{$_} = 1 + } ( split( /[, ]/, $self->{options}{keys} ) ); + + # map { print STDERR "key: '$_'\n"; } (keys %yfm_keys); + + map { + $_ =~ s/^\s+|\s+$//g; # Trim the keys before using them + $yfm_paths{$_} = 1 + } ( split( /,/, $self->{options}{paths} ) ); +} + +sub parse { + my $self = shift; + my $yfm; + + # Get the ref of the first line. We'll use it as the ref for the whole doc + my ( $line, $ref ) = $self->shiftline(); + $self->unshiftline( $line, $ref ); + + while (1) { + my ( $nextline, $nextref ) = $self->shiftline(); + + if ( not defined($nextline) ) { + last; + } elsif ( $nextline =~ /: [\[\{]/ ) { + die wrap_mod( + "po4a::text", + dgettext( + "po4a", + "Inline lists and dictionaries on a single line are not correctly handled the parser we use (YAML::Tiny): they are interpreted as regular strings. " + . "Please use multi-lines definitions instead. Offending line:\n %s" + ), + $nextline + ); + } + + $yfm .= $nextline; + } + + my $yamlarray = YAML::Tiny->read_string($yfm) + || die "YAML::Tiny failed to parse the content of $ref: $!"; + + $self->handle_yaml( 0, $ref, $yamlarray, \%yfm_keys, $self->{options}{skip_array}, \%yfm_paths ); +} + +1; +__END__ |