summaryrefslogtreecommitdiffstats
path: root/lib/Locale
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-17 11:26:17 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-17 11:26:17 +0000
commit5df6c2aefebe3d2abcc939a88e294876d59f03ca (patch)
tree63fb332a0f21ddb91cb789c80cf64e134d373463 /lib/Locale
parentInitial commit. (diff)
downloadpo4a-5df6c2aefebe3d2abcc939a88e294876d59f03ca.tar.xz
po4a-5df6c2aefebe3d2abcc939a88e294876d59f03ca.zip
Adding upstream version 0.72.upstream/0.72
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lib/Locale')
-rw-r--r--lib/Locale/Po4a/AsciiDoc.pm1449
-rw-r--r--lib/Locale/Po4a/BibTeX.pm146
-rw-r--r--lib/Locale/Po4a/Chooser.pm178
-rw-r--r--lib/Locale/Po4a/Common.pm252
-rw-r--r--lib/Locale/Po4a/Dia.pm114
-rw-r--r--lib/Locale/Po4a/Docbook.pm2060
-rw-r--r--lib/Locale/Po4a/Gemtext.pm193
-rw-r--r--lib/Locale/Po4a/Guide.pm153
-rw-r--r--lib/Locale/Po4a/Halibut.pm448
-rw-r--r--lib/Locale/Po4a/InProgress/Debconf.pm226
-rw-r--r--lib/Locale/Po4a/InProgress/NewsDebian.pm155
-rw-r--r--lib/Locale/Po4a/Ini.pm120
-rw-r--r--lib/Locale/Po4a/KernelHelp.pm170
-rw-r--r--lib/Locale/Po4a/LaTeX.pm397
-rw-r--r--lib/Locale/Po4a/Man.pm2904
-rw-r--r--lib/Locale/Po4a/Po.pm1618
-rw-r--r--lib/Locale/Po4a/Pod.pm287
-rw-r--r--lib/Locale/Po4a/RubyDoc.pm527
-rw-r--r--lib/Locale/Po4a/Sgml.pm1372
-rw-r--r--lib/Locale/Po4a/TeX.pm1756
-rw-r--r--lib/Locale/Po4a/Texinfo.pm613
-rw-r--r--lib/Locale/Po4a/Text.pm1072
-rw-r--r--lib/Locale/Po4a/TransTractor.pm1337
-rw-r--r--lib/Locale/Po4a/Wml.pm210
-rw-r--r--lib/Locale/Po4a/Xhtml.pm245
-rw-r--r--lib/Locale/Po4a/Xml.pm2539
-rw-r--r--lib/Locale/Po4a/Yaml.pm160
27 files changed, 20701 insertions, 0 deletions
diff --git a/lib/Locale/Po4a/AsciiDoc.pm b/lib/Locale/Po4a/AsciiDoc.pm
new file mode 100644
index 0000000..6fe1cd1
--- /dev/null
+++ b/lib/Locale/Po4a/AsciiDoc.pm
@@ -0,0 +1,1449 @@
+#!/usr/bin/perl -w
+
+# http://asciidoc.org/userguide.html
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::AsciiDoc - convert AsciiDoc documents from/to PO files
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::AsciiDoc is a module to help the translation of documentation in
+the AsciiDoc format.
+
+=cut
+
+package Locale::Po4a::AsciiDoc;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+require Exporter;
+use vars qw(@ISA @EXPORT);
+@ISA = qw(Locale::Po4a::TransTractor);
+@EXPORT = qw();
+
+use Locale::Po4a::TransTractor;
+use Locale::Po4a::Common;
+use YAML::Tiny;
+
+=head1 OPTIONS ACCEPTED BY THIS MODULE
+
+These are this module's particular options:
+
+=over
+
+=item B<definitions>
+
+The name of a file containing definitions for po4a, as defined in the
+B<INLINE CUSTOMIZATION> section.
+You can use this option if it is not possible to put the definitions in
+the document being translated.
+
+In a definitions file, lines must not start by two slashes, but directly
+by B<po4a:>.
+
+=item B<entry>
+
+Space-separated list of attribute entries you want to translate. By default,
+no attribute entries are translatable.
+
+=item B<macro>
+
+Space-separated list of macro definitions.
+
+=item B<style>
+
+Space-separated list of style definitions.
+
+=item B<forcewrap>
+
+Enable automatic line wrapping in non-verbatim blocks, even if the
+result could be misinterpreted by AsciiDoc formatters.
+
+By default, po4a will not wrap the produced AsciiDoc files because a
+manual inspection is mandated to ensure that the wrapping does not
+change the formatting. Consider for instance the following list
+item:
+
+ * a long sentence that is ending with a number 1. A second sentence.
+
+If the wrapping leads to the following presentation, the item is
+split into a numbered sub-list. To make things worse, only the
+speakers of the language used in the translation can inspect the
+situation.
+
+ * a long sentence that is ending with a number
+ 1. A second sentence.
+
+Note that not wrapping the files produced by po4a should not be a
+problem since those files are meant to be processed automatically.
+They should not be regarded as source files anyway.
+
+With this option, po4a will produce better-looking AsciiDoc files, but it
+may lead to possibly erroneous formatted outputs.
+
+=item B<noimagetargets>
+
+By default, the targets of block images are translatable to give opportunity
+to make the content point to translated images. This can be stopped by setting
+this option.
+
+=item B<tablecells>
+
+This option is a flag that enables sub-table segmentation into cell content.
+The segmentation is limited to cell content, without any parsing inside of it.
+
+=item B<compat>
+
+Switch parsing rules to compatibility with different tools. Available options are
+"asciidoc" or "asciidoctor". Asciidoctor has stricter parsing rules, such as
+equality of length of opening and closing block fences.
+
+=item B<nolinting>
+
+Disable linting messages. When the source code cannot be fixed for clearer document structure, these messages are useless.
+
+=item B<cleanspaces>
+
+Remove extra spaces from the source segments in no-wrap mode. This is useful when the
+translation tools are sensitive to the number of spaces.
+
+=item B<yfm_keys>
+
+Comma-separated list of keys to process for translation in the YAML Front Matter
+section. All other keys are skipped. Keys are matched with a case-sensitive
+match. If B<yfm_paths> and B<yfm_keys> are used together, values are included if
+they are matched by at least one of the options. Array values are always translated,
+unless the B<yfm_skip_array> option is provided.
+
+=cut
+
+my %yfm_keys = ();
+
+=item B<yfm_skip_array>
+
+Do not translate array values in the YAML Front Matter section.
+
+=cut
+
+my $yfm_skip_array = 0;
+
+=item B<yfm_paths>
+
+Comma-separated list of hash paths to process for extraction in the YAML
+Front Matter section, all other paths are skipped. Paths are matched with a
+case-sensitive match. If B<yfm_paths> and B<yfm_keys> are used together,
+values are included if they are matched by at least one of the options.
+Arrays values are always returned unless the B<yfm_skip_array> option is
+provided.
+
+=cut
+
+my %yfm_paths = ();
+
+=back
+
+=head1 INLINE CUSTOMIZATION
+
+The AsciiDoc module can be customized with lines starting by B<//po4a:>.
+These lines are interpreted as commands to the parser.
+The following commands are recognized:
+
+=over 4
+
+=item B<//po4a: macro >I<name>B<[>I<attribute list>B<]>
+
+This describes in detail the parameters of a B<macro>;
+I<name> must be a valid macro name, and it ends with an underscore
+if the target must be translated.
+
+The I<attribute list> argument is a comma separated list which
+contains information about translatable arguments. This list contains
+either numbers, to define positional parameters, or named attributes.
+
+If a plus sign (B<+>) is prepended to I<name>, then the macro and its
+arguments are translated as a whole. There is no need to define
+attribute list in this case, but brackets must be present.
+
+=item B<//po4a: style >B<[>I<attribute list>B<]>
+
+This describes in detail which attributes of a style must
+be translated.
+
+The I<attribute list> argument is a comma separated list which
+contains information about translatable arguments. This list contains
+either numbers, to define positional parameters, or named attributes.
+The first attribute is the style name, it will not be translated.
+
+If a plus sign (B<+>) is prepended to the style name, then the
+attribute list is translated as a whole. There is no need to define
+translatable attributes.
+
+If a minus sign (B<->) is prepended to the style name, then this
+attribute is not translated.
+
+=item B<//po4a: entry >I<name>
+
+This declares an attribute entry as being translatable. By default,
+they are not translated.
+
+=back
+
+=cut
+
+my @comments = ();
+
+my %debug = (
+ 'split_attributelist' => 0,
+ 'join_attributelist' => 0,
+ 'parse' => 0,
+);
+
+sub initialize {
+ my $self = shift;
+ my %options = @_;
+
+ $self->{options}{'nobullets'} = 1;
+ $self->{options}{'forcewrap'} = 0;
+ $self->{options}{'debug'} = '';
+ $self->{options}{'verbose'} = 1;
+ $self->{options}{'entry'} = '';
+ $self->{options}{'macro'} = '';
+ $self->{options}{'style'} = '';
+ $self->{options}{'definitions'} = '';
+ $self->{options}{'noimagetargets'} = 0;
+ $self->{options}{'tablecells'} = 0;
+ $self->{options}{'compat'} = 'asciidoc';
+ $self->{options}{'yfm_keys'} = '';
+ $self->{options}{'yfm_skip_array'} = 0;
+ $self->{options}{'yfm_paths'} = '';
+ $self->{options}{'nolinting'} = 0;
+ $self->{options}{'cleanspaces'} = 0;
+
+ foreach my $opt ( keys %options ) {
+ die wrap_mod( "po4a::asciidoc", dgettext( "po4a", "Unknown option: %s" ), $opt )
+ unless exists $self->{options}{$opt};
+ $self->{options}{$opt} = $options{$opt};
+ }
+
+ my $compat = $self->{options}{'compat'};
+ die wrap_mod( "po4a::asciidoc",
+ dgettext( "po4a", "Invalid compatibility setting: '%s'. It must be either '%s' or '%s'." ),
+ $compat, 'asciidoc', 'asciidoctor' )
+ if ( defined $compat && $compat ne "asciidoc" && $compat ne "asciidoctor" );
+
+ if ( $options{'debug'} ) {
+ foreach ( $options{'debug'} ) {
+ $debug{$_} = 1;
+ }
+ }
+ map {
+ $_ =~ s/^\s+|\s+$//g; # Trim the keys before using them
+ $yfm_keys{$_} = 1
+ } ( split( ',', $self->{options}{'yfm_keys'} ) );
+ map {
+ $_ =~ s/^\s+|\s+$//g; # Trim the keys before using them
+ $yfm_paths{$_} = 1
+ } ( split( ',', $self->{options}{'yfm_paths'} ) );
+
+ $yfm_skip_array = $self->{options}{'yfm_skip_array'};
+
+ $self->{translate} = {
+ macro => {},
+ style => {},
+ entry => {}
+ };
+
+ $self->register_attributelist('[verse,2,3,attribution,citetitle]');
+ $self->register_attributelist('[quote,2,3,attribution,citetitle]');
+ $self->register_attributelist('[icon]');
+ $self->register_attributelist('[caption]');
+ $self->register_attributelist('[-icons,caption]');
+ $self->register_macro('image_[1,alt,title,link]') unless $self->{options}{'noimagetargets'};
+ $self->register_macro('indexterm[1,2,3]') unless $self->{options}{'noimagetargets'};
+
+ if ( $self->{options}{'definitions'} ) {
+ $self->parse_definition_file( $self->{options}{'definitions'} );
+ }
+ $self->{options}{entry} =~ s/^\s*//;
+ foreach my $attr ( split( /\s+/, $self->{options}{entry} ) ) {
+ $self->{translate}->{entry}->{$attr} = 1;
+ }
+ $self->{options}{macro} =~ s/^\s*//;
+ foreach my $attr ( split( /\s+/, $self->{options}{macro} ) ) {
+ $self->register_macro($attr);
+ }
+ $self->{options}{style} =~ s/^\s*//;
+ foreach my $attr ( split( /\s+/, $self->{options}{style} ) ) {
+ $self->register_attributelist($attr);
+ }
+
+}
+
+sub register_attributelist {
+ my $self = shift;
+ my $list = shift;
+ my $type = shift || 'style';
+ $list =~ s/^\[//;
+ $list =~ s/\]$//;
+ $list =~ s/\s+//;
+ $list = "," . $list . ",";
+ $list =~ m/^,([-+]?)([^,]*)/;
+ my $command = $2;
+ $self->{translate}->{$type}->{$command} = $list;
+ print STDERR "Definition: $type $command: $list\n" if $debug{definitions};
+}
+
+sub register_macro {
+ my $self = shift;
+ my $text = shift;
+ die wrap_mod( "po4a::asciidoc", dgettext( "po4a", "Unable to parse macro definition: %s" ), $text )
+ unless $text =~ m/^(\+?)([\w\d][\w\d-]*?)(_?)\[(.*)\]$/;
+ my $macroplus = $1;
+ my $macroname = $2;
+ my $macrotarget = $3;
+ my $macroparam = $macroname . "," . $4;
+ $self->register_attributelist( $macroparam, 'macro' );
+
+ if ( $macrotarget eq '_' ) {
+ $self->{translate}->{macro}->{$macroname} .= '_';
+ }
+ if ( $macroplus eq '+' ) {
+ $self->{translate}->{macro}->{$macroname} =~ s/^,/,+/;
+ }
+}
+
+sub is_translated_target {
+ my $self = shift;
+ my $macroname = shift;
+ return defined( $self->{translate}->{macro}->{$macroname} )
+ && $self->{translate}->{macro}->{$macroname} =~ m/_$/;
+}
+
+sub is_unsplitted_attributelist {
+ my $self = shift;
+ my $name = shift;
+ my $type = shift;
+ return defined( $self->{translate}->{$type}->{$name} )
+ && $self->{translate}->{$type}->{$name} =~ m/^,\+/;
+}
+
+sub process_definition {
+ my $self = shift;
+ my $command = shift;
+ if ( $command =~ m/^po4a: macro\s+(.*\[.*\])\s*$/ ) {
+ $self->register_macro($1);
+ } elsif ( $command =~ m/^po4a: style\s*(\[.*\])\s*$/ ) {
+ $self->register_attributelist($1);
+ } elsif ( $command =~ m/^po4a: entry\s+(.+?)\s*$/ ) {
+ $self->{translate}->{entry}->{$1} = 1;
+ }
+}
+
+sub parse_definition_file {
+ my $self = shift;
+ my $filename = shift;
+ if ( !open( IN, "<", $filename ) ) {
+ die wrap_mod( "po4a::asciidoc", dgettext( "po4a", "Cannot open %s: %s" ), $filename, $! );
+ }
+ while (<IN>) {
+ chomp;
+ process_definition( $self, $_ );
+ }
+ close IN;
+}
+
+my $RE_SECTION_TEMPLATES = "sect1|sect2|sect3|sect4|preface|colophon|dedication|synopsis|index";
+my $RE_STYLE_ADMONITION = "TIP|NOTE|IMPORTANT|WARNING|CAUTION";
+my $RE_STYLE_PARAGRAPH =
+ "normal|literal|verse|quote|listing|abstract|partintro|comment|example|sidebar|source|music|latex|graphviz";
+my $RE_STYLE_NUMBERING = "arabic|loweralpha|upperalpha|lowerroman|upperroman";
+my $RE_STYLE_LIST = "appendix|horizontal|qanda|glossary|bibliography";
+my $RE_STYLES =
+ "$RE_SECTION_TEMPLATES|$RE_STYLE_ADMONITION|$RE_STYLE_PARAGRAPH|$RE_STYLE_NUMBERING|$RE_STYLE_LIST|float";
+
+BEGIN {
+ my $UnicodeGCString_available = 0;
+ $UnicodeGCString_available = 1 if ( eval { require Unicode::GCString } );
+ eval {
+
+ sub chars($$$) {
+ my $text = shift;
+ my $encoder = shift;
+ $text = $encoder->decode($text) if ( defined($encoder) && $encoder->name ne "ascii" );
+ if ($UnicodeGCString_available) {
+ return Unicode::GCString->new($text)->chars();
+ } else {
+ $text =~ s/\n$//s;
+ return length($text) if !( defined($encoder) && $encoder->name ne "ascii" );
+ eval { require Unicode::GCString };
+ die wrap_mod(
+ "po4a::asciidoc",
+ dgettext(
+ "po4a",
+ "Detection of two line titles failed at %s\nPlease install the Unicode::GCString module (error: %s)."
+ ),
+ shift, $@
+ );
+ }
+ }
+ };
+}
+
+sub translate {
+ my ( $self, $str, $ref, $type ) = @_;
+ my (%options) = @_;
+ if ( $options{'wrap'} == 1 ) {
+ if ($str =~ / \+\n/) {
+ $options{'wrap'} = 0;
+ $str =~ s/([^+])\n/$1 /g;
+ $str =~ s/ \+\n/\n/g;
+ $str = $self->SUPER::translate( $str, $ref, $type, %options);
+ $str =~ s/\n/ +\n/g;
+ $options{'wrap'} = 1;
+ } else {
+ if ( $self->{options}{'cleanspaces'} == 1 ) {
+ $str =~ s/[ \n]+/ /g;
+ }
+ $str = $self->SUPER::translate( $str, $ref, $type, %options);
+ }
+ } else {
+ $str = $self->SUPER::translate( $str, $ref, $type, %options );
+ }
+ return $str;
+}
+
+sub parse {
+ my $self = shift;
+ my ( $line, $ref ) = $self->shiftline();
+
+ # Handle the YAML Front Matter, if any
+ if ( defined($line) && $line =~ /^---$/ ) {
+ my $yfm;
+ my ( $nextline, $nextref ) = $self->shiftline();
+ while ( defined($nextline) ) {
+ last if ( $nextline =~ /^(---|\.\.\.)$/ );
+ $yfm .= $nextline;
+ ( $nextline, $nextref ) = $self->shiftline();
+ }
+ die "Could not get the YAML Front Matter from the file." if ( length($yfm) == 0 );
+ my $yamlarray = YAML::Tiny->read_string($yfm)
+ || die "Couldn't read YAML Front Matter ($!)\n$yfm\n";
+
+ $self->handle_yaml( 1, $ref, $yamlarray, \%yfm_keys, $yfm_skip_array, \%yfm_paths );
+ $self->pushline("---\n");
+
+ ( $line, $ref ) = $self->shiftline(); # Pass the final '---'
+ }
+
+ my $paragraph = "";
+ my $wrapped_mode = 1;
+ my $file = $ref;
+ $file =~ s/:[0-9]+$// if defined($line);
+
+ while ( defined($line) ) {
+ $ref =~ m/^(.*):[0-9]+$/;
+ if ( $1 ne $file ) {
+ $file = $1;
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $wrapped_mode = 1;
+ }
+
+ chomp($line);
+ print STDERR "Seen $ref $line\n"
+ if ( $debug{parse} );
+ $self->{ref} = "$ref";
+ if ( ( defined $self->{verbatim} ) and ( $self->{verbatim} == 3 ) ) {
+
+ # Untranslated blocks
+ $self->pushline( $line . "\n" );
+ if ( $line =~ m/^~{4,}$/ ) {
+ undef $self->{verbatim};
+ undef $self->{type};
+ $wrapped_mode = 1;
+ }
+ } elsif ( ( defined $self->{verbatim} ) and ( $self->{verbatim} == 2 ) ) {
+
+ # CommentBlock
+ if ( $line =~ m/^\/{4,}$/ ) {
+ undef $self->{verbatim};
+ undef $self->{type};
+ $wrapped_mode = 1;
+ } else {
+ push @comments, $line;
+ }
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $wrapped_mode = 1 unless defined( $self->{verbatim} );
+ $self->pushline( $line . "\n" );
+ } elsif ( ( defined $self->{type} )
+ and ( $self->{type} eq "Table" )
+ and ( $line !~ m/^\|===/ )
+ and ( $self->{options}{"tablecells"} )
+ and ( not defined $self->{disabletablecells} ) )
+ {
+ # inside a table, and we should split per cell
+ my $new_line = "";
+ my @texts = split /(?:(?:\d+|\d*(?:\.\d+)?)(?:\+|\*))?[<^>]?(?:\.[<^>])?[demshalv]?\|/, $line;
+ my @seps = ($line) =~ m/(?:(?:\d+|\d*(?:\.\d+)?)(?:\+|\*))?[<^>]?(?:\.[<^>])?[demshalv]?\|/g;
+ if ( ( scalar(@texts) and length( $texts[0] ) ) || ( !length($line) ) ) {
+ if ( !length($line) ) { $texts[0] = ""; }
+ if ( length($paragraph) ) {
+
+ # if we are in a continuation line
+ $paragraph .= "\n" . $texts[0];
+ } else {
+ $paragraph = $texts[0];
+ $self->pushline("\n");
+ }
+ } elsif ( length($paragraph) ) {
+ $new_line = "\n";
+ }
+
+ shift @texts;
+ my @parts = map { ( $_, shift @texts ) } @seps;
+ foreach my $part (@parts) {
+ if ( not defined $part ) {
+
+ # allows concatenation and will be stripped anyway
+ $part = " ";
+ }
+ if ( $part =~ /\|$/ ) {
+
+ # this is a cell separator. End the previous cell
+ do_stripped_unwrapped_paragraph( $self, $paragraph, $wrapped_mode );
+ if ( $new_line eq "\n" ) {
+ $self->pushline("\n");
+ $new_line = "";
+ }
+ $paragraph = "";
+ $self->pushline($part);
+ } else {
+
+ # this is content. Append it.
+ $paragraph .= $part;
+ }
+ }
+
+ } elsif ( ( not defined( $self->{verbatim} ) ) and ( $line =~ m/^(\+|--)$/ ) ) {
+
+ # List Item Continuation or List Block
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $wrapped_mode = 1 unless defined( $self->{verbatim} );
+ $self->pushline( $line . "\n" );
+
+ # TODO: add support for Open blocks
+ } elsif (
+ ( not defined( $self->{verbatim} ) )
+ and ( $line =~ m/^(={2,}|-{2,}|~{2,}|\^{2,}|\+{2,})$/ )
+ and ( defined($paragraph) )
+ and ( $paragraph =~ m/^[^\n]*\n$/s )
+ and
+
+ # subtract one because chars includes the newline on the paragraph
+ ( abs( ( chars( $paragraph, $self->{TT}{po_in}{encoder}, $ref ) - 1 ) - length($line) ) < 3 )
+ )
+ {
+ # Found title
+
+ $wrapped_mode = 0;
+ my $level = $line;
+ $level =~ s/^(.).*$/$1/;
+ $paragraph =~ s/\n$//s;
+
+ warn wrap_mod(
+ "$ref",
+ dgettext(
+ "po4a",
+ "'%s' seems to be a two-lines title underlined with '%s', but the underlines are too short or too long compared to the title length. "
+ . "You may want to fix your master document."
+ ),
+ $paragraph,
+ $level
+ )
+ if ( ( chars( $paragraph, $self->{TT}{po_in}{encoder}, $ref ) != length($line) )
+ && ( !$self->{options}{'nolinting'} ) );
+
+ my $t = $self->translate(
+ $paragraph,
+ $self->{ref},
+ "Title $level",
+ "comment" => join( "\n", @comments ),
+ "wrap" => 0
+ );
+ $self->pushline( $t . "\n" );
+ $paragraph = "";
+ @comments = ();
+ $wrapped_mode = 1;
+ $self->pushline( ( $level x ( chars( $t, $self->{TT}{po_in}{encoder}, $ref ) ) ) . "\n" );
+ } elsif ( $line =~ m/^(={1,5})( +)(.*?)( +\1)?$/ ) {
+ my $titlelevel1 = $1;
+ my $titlespaces = $2;
+ my $title = $3;
+ my $titlelevel2 = $4 || "";
+
+ # Found one line title
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $wrapped_mode = 0;
+ $paragraph = "";
+ $self->pushline( $titlelevel1 . $titlespaces);
+ $title = $self->translate_indexterms($title);
+ my $t = $self->translate(
+ $title,
+ $self->{ref},
+ "Title $titlelevel1",
+ "comment" => join( "\n", @comments ),
+ "wrap" => 0
+ );
+ $self->pushline( $t . $titlelevel2 . "\n" );
+ @comments = ();
+ $wrapped_mode = 1;
+ } elsif ( ( $line =~ m/^(\/{4,}|\+{4,}|-{4,}|\.{4,}|\*{4,}|_{4,}|={4,}|~{4,})$/ )
+ and ( !defined( $self->{type} ) or ( defined( $self->{type} ) and ( $self->{type} !~ /^Table/i ) ) ) )
+ {
+ # Found one delimited block
+ my $t = $line;
+ $t =~ s/^(.).*$/$1/;
+ my $l = length $line;
+ my $type = "delimited block $t";
+ $type = "$type $l" if ( $self->{options}{'compat'} eq 'asciidoctor' );
+ if ( defined $self->{verbatim} and ( $self->{type} ne $type ) ) {
+ $paragraph .= "$line\n";
+ } else {
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ if ( ( defined $self->{type} )
+ and ( $self->{type} eq $type ) )
+ {
+ undef $self->{type};
+ undef $self->{verbatim};
+ undef $self->{bullet};
+ undef $self->{indent};
+ $wrapped_mode = 1;
+ print STDERR "Closing $t block\n" if $debug{parse};
+ } else {
+ print STDERR "Begining $t block\n" if $debug{parse};
+ if ( $t eq "\/" ) {
+
+ # CommentBlock, should not be treated
+ $self->{verbatim} = 2;
+ } elsif ( $t eq "+" ) {
+
+ # PassthroughBlock
+ $wrapped_mode = 0;
+ $self->{verbatim} = 1;
+ } elsif ( $t eq "-" or $t eq "|" ) {
+
+ # ListingBlock
+ $wrapped_mode = 0;
+ $self->{verbatim} = 1;
+ } elsif ( $t eq "." ) {
+
+ # LiteralBlock
+ $wrapped_mode = 0;
+ $self->{verbatim} = 1;
+ } elsif ( $t eq "*" ) {
+
+ # SidebarBlock
+ $wrapped_mode = 1;
+ } elsif ( $t eq "_" ) {
+
+ # QuoteBlock
+ if ( ( defined $self->{type} )
+ and ( $self->{type} eq "verse" ) )
+ {
+ $wrapped_mode = 0;
+ $self->{verbatim} = 1;
+ print STDERR "QuoteBlock verse\n" if $debug{parse};
+ } else {
+ $wrapped_mode = 1;
+ }
+ } elsif ( $t eq "=" ) {
+
+ # ExampleBlock
+ $wrapped_mode = 1;
+ } elsif ( $t eq "~" ) {
+
+ # Filter blocks, TBC: not translated
+ $wrapped_mode = 0;
+ $self->{verbatim} = 3;
+ }
+ $self->{type} = $type;
+ }
+ $paragraph = "";
+ $self->pushline( $line . "\n" );
+ }
+ } elsif ( ( not defined( $self->{verbatim} ) ) and ( $line =~ m/^\/\/(.*)/ ) ) {
+ my $comment = $1;
+ if ( $comment =~ m/^po4a: / ) {
+
+ # Po4a command line
+ $self->process_definition($comment);
+ } else {
+
+ # Comment line
+ push @comments, $comment;
+ }
+ do_paragraph( $self, $paragraph, $wrapped_mode ) if length($paragraph);
+ $paragraph = "";
+ $wrapped_mode = 1;
+
+ $self->pushline( $line . "\n" );
+ } elsif ( not defined $self->{verbatim}
+ and ( $line =~ m/^\[\[([^\]]*)\]\]$/ ) )
+ {
+ # Found BlockId
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ my $block_id = $1;
+ $paragraph = "";
+ $wrapped_mode = 1;
+ if ($block_id =~ m/^([^,]+),(.+)$/) {
+ # Found BlockId with a xlabel
+ my $xlabel = $2;
+ $block_id = $1;
+ $self->pushline( "[[$block_id," );
+ do_paragraph( $self, $xlabel, 0);
+ $self->pushline( "]]\n" );
+ } else {
+ $self->pushline( $line . "\n" );
+ }
+ undef $self->{bullet};
+ undef $self->{indent};
+ } elsif ( not defined $self->{verbatim}
+ and ( $paragraph eq "" )
+ and ( $line =~ m/^((?:$RE_STYLE_ADMONITION):\s+)(.*)$/ ) )
+ {
+ my $type = $1;
+ my $text = $2;
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = $text . "\n";
+ $wrapped_mode = 1;
+ $self->pushline($type);
+ undef $self->{bullet};
+ undef $self->{indent};
+ } elsif ( not defined $self->{verbatim}
+ and ( $line =~ m/^\[($RE_STYLES)\]$/ ) )
+ {
+ my $type = $1;
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $wrapped_mode = 1;
+ $self->pushline( $line . "\n" );
+ if ( $type eq "verse" ) {
+ $wrapped_mode = 0;
+ }
+ undef $self->{bullet};
+ undef $self->{indent};
+ } elsif ( not defined $self->{verbatim}
+ and ( $line =~ m/^\[[^\]]+\]$/ ) )
+ {
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ my $t = $self->parse_style($line);
+ $self->pushline("$t\n");
+ @comments = ();
+ $wrapped_mode = 1;
+ if ( $line =~ m/^\[(['"]?)(verse|quote)\1,/ ) {
+ $self->{type} = $2;
+ if ( $self->{type} eq 'verse' ) {
+ $wrapped_mode = 0;
+ }
+ print STDERR "Starting verse\n" if $debug{parse};
+ }
+ if ( ( ( $line =~ m/^\[format=(['"]?)(csv|tsv|dsv)\1,/ ) || ( $line =~ m/^\[separator=[^\|]/ ) )
+ && $self->{options}{'tablecells'} )
+ {
+ warn wrap_mod(
+ "$ref",
+ dgettext(
+ "po4a",
+ "Po4a's tablecells mode only supports PSV formatted tables with '|' separators. Disabling tablecells and falling back to block mode for this table."
+ )
+ );
+ $self->{disabletablecells} = 1;
+ }
+ undef $self->{bullet};
+ undef $self->{indent};
+ } elsif ( not defined $self->{verbatim}
+ and ( $line =~ m/^(\s*)([-%~\$[*_+`'#<>[:alnum:]\\"(|\{].*?)((?::::?|;;|\?\?|:-)(?: *\\)?)$/ ) )
+ {
+ my $indent = $1;
+ my $label = $2;
+ my $labelend = $3;
+
+ # Found labeled list
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $wrapped_mode = 1;
+ $self->{bullet} = "";
+ $self->{indent} = $indent;
+ my $t = $self->translate(
+ $label,
+ $self->{ref},
+ "Labeled list",
+ "comment" => join( "\n", @comments ),
+ "wrap" => 0
+ );
+ $self->pushline("$indent$t$labelend\n");
+ @comments = ();
+ } elsif ( not defined $self->{verbatim}
+ and ( $line =~ m/^(\s*)(\S.*?)((?::?::|;;)\s+)(.*)$/ ) )
+ {
+ my $indent = $1;
+ my $label = $2;
+ my $labelend = $3;
+ my $labeltext = $4;
+
+ # Found Horizontal Labeled Lists
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = $labeltext . "\n";
+ $wrapped_mode = 1;
+ $self->{bullet} = "";
+ $self->{indent} = $indent;
+ my $t = $self->translate(
+ $label,
+ $self->{ref},
+ "Labeled list",
+ "comment" => join( "\n", @comments ),
+ "wrap" => 0
+ );
+ $self->pushline("$indent$t$labelend");
+ @comments = ();
+ } elsif ( not defined $self->{verbatim}
+ and ( $line =~ m/^\:(\S.*?)(:\s*)(.*)$/ ) )
+ {
+ my $attrname = $1;
+ my $attrsep = $2;
+ my $attrvalue = $3;
+ my $linebreak = "";
+ while ( $attrvalue =~ s/ ([\\+])$//s ) {
+ $linebreak = $1;
+ # add a carriage return at the end of attrvalue if there is none
+ $attrvalue .= "\n" if $attrvalue !~ m/\n$/;
+ ( $line, $ref ) = $self->shiftline();
+ $ref =~ m/^(.*):[0-9]+$/;
+ $line =~ s/^\s+|\s+$//;
+ print STDERR "appending attribute with $line" if $debug{parse};
+ $attrvalue .= $line;
+ }
+ print STDERR "attr definition: $attrvalue\n" if $debug{parse};
+ # Found an Attribute entry
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $wrapped_mode = 1;
+ undef $self->{bullet};
+ undef $self->{indent};
+ if ( defined( $self->{translate}->{entry}->{$attrname} ) ) {
+ my $t = $self->translate(
+ $attrvalue,
+ $self->{ref},
+ "Attribute :$attrname:",
+ "comment" => join( "\n", @comments ),
+ "wrap" => 1
+ );
+ $t =~ s/\n/ \\\n/g;
+ $self->pushline(":$attrname$attrsep$t\n");
+ } else {
+ $attrvalue =~ s/\n/ $linebreak\n/g;
+ $self->pushline(":$attrname$attrsep$attrvalue\n");
+ }
+ @comments = ();
+ } elsif ( not defined $self->{verbatim}
+ and ( $line =~ m/^([\w\d][\w\d-]*)(::)(\S*|\S*\{.*\}\S*)\[(.*)\]$/ ) )
+ {
+ my $macroname = $1;
+ my $macrotype = $2;
+ my $macrotarget = $3;
+ my $macroparam = $4;
+
+ # Found a macro
+ # print STDERR "macro: $macroname|type: $macrotype|target: $macrotarget|param: $macroparam\n";
+
+ # Don't process include macros in tables, pass them through
+ if ( ( $macroname eq "include" )
+ and ( $macrotype eq '::' )
+ and ( defined( $self->{type} ) and ( $self->{type} eq "Table" ) ) )
+ {
+ $paragraph .= $line . "\n";
+ } elsif ( ( $macroname eq "include" || $macroname eq "ifeval" )
+ and ( $macrotype eq '::' ) )
+ {
+ $self->pushline( $line . "\n" );
+ } else {
+ if ( $macrotype eq '::' ) {
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $wrapped_mode = 1;
+ undef $self->{bullet};
+ undef $self->{indent};
+ }
+ my $t = $self->parse_macro( $macroname, $macrotype, $macrotarget, $macroparam );
+ $self->pushline("$t\n");
+ @comments = ();
+ }
+ } elsif ( not defined $self->{verbatim}
+ and ($paragraph =~ m/^\s*$/)
+ and ( $line !~ m/^\.\./ )
+ and ( $line =~ m/^\.(\S.*)$/ ) )
+ {
+ my $title = $1;
+
+ # Found block title
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $wrapped_mode = 1;
+ undef $self->{bullet};
+ undef $self->{indent};
+ my $t = $self->translate(
+ $title,
+ $self->{ref},
+ "Block title",
+ "comment" => join( "\n", @comments ),
+ "wrap" => 0
+ );
+ $self->pushline(".$t\n");
+ @comments = ();
+ } elsif ( not defined $self->{verbatim}
+ and ( $line =~ m/^(\s*)((?:(?:[-*o+\.]+(?:\s+\[[ xX\*]\])?)|(?:[0-9]+[.\)])|(?:[a-z][.\)])|\([0-9]+\))\s+)(.*)$/ ) )
+ {
+ my $indent = $1 || "";
+ my $bullet = $2;
+ my $text = $3;
+ print STDERR "Item (bullet: '$bullet')\n" if ( $debug{parse} );
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = $text . "\n";
+ $self->{indent} = $indent;
+ $self->{bullet} = $bullet;
+ } elsif ( not defined $self->{verbatim}
+ and ( $line =~ m/^((?:<?(?:[0-9]|\.)+)?> +)(.*)$/ ) )
+ {
+ my $bullet = $1;
+ my $text = $2;
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = $text . "\n";
+ $self->{indent} = "";
+ $self->{bullet} = $bullet;
+ } elsif ( not defined $self->{verbatim}
+ and ( $line eq " +") ) {
+ $paragraph .= $line . "\n";
+ } elsif ( ( $line =~ /^\s*$/ ) and ( !defined( $self->{type} ) or ( $self->{type} ne "Table" ) ) ) {
+
+ # When not in table, empty lines or lines containing only spaces do break paragraphs
+ print STDERR "Empty new line. Wrap: " . ( defined( $self->{verbatim} ) ? "yes. " : "no. " ) . "\n"
+ if $debug{parse};
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $wrapped_mode = 1 unless defined( $self->{verbatim} );
+ $self->pushline( $line . "\n" );
+
+ } elsif ( ( $line =~ /^\s*$/ ) ) {
+
+ # When in table, empty lines are either added to the current paragraph if it not empty, or pushed verbatim if not
+ if ( length $paragraph ) {
+ $paragraph .= $line . "\n";
+ } else {
+ $self->pushline( $line . "\n" );
+ }
+
+ # print STDERR ">>$paragraph<<\n";
+ } elsif ( not defined $self->{verbatim}
+ and ( defined $self->{bullet} and $line =~ m/^(\s+)(.*)$/ ) )
+ {
+ my $indent = $1;
+ my $text = $2;
+ print STDERR "bullet (" . ( $self->{bullet} ) . ") starting with " . length($indent) . " spaces\n"
+ if $debug{'parse'};
+ if ( $paragraph eq "" && length( $self->{bullet} ) && length($indent) ) {
+
+ # starting a paragraph with a bullet (not an enum or so), and indented.
+ # Thus a literal paragraph in a list.
+ $wrapped_mode = 0;
+ }
+ if ( not defined $self->{indent} ) {
+
+ # No indent level before => Starting a paragraph?
+ $paragraph .= $text . "\n";
+ $self->{indent} = $indent;
+ print STDERR "Starting a paragraph\n" if ( $debug{parse} );
+ } elsif ( length($paragraph)
+ and ( length( $self->{bullet} ) + length( $self->{indent} ) == length($indent) ) )
+ {
+ # same indent level as before: append
+ $paragraph .= $text . "\n";
+ } elsif ( length($paragraph)
+ and ( length( $self->{bullet} ) == 0 ) )
+ {
+ # definition list continuation
+ $paragraph .= $text . "\n";
+ $self->{indent} = "";
+ print STDERR " definition list continuation\n" if ( $debug{parse} );
+ } else {
+
+ # not the same indent level: start a new translated paragraph
+ print STDERR "New paragraph (indent: '" . ( $self->{indent} ) . "')\n" if ( $debug{parse} );
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ if ( length( $self->{indent} ) > 0 && length( $self->{indent} ) < length($indent) ) {
+
+ # increase indentation: the new block must not be wrapped
+ $wrapped_mode = 0;
+ }
+ $paragraph = $text . "\n";
+ $self->{indent} = $indent;
+ $self->{bullet} = "";
+ }
+ } elsif ( $line =~ /^-- $/ ) {
+
+ # Break paragraphs on email signature hint
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $wrapped_mode = 1;
+ $self->pushline( $line . "\n" );
+ } elsif ( $line =~ /^=+$/
+ or $line =~ /^_+$/
+ or $line =~ /^-+$/ )
+ {
+ $wrapped_mode = 0;
+ $paragraph .= $line . "\n";
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $wrapped_mode = 1;
+ } elsif ( $paragraph ne ""
+ && $self->{bullet}
+ && length( $self->{indent} || "" ) == 0
+ && ( $line =~ m/^(\s*)((?:[-*o+]+|([0-9]+[.\)])|\([0-9]+\))\s+)/s ) )
+ {
+ # If the next line starts with a bullet, process this immediately and setup the next line
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $wrapped_mode = 0;
+ $self->unshiftline( $line, $ref );
+ $line = "";
+ undef $self->{bullet};
+ undef $self->{indent};
+ } elsif ( $line =~ /^\|===/ ) {
+
+ # This is a table, treat it as a non-wrapped paragraph
+ print STDERR "Found Table delimiter\n" if ( $debug{parse} );
+ if ( ( $paragraph eq "" ) or ( defined( $self->{type} ) and ( $self->{type} =~ /^delimited block/i ) ) ) {
+
+ # Start the table
+ $wrapped_mode = 0;
+ $self->{type} = "Table";
+ } else {
+
+ # End the Table
+ if ( $self->{options}{'tablecells'}
+ and not defined $self->{disabletablecells} )
+ {
+ do_stripped_unwrapped_paragraph( $self, $paragraph, $wrapped_mode );
+ $self->pushline("\n");
+ } else {
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ }
+ undef $self->{verbatim};
+ undef $self->{type};
+ undef $self->{disabletablecells};
+ $paragraph = "";
+ }
+ $self->pushline( $line . "\n" );
+ } else {
+
+ # A stupid paragraph of text
+ print STDERR "Regular line. "
+ . "Bullet: '"
+ . ( defined( $self->{bullet} ) ? $self->{bullet} : 'none' ) . "'; "
+ . "Indent: '"
+ . ( defined( $self->{indent} ) ? $self->{indent} : 'none' ) . "'\n"
+ if ( $debug{parse} );
+
+ if ( $line =~ /^\s/ ) {
+
+ # A line starting by a space indicates a non-wrap
+ # paragraph
+ $wrapped_mode = 0;
+ }
+
+ if ( ( $paragraph ne "" && $self->{bullet} && length( $self->{indent} || "" ) == 0 ) )
+ {
+ if ( ( !$self->{options}{'nolinting'} ) && ($paragraph !~ m/ \+\n/ ) ) {
+ # Second line of an item block is not indented. It is unindented
+ # (and allowed) additional text or a new list item.
+ warn wrap_mod(
+ "$ref",
+ dgettext(
+ "po4a",
+ "It seems that you are adding unindented content to an item. "
+ . "The standard allows this, but you may still want to change your document "
+ . "to use indented text to provide better visual clues to writers."
+ )
+ );
+ }
+ } else {
+ undef $self->{bullet};
+ undef $self->{indent};
+ }
+
+ # TODO: comments
+ $paragraph .= $line . "\n";
+ }
+
+ ( $line, $ref ) = $self->shiftline();
+ }
+ if ( length $paragraph ) {
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ }
+}
+
+sub do_stripped_unwrapped_paragraph {
+ my ( $self, $paragraph, $wrap ) = ( shift, shift, shift );
+ my $type = shift || $self->{type} || "Plain text";
+ my ( $pre, $trans, $post ) = $paragraph =~ /^(\s*)(.*?)(\s*)$/s;
+ $self->pushline($pre);
+ do_paragraph( $self, $trans, $wrap, $type );
+ $self->pushline($post);
+}
+
+sub do_paragraph {
+ my ( $self, $paragraph, $wrap ) = ( shift, shift, shift );
+ my $type = shift || $self->{type} || "Plain text";
+ return if ( $paragraph eq "" );
+
+ # DEBUG
+ # my $b;
+ # if (defined $self->{bullet}) {
+ # $b = $self->{bullet};
+ # } else {
+ # $b = "UNDEF";
+ # }
+ # $type .= " verbatim: '".($self->{verbatim}||"NONE")."' bullet: '$b' indent: '".($self->{indent}||"NONE")."' type: '".($self->{type}||"NONE")."'";
+
+ if ( not defined $self->{verbatim} ) {
+
+ # Detect bullets
+ # | * blah blah
+ # |<spaces> blah
+ # | ^-- aligned
+ # <empty line>
+ #
+ # Other bullets supported:
+ # - blah o blah + blah
+ # 1. blah 1) blah (1) blah
+ TEST_BULLET:
+ if ( $paragraph =~ m/^(\s*)((?:(?:[-*o+](?:\s+\[[ Xx\*]\])?)|([0-9]+[.\)])|\([0-9]+\))\s+)([^\n]*\n)(.*)$/s ) {
+ my $para = $5;
+ my $bullet = $2;
+ my $indent1 = $1;
+ my $indent2 = "$1" . ( ' ' x length $bullet );
+ my $text = $4;
+ while ( $para !~ m/$indent2(?:(?:[-*o+](?:\\s+[[ Xx\*]\])?)|([0-9]+[.\)])|\([0-9]+\))\s+/
+ and $para =~ s/^$indent2(\S[^\n]*\n)//s )
+ {
+ $text .= $1;
+ }
+
+ # TODO: detect if a line starts with the same bullet
+ if ( $text !~ m/\S[ \t][ \t][ \t]+\S/s ) {
+ my $bullet_regex = quotemeta( $indent1 . $bullet );
+ $bullet_regex =~ s/[0-9]+/\\d\+/;
+ if ( $para eq '' or $para =~ m/^$bullet_regex\S/s ) {
+ my $trans = $self->translate(
+ $text,
+ $self->{ref},
+ "Bullet: '$indent1$bullet'",
+ "wrap" => 1,
+ "wrapcol" => -( length $indent2 )
+ );
+ $trans =~ s/^/$indent1$bullet/s;
+ $trans =~ s/\n(.)/\n$indent2$1/sg;
+ $self->pushline( $trans . "\n" );
+ if ( $para eq '' ) {
+ return;
+ } else {
+
+ # Another bullet
+ $paragraph = $para;
+ goto TEST_BULLET;
+ }
+ }
+ }
+ }
+ }
+
+ my $end = "";
+ if ($wrap) {
+ $paragraph =~ s/(\n*)$//s;
+ $end = $1 || "";
+ }
+ if ( defined $self->{bullet} ) {
+ my $bullet = $self->{bullet};
+ my $indent1 = $self->{indent};
+ $self->pushline($indent1 . $bullet);
+ }
+ $paragraph = $self->translate_indexterms($paragraph);
+
+ my $t = $self->translate(
+ $paragraph,
+ $self->{ref},
+ $type,
+ "comment" => join( "\n", @comments ),
+ "wrap" => $wrap
+ );
+ my $bullet = $self->{bullet} || "";
+ # print STDERR "translated: '$t', $bullet\n";
+
+ my $unwrap_result = !$self->{options}{'forcewrap'} && $wrap && (! ($t =~ /\+\n/) ) ;
+ if ($unwrap_result) {
+ $t =~ s/[\n ]+/ /g;
+ }
+
+ @comments = ();
+ if ( ( defined $self->{bullet} ) && !($t =~ /\+\n/ ) ) {
+ my $bullet = $self->{bullet};
+ my $indent1 = $self->{indent};
+ my $indent2 = $indent1 . ( ' ' x length($bullet) );
+ $t =~ s/\n(.)/\n$indent2$1/sg;
+ }
+ $self->pushline( $t . $end );
+}
+
+sub translate_indexterms {
+ my ($self, $paragraph) = @_;
+ $paragraph = $self->translate_in_regex($paragraph, qr/\(\(\(([^\)]+)\)\)\)/);
+ return $self->translate_in_regex($paragraph, qr/indexterm:\[([^\]]+)\]/);
+}
+
+sub translate_in_regex {
+ # Detect index entries and translate them separately.
+ # They are moved in front of the paragraph, regardless of their original location,
+ # but that's consistant with the specification.
+ my ($self, $paragraph, $pattern) = @_;
+ if ( my @indexes = ($paragraph =~ m/$pattern/g ) ) {
+ for my $index (@indexes) {
+ my @terms = ();
+ while (
+ $index =~ m/\G(
+ "(?:[^"\\])+" # quoted term
+ | (?:[^,\\])+ # unquoted term
+ )(,\s*+)?/gx
+ )
+ {
+ my $term = $1;
+ if ( $term =~ /^"(.*)"$/ ) {
+ push @terms, '"' . ($self->translate(
+ $1,
+ $self->{ref},
+ "Index entry",
+ "wrap" => 1,
+ "wrapcol" => 0)) . '"';
+ } else {
+ push @terms, $self->translate(
+ $term,
+ $self->{ref},
+ "Index entry",
+ "wrap" => 1,
+ "wrapcol" => 0);
+ }
+ }
+ $self->pushline("(((" . join (",", @terms) . ")))");
+
+ }
+ }
+ $paragraph =~ s/$pattern\n?//g;
+ return $paragraph;
+}
+
+sub parse_style {
+ my ( $self, $text ) = ( shift, shift );
+ $text =~ s/^\[//;
+ $text =~ s/\]$//;
+ $text =~ m/^([^=,]+)/;
+ if ( defined($1) && $self->is_unsplitted_attributelist( $1, 'style' ) ) {
+ my $t = $self->translate(
+ $text,
+ $self->{ref},
+ "Unsplitted AttributeList",
+ "comment" => join( "\n", @comments ),
+ "wrap" => 0
+ );
+ return "[$t]";
+ }
+ my @attributes = $self->split_attributelist($text);
+ return "[" . join( ", ", $self->join_attributelist( "style", @attributes ) ) . "]";
+}
+
+sub parse_macro {
+ my ( $self, $macroname, $macrotype, $macrotarget, $macroparam ) = ( shift, shift, shift, shift, shift );
+ if ( $self->is_unsplitted_attributelist( $macroname, 'macro' ) ) {
+ my $t = $self->translate(
+ "$macroname$macrotype$macrotarget\[$macroparam\]",
+ $self->{ref},
+ "Unsplitted macro call",
+ "comment" => join( "\n", @comments ),
+ "wrap" => 0
+ );
+ return $t;
+ }
+ my @attributes = ();
+ @attributes = $self->split_attributelist($macroparam) unless $macroparam eq "";
+
+ unshift @attributes, $macroname;
+ my @translated_attributes = $self->join_attributelist( "macro", @attributes );
+ shift @translated_attributes;
+ if ( $self->is_translated_target($macroname) ) {
+ my $target = unquote_space($macrotarget);
+ my $t = $self->translate(
+ $target,
+ $self->{ref},
+ "Target for macro $macroname",
+ "comment" => join( "\n", @comments ),
+ "wrap" => 0
+ );
+ $macrotarget = quote_space($t);
+ }
+ return "$macroname$macrotype$macrotarget\[" . join( ", ", @translated_attributes ) . "]";
+}
+
+sub split_attributelist {
+ my ( $self, $text ) = ( shift, shift );
+
+ print STDERR "Splitting attributes in: $text\n" if $debug{split_attributelist};
+ my @attributes = ();
+ while (
+ $text =~ m/\G(
+ [^\W\d][-\w]*="(?:[^"\\]++|\\.)*+" # named attribute
+ | [^\W\d][-\w]*=None # undefined named attribute
+ | [^\W\d][-\w]*=\S+ # invalid, but accept it anyway
+ | "(?:[^"\\]++|\\.)*+" # quoted attribute
+ | (?:[^,\\]++|\\.)++ # unquoted attribute
+ | ^$ # Empty attribute list allowed
+
+ )(?:,\s*+)?/gx
+ )
+ {
+ print STDERR " -> $1\n" if $debug{split_attributelist};
+ push @attributes, $1;
+ }
+ return @attributes;
+}
+
+sub join_attributelist {
+ my ( $self, $type ) = ( shift, shift );
+ my @attributes = @_;
+ my $command = shift(@attributes);
+ my $position;
+ if ( $type eq 'macro' ) {
+ $position = 0; # macroname is passed through the first attribute
+ } else {
+ $position = 1;
+ }
+ my @text = ($command);
+ if ( $command =~ m/=/ ) {
+ my $attr = $command;
+ $command =~ s/=.*//;
+ @text = ();
+ push @text, $self->translate_attributelist( $type, $command, $position, $attr );
+ }
+ foreach my $attr (@attributes) {
+ $position++;
+ push @text, $self->translate_attributelist( $type, $command, $position, $attr );
+ }
+ print STDERR "Joined attributes: " . join( ", ", @text ) . "\n" if $debug{join_attributelist};
+ return @text;
+}
+
+sub translate_attributelist {
+ my ( $self, $type, $command, $count, $attr ) = ( shift, shift, shift, shift, shift );
+ return $attr unless defined $self->{translate}->{$type}->{$command};
+ if ( $attr =~ m/^([^\W\d][-\w]*)=(.*)/ ) {
+ my $attrname = $1;
+ my $attrvalue = $2;
+ if ( $self->{translate}->{$type}->{$command} =~ m/,$attrname,/ ) {
+ my $value = unquote($attrvalue);
+ my $t = $self->translate(
+ $value,
+ $self->{ref},
+ "Named '$attrname' AttributeList argument for $type '$command'",
+ "comment" => join( "\n", @comments ),
+ "wrap" => 0
+ );
+ if ( $attrvalue eq 'None' && $t eq 'None' ) {
+ $attr = $attrname . "=None";
+ } else {
+ $attr = $attrname . "=" . quote($t);
+ }
+ }
+ } else {
+ if ( $self->{translate}->{$type}->{$command} =~ m/,$count,/ ) {
+ my $attrvalue = unquote($attr);
+ my $t = $self->translate(
+ $attrvalue,
+ $self->{ref},
+ "Positional (\$$count) AttributeList argument for $type '$command'",
+ "comment" => join( "\n", @comments ),
+ "wrap" => 0
+ );
+ $attr = quote($t);
+ }
+ }
+ return $attr;
+}
+
+sub unquote {
+ my ($text) = shift;
+ return $text unless $text =~ s/^"(.*)"$/$1/;
+ $text =~ s/\\"/"/g;
+ return $text;
+}
+
+sub quote {
+ my $text = shift;
+ $text =~ s/"/\\"/g;
+ return '"' . $text . '"';
+}
+
+sub quote_space {
+ my $text = shift;
+ $text =~ s/ /%20/g;
+ return $text;
+}
+
+sub unquote_space {
+ my $text = shift;
+ $text =~ s/%20/ /g;
+ return $text;
+}
+
+=head1 STATUS OF THIS MODULE
+
+Tested successfully on simple AsciiDoc files.
+
+=head1 AUTHORS
+
+ Nicolas François <nicolas.francois@centraliens.net>
+ Denis Barbier <barbier@linuxfr.org>
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright © 2005-2008 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.
+ Copyright © 2012 Denis BARBIER <barbier@linuxfr.org>.
+ Copyright © 2017 Martin Quinson <mquinson#debian.org>.
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
+
+=cut
+
+1;
+
+__END__
+
+# LocalWords: Charset charset AsciiDoc tablecells po UTF gettext msgid nostrip
diff --git a/lib/Locale/Po4a/BibTeX.pm b/lib/Locale/Po4a/BibTeX.pm
new file mode 100644
index 0000000..d4e73be
--- /dev/null
+++ b/lib/Locale/Po4a/BibTeX.pm
@@ -0,0 +1,146 @@
+#!/usr/bin/perl -w
+
+# Po4a::BibTeX.pm
+#
+# extract and translate translatable strings from BibTeX documents
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+########################################################################
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::BibTeX - convert BibTeX documents from/to PO files
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::BibTeX is a module to help the translation of
+bibliographies in the BibTeX format into other [human] languages.
+
+Fields values are extracted and proposed for translation.
+
+=head1 OPTIONS ACCEPTED BY THIS MODULE
+
+NONE.
+
+=head1 STATUS OF THIS MODULE
+
+It is a very simple module, but still young.
+
+=cut
+
+package Locale::Po4a::BibTeX;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+require Exporter;
+use vars qw(@ISA @EXPORT);
+@ISA = qw(Locale::Po4a::TransTractor);
+@EXPORT = qw();
+
+use Locale::Po4a::TransTractor;
+use Locale::Po4a::Common;
+
+sub initialize { }
+
+sub parse {
+ my $self = shift;
+ my ( $line, $ref );
+ my $paragraph = "";
+ my $field = "";
+ my $id = "";
+ my $wrapped_mode = 1;
+ ( $line, $ref ) = $self->shiftline();
+ while ( defined($line) ) {
+ chomp($line);
+
+ #print "tutu: '$line'\n";
+ $self->{ref} = "$ref";
+ if ( $id eq ""
+ and $line =~ m/^\@.*?\s*\{\s*(.*),\s*$/ )
+ {
+ $id = $1;
+ $self->pushline( $line . "\n" );
+ } elsif ( $id ne ""
+ and $field eq ""
+ and $line =~ m/^((.*?)\s*=\s*)([^ "{].*?)(\s*,?\s*)$/ )
+ {
+ my $end = ( defined $4 ) ? $4 : "";
+ $self->pushline( $1 . $self->translate( $3, $self->{ref}, "$2 ($id)", "wrap" => 1 ) . $end . "\n" );
+ $field = "";
+ $paragraph = "";
+ } elsif ( $id ne ""
+ and $field eq ""
+ and $line =~ m/^((.*?)\s*=\s*)(.*)$/ )
+ {
+ $field = $2;
+ $paragraph = $3 . "\n";
+ $self->pushline($1);
+ } elsif ( $field ne "" ) {
+ $paragraph .= "$line\n";
+ } elsif ( $line =~ m/^\s*(\%.*)?$/ ) {
+ $self->pushline( $line . "\n" );
+ } elsif ( $line =~ m/^\s*\}\s*$/ ) {
+ $self->pushline( $line . "\n" );
+ $id = "";
+ } else {
+ print "unsupported line: '$line'\n";
+ }
+ if ( $paragraph =~ m/^(\s*\{)(.*)(\}\s*,?\s*)$/s
+ or $paragraph =~ m/^(\s*")(.*)("\s*,?\s*)$/s
+ or $paragraph =~ m/^(\s*)([^ "{].*)(\s*,?\s*)$/s )
+ {
+ $self->pushline( $1 . $self->translate( $2, $self->{ref}, "$field ($id)", "wrap" => 1 ) . $3 );
+ $field = "";
+ $paragraph = "";
+ }
+ ( $line, $ref ) = $self->shiftline();
+ }
+ if ( $paragraph =~ m/^(\s*\{)(.*)(\}\s*,?\s*)$/s
+ or $paragraph =~ m/^(\s*")(.*)("\s*,?\s*)$/s
+ or $paragraph =~ m/^(\s*)(.*)(\s*,?\s*)$/s )
+ {
+ $self->pushline( $self->translate( $1, $self->{ref}, "$field ($id)", "wrap" => 1 ) . $2 );
+ $field = "";
+ $paragraph = "";
+ }
+}
+
+sub do_paragraph {
+ my ( $self, $paragraph, $wrap ) = ( shift, shift, shift );
+ $self->pushline( $self->translate( $paragraph, $self->{ref}, "Plain text", "wrap" => $wrap ) );
+}
+
+1;
+
+=head1 AUTHORS
+
+ Nicolas François <nicolas.francois@centraliens.net>
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright © 2006 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
diff --git a/lib/Locale/Po4a/Chooser.pm b/lib/Locale/Po4a/Chooser.pm
new file mode 100644
index 0000000..fa96084
--- /dev/null
+++ b/lib/Locale/Po4a/Chooser.pm
@@ -0,0 +1,178 @@
+# Locale::Po4a::Chooser -- Manage po4a modules
+#
+# This program is free software; you may redistribute it and/or modify it
+# under the terms of GPL v2.0 or later (see COPYING).
+#
+# This module converts POD to PO file, so that it becomes possible to
+# translate POD formatted documentation. See gettext documentation for
+# more info about PO files.
+
+############################################################################
+# Modules and declarations
+############################################################################
+
+package Locale::Po4a::Chooser;
+
+use 5.16.0;
+use strict;
+use warnings;
+use Locale::Po4a::Common;
+
+sub new {
+ my ($module) = shift;
+ my (%options) = @_;
+
+ die wrap_mod( "po4a::chooser", gettext("Please provide a module name") )
+ unless defined $module;
+
+ my $modname;
+ if ( $module eq 'kernelhelp' ) {
+ $modname = 'KernelHelp';
+ } elsif ( $module eq 'newsdebian' ) {
+ $modname = 'NewsDebian';
+ } elsif ( $module eq 'latex' ) {
+ $modname = 'LaTeX';
+ } elsif ( $module eq 'bibtex' ) {
+ $modname = 'BibTex';
+ } elsif ( $module eq 'tex' ) {
+ $modname = 'TeX';
+ } elsif ( $module eq 'asciidoc' ) {
+ $modname = 'AsciiDoc';
+ } elsif ( $module eq 'Rd' || $module eq 'rubydoc' ) {
+ $modname = 'RubyDoc';
+ } else {
+ $modname = ucfirst($module);
+ }
+ if ( !UNIVERSAL::can( "Locale::Po4a::$modname", 'new' ) ) {
+ eval qq{use Locale::Po4a::$modname};
+ if ($@) {
+ my $error = $@;
+ warn wrap_msg( gettext("Unknown format type: %s."), $module );
+ warn wrap_mod( "po4a::chooser", gettext("Module loading error: %s"), $error )
+ unless defined $options{'quiet'};
+ list(1);
+ }
+ }
+ return "Locale::Po4a::$modname"->new(%options);
+}
+
+sub list {
+ warn wrap_msg(
+ gettext("List of valid formats:")
+
+ # ."\n - ".gettext("bibtex: BibTex bibliography format.")
+ . "\n - "
+ . gettext("asciidoc: AsciiDoc format.")
+ . "\n - "
+ . gettext("dia: uncompressed Dia diagrams.")
+ . "\n - "
+ . gettext("docbook: DocBook XML.")
+ . "\n - "
+ . gettext("guide: Gentoo Linux's XML documentation format.")
+ . "\n - "
+ . gettext("ini: INI format.")
+ . "\n - "
+ . gettext("kernelhelp: Help messages of each kernel compilation option.")
+ . "\n - "
+ . gettext("latex: LaTeX format.")
+ . "\n - "
+ . gettext("man: Good old manual page format.")
+ . "\n - "
+ . gettext("pod: Perl Online Documentation format.")
+ . "\n - "
+ . gettext("rubydoc: Ruby Documentation (RD) format.")
+ . "\n - "
+ . gettext("sgml: either DebianDoc or DocBook DTD.")
+ . "\n - "
+ . gettext("texinfo: The info page format.")
+ . "\n - "
+ . gettext("tex: generic TeX documents (see also latex).")
+ . "\n - "
+ . gettext("text: simple text document.")
+ . "\n - "
+ . gettext("wml: WML documents.")
+ . "\n - "
+ . gettext("xhtml: XHTML documents.")
+ . "\n - "
+ . gettext("xml: generic XML documents (see also docbook).")
+ . "\n - "
+ . gettext("yaml: YAML documents.")
+ );
+ exit shift;
+}
+##############################################################################
+# Module return value and documentation
+##############################################################################
+
+1;
+__END__
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Chooser - manage po4a modules
+
+=head1 DESCRIPTION
+
+Locale::Po4a::Chooser is a module to manage po4a modules. Previously, all po4a
+binaries used to know all po4a modules (pod, man, sgml, etc). This made the
+addition of a new module boring, because you had to make sure that the documentation is synchronized
+in all modules, and that each of them can access the new module.
+
+Now, you just have to call the Locale::Po4a::Chooser::new() function,
+passing the name of module as argument.
+
+The function Locale::Po4a::Chooser::list() lists the available
+formats, and exits with the value passed as argument. So, we call
+Locale::Po4a::Chooser::list(0) when requested for the list of
+formats, and Locale::Po4a::Chooser::list(1) when passed an invalid
+format name.
+
+=head1 SEE ALSO
+
+=over 4
+
+=item About po4a:
+
+L<Locale::Po4a::Po(3pm)>,
+L<Locale::Po4a::TransTractor(3pm)>,
+L<po4a(7)|po4a.7>
+
+=item About modules:
+
+L<Locale::Po4a::Dia(3pm)>,
+L<Locale::Po4a::Docbook(3pm)>,
+L<Locale::Po4a::Guide(3pm)>,
+L<Locale::Po4a::Halibut(3pm)>,
+L<Locale::Po4a::Ini(3pm)>,
+L<Locale::Po4a::KernelHelp(3pm)>,
+L<Locale::Po4a::LaTeX(3pm)>,
+L<Locale::Po4a::Man(3pm)>,
+L<Locale::Po4a::Pod(3pm)>,
+L<Locale::Po4a::RubyDoc(3pm)>,
+L<Locale::Po4a::Sgml(3pm)>,
+L<Locale::Po4a::TeX(3pm)>,
+L<Locale::Po4a::Texinfo(3pm)>,
+L<Locale::Po4a::Text(3pm)>,
+L<Locale::Po4a::Wml(3pm)>.
+L<Locale::Po4a::Xhtml(3pm)>,
+L<Locale::Po4a::Xml(3pm)>,
+L<Locale::Po4a::Wml(3pm)>,
+L<Locale::Po4a::Yaml(3pm)>.
+
+=back
+
+=head1 AUTHORS
+
+ Denis Barbier <barbier@linuxfr.org>
+ Martin Quinson (mquinson#debian.org)
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright © 2002-2005, 2014, 2017 SPI, Inc.
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
+
+=cut
diff --git a/lib/Locale/Po4a/Common.pm b/lib/Locale/Po4a/Common.pm
new file mode 100644
index 0000000..1b09adc
--- /dev/null
+++ b/lib/Locale/Po4a/Common.pm
@@ -0,0 +1,252 @@
+# Locale::Po4a::Common -- Common parts of the po4a scripts and utils
+#
+# Copyright © 2005 Jordi Vilalta <jvprat@gmail.com>
+#
+# This program is free software; you may redistribute it and/or modify it
+# under the terms of GPL v2.0 or later (see COPYING).
+#
+# This module has common utilities for the various scripts of po4a
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Common - common parts of the po4a scripts and utils
+
+=head1 DESCRIPTION
+
+Locale::Po4a::Common contains common parts of the po4a scripts and some useful
+functions used along the other modules.
+
+If needed, you can disable the use of Text::WrapI18N as such:
+
+ use Locale::Po4a::Common qw(nowrapi18n);
+ use Locale::Po4a::Text;
+
+instead of:
+
+ use Locale::Po4a::Text;
+
+The ordering is important here: as most Locale::Po4a modules load themselves
+Locale::Po4a::Common, the first time this module is loaded determines whether Text::WrapI18N is used.
+
+=cut
+
+package Locale::Po4a::Common;
+
+require Exporter;
+use vars qw(@ISA @EXPORT);
+@ISA = qw(Exporter);
+@EXPORT = qw(wrap_msg wrap_mod wrap_ref_mod textdomain gettext dgettext);
+
+use 5.16.0;
+use strict;
+use warnings;
+
+sub import {
+ my $class = shift;
+
+ my $wrapi18n = 1;
+ if ( exists $_[0] && defined $_[0] && $_[0] eq 'nowrapi18n' ) {
+ shift;
+ $wrapi18n = 0;
+ }
+ $class->export_to_level( 1, $class, @_ );
+
+ return if defined &wrapi18n;
+
+ if ( $wrapi18n && -t STDERR && -t STDOUT && eval { require Text::WrapI18N } ) {
+
+ # Don't bother determining the wrap column if we cannot wrap.
+ my $col = $ENV{COLUMNS};
+ if ( !defined $col ) {
+ my @term = eval "use Term::ReadKey; Term::ReadKey::GetTerminalSize()";
+ $col = $term[0] if ( !$@ );
+
+ # If GetTerminalSize() failed we will fallback to a safe default.
+ # This can happen if Term::ReadKey is not available
+ # or this is a terminal-less build or such strange condition.
+ }
+ $col = 76 if ( !defined $col );
+
+ eval ' use Text::WrapI18N qw($columns);
+ $columns = $col;
+ ';
+
+ eval ' sub wrapi18n($$$) { Text::WrapI18N::wrap($_[0],$_[1],$_[2]) } ';
+ } else {
+
+ # If we cannot wrap, well, that's too bad. Survive anyway.
+ eval ' sub wrapi18n($$$) { $_[0].$_[2] } ';
+ }
+}
+
+sub min($$) {
+ return $_[0] < $_[1] ? $_[0] : $_[1];
+}
+
+=head1 FUNCTIONS
+
+=head2 Showing output messages
+
+=over
+
+=item
+
+show_version($)
+
+Shows the current version of the script, and a short copyright message. It
+takes the name of the script as an argument.
+
+=cut
+
+sub show_version {
+ my $name = shift;
+
+ print sprintf(
+ gettext(
+ "%s version %s.\n"
+ . "Written by Martin Quinson and Denis Barbier.\n\n"
+ . "Copyright © 2002-2022 Software in the Public Interest, Inc.\n"
+ . "This is free software; see source code for copying\n"
+ . "conditions. There is NO warranty; not even for\n"
+ . "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+ ),
+ $name,
+ $Locale::Po4a::TransTractor::VERSION
+ ) . "\n";
+}
+
+=item
+
+wrap_msg($@)
+
+This function displays a message the same way as sprintf() does, but wraps
+the result so that they look nice on the terminal.
+
+=cut
+
+sub wrap_msg($@) {
+ my $msg = shift;
+ my @args = @_;
+
+ # print "'$msg' ; ".(scalar @args)." $args[0] $args[1]\n";
+ return wrapi18n( "", "", sprintf( $msg, @args ) ) . "\n";
+}
+
+=item
+
+wrap_mod($$@)
+
+This function works like wrap_msg(), but it takes a module name as the first
+argument, and leaves a space at the left of the message.
+
+=cut
+
+sub wrap_mod($$@) {
+ my ( $mod, $msg ) = ( shift, shift );
+ my @args = @_;
+
+ $mod .= ": ";
+ my $spaces = " " x min( length($mod), 15 );
+ return wrapi18n( $mod, $spaces, sprintf( $msg, @args ) ) . "\n";
+}
+
+=item
+
+wrap_ref_mod($$$@)
+
+This function works like wrap_msg(), but it takes a file:line reference as the
+first argument, a module name as the second one, and leaves a space at the left
+of the message.
+
+=back
+
+=cut
+
+sub wrap_ref_mod($$$@) {
+ my ( $ref, $mod, $msg ) = ( shift, shift, shift );
+ my @args = @_;
+
+ if ( !$mod ) {
+
+ # If we don't get a module name, show the message like wrap_mod does
+ return wrap_mod( $ref, $msg, @args );
+ } else {
+ $ref .= ": ";
+ my $spaces = " " x min( length($ref), 15 );
+ $msg = "$ref($mod)\n$msg";
+ return wrapi18n( "", $spaces, sprintf( $msg, @args ) ) . "\n";
+ }
+}
+
+=head2 Wrappers for other modules
+
+=over
+
+=item
+
+Locale::Gettext
+
+When the Locale::Gettext module cannot be loaded, this module provide dummy
+(empty) implementation of the following functions. In that case, po4a
+messages won't get translated but the program will continue to work.
+
+If Locale::gettext is present, this wrapper also calls
+setlocale(LC_MESSAGES, "") so callers don't depend on the POSIX module
+either.
+
+=over
+
+=item
+
+bindtextdomain($$)
+
+=item
+
+textdomain($)
+
+=item
+
+gettext($)
+
+=item
+
+dgettext($$)
+
+=back
+
+=back
+
+=cut
+
+BEGIN {
+ if ( eval { require Locale::gettext } ) {
+ import Locale::gettext;
+ require POSIX;
+ POSIX::setlocale( &POSIX::LC_MESSAGES, '' );
+ } else {
+ eval '
+ sub bindtextdomain($$) { }
+ sub textdomain($) { }
+ sub gettext($) { shift }
+ sub dgettext($$) { return $_[1] }
+ '
+ }
+}
+
+1;
+__END__
+
+=head1 AUTHORS
+
+ Jordi Vilalta <jvprat@gmail.com>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright © 2005 SPI, Inc.
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
+
+=cut
diff --git a/lib/Locale/Po4a/Dia.pm b/lib/Locale/Po4a/Dia.pm
new file mode 100644
index 0000000..2652edc
--- /dev/null
+++ b/lib/Locale/Po4a/Dia.pm
@@ -0,0 +1,114 @@
+#!/usr/bin/perl
+
+# Po4a::Dia.pm
+#
+# extract and translate translatable strings from Dia diagrams.
+#
+# This code extracts plain text from string tags on uncompressed Dia
+# diagrams.
+#
+# Copyright © 2004 Jordi Vilalta <jvprat@gmail.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+########################################################################
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Dia - convert uncompressed Dia diagrams from/to PO files
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::Dia is a module to help the translation of diagrams in the
+uncompressed Dia format into other [human] languages.
+
+You can get Dia (the graphical editor for these diagrams) from:
+ http://www.gnome.org/projects/dia/
+
+=head1 TRANSLATING WITH PO4A::DIA
+
+This module only translates uncompressed Dia diagrams. You can save your
+uncompressed diagrams with Dia itself, unchecking the "Compress diagram
+files" at the "Save Diagram" dialog.
+
+Another way is to uncompress the dia files from command line with:
+ gunzip < original.dia > uncompressed.dia
+
+=head1 STATUS OF THIS MODULE
+
+This module is fully functional, as it relies in the L<Locale::Po4a::Xml>
+module. This only defines the translatable tags (E<lt>dia:stringE<gt>), and
+filters the internal strings (the content of the E<lt>dia:diagramdataE<gt>
+tag), not interesting for translation.
+
+=head1 SEE ALSO
+
+L<Locale::Po4a::TransTractor(3pm)>, L<Locale::Po4a::Xml(3pm)>, L<po4a(7)|po4a.7>
+
+=head1 AUTHORS
+
+ Jordi Vilalta <jvprat@gmail.com>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright © 2004 Jordi Vilalta <jvprat@gmail.com>
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
+
+=cut
+
+package Locale::Po4a::Dia;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+use Locale::Po4a::Common;
+use Locale::Po4a::Xml;
+
+use vars qw(@ISA);
+@ISA = qw(Locale::Po4a::Xml);
+
+sub initialize {
+ my $self = shift;
+ my %options = @_;
+
+ $self->SUPER::initialize(%options);
+ $self->{options}{'nostrip'} = 1;
+ $self->{options}{'_default_translated'} .= ' <dia:string>';
+ print "Call treat_options\n" if $self->{options}{'debug'};
+ $self->treat_options;
+}
+
+sub found_string {
+ my ( $self, $text, $ref, $options ) = @_;
+ return $text if $text =~ m/^\s*$/s;
+
+ #We skip the paper type string
+ if ( $self->get_path() !~ /<dia:diagramdata>/ ) {
+ $text =~ /^#(.*)#$/s;
+ $text = "#" . $self->translate( $1, $ref, "String", 'wrap' => $self->{options}{'wrap'} ) . "#";
+ }
+
+ return $text;
+}
diff --git a/lib/Locale/Po4a/Docbook.pm b/lib/Locale/Po4a/Docbook.pm
new file mode 100644
index 0000000..4b1dd4f
--- /dev/null
+++ b/lib/Locale/Po4a/Docbook.pm
@@ -0,0 +1,2060 @@
+#!/usr/bin/perl
+# aptitude: cmdsynopsis => missing removal of leading spaces
+
+# Po4a::Docbook.pm
+#
+# extract and translate translatable strings from DocBook XML documents.
+#
+# This code extracts plain text from tags and attributes on DocBook XML
+# documents.
+#
+# Copyright © 2004 Jordi Vilalta <jvprat@gmail.com>
+# Copyright © 2007-2009 Nicolas François <nicolas.francois@centraliens.net>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+########################################################################
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Docbook - convert DocBook XML documents from/to PO files
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::Docbook is a module to help the translation of DocBook XML
+documents into other [human] languages.
+
+=head1 STATUS OF THIS MODULE
+
+This module is fully functional, as it relies in the L<Locale::Po4a::Xml>
+module. This only defines the translatable tags and attributes.
+
+The only known issue is that it doesn't handle entities yet, and this includes
+the file inclusion entities, but you can translate most of those files alone
+(except the typical entities files), and it's usually better to maintain them
+separated.
+
+=head1 OVERRIDE THE DEFAULT BEHAVIOR WITH COMMAND LINE OPTIONS
+
+The default behavior of system provided modules is set to be on the safe side.
+
+For example, the default of B<< <author> >> tag is aiming it to appear under
+B<< <para> >>. But you may be using it only under B<< <bookinfo> >>. For this
+case, you may want to translate it independently for each author.
+
+If you don't like the default behavior of the xml module and its derivative
+modules, you can provide command line options to change their behavior. For
+example, you can add the following to the po4a configuration file:
+
+ opt:"-k 0 -o nodefault=\"<bookinfo> <author>\" \
+ -o break=\"<bookinfo> <author>\" \
+ -o untranslated=\"<bookinfo>\" \
+ -o translated=\"<author>\""
+
+This overrides the default behavior for B<< <bookinfo> >> and B<< <author> >>,
+set B<< <bookinfo> >> and B<< <author> >> to break input data stream on these
+tags, set B<< <bookinfo> >> not to translate its tagged content, and set B<<
+<author> >> to translate its tagged content.
+
+=head1 SEE ALSO
+
+L<Locale::Po4a::TransTractor(3pm)>, L<Locale::Po4a::Xml(3pm)>, L<po4a(7)|po4a.7>
+
+=head1 AUTHORS
+
+ Jordi Vilalta <jvprat@gmail.com>
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright © 2004 Jordi Vilalta <jvprat@gmail.com>
+ Copyright © 2007-2009 Nicolas François <nicolas.francois@centraliens.net>
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
+
+=cut
+
+package Locale::Po4a::Docbook;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+use Locale::Po4a::Common;
+use Locale::Po4a::Xml;
+
+use vars qw(@ISA);
+@ISA = qw(Locale::Po4a::Xml);
+
+sub initialize {
+ my $self = shift;
+ my %options = @_;
+
+ $self->SUPER::initialize(%options);
+ $self->{options}{'wrap'} = 1;
+ $self->{options}{'doctype'} = $self->{options}{'doctype'} || 'docbook xml';
+
+ # AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+
+ # abbrev; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <abbrev>";
+ $self->{options}{'_default_inline'} .= " <abbrev>";
+
+ # abstract; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <abstract>";
+ $self->{options}{'_default_break'} .= " <abstract>";
+
+ # accel; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <accel>";
+ $self->{options}{'_default_inline'} .= " <accel>";
+
+ # ackno; does not contain text; Formatted as a displayed block
+ # Replaced by acknowledgements in DocBook v5.0
+ $self->{options}{'_default_untranslated'} .= " <ackno>";
+ $self->{options}{'_default_break'} .= " <ackno>";
+
+ # acknowledgements; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <acknowledgements>";
+ $self->{options}{'_default_break'} .= " <acknowledgements>";
+
+ # acronym; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <acronym>";
+ $self->{options}{'_default_inline'} .= " <acronym>";
+
+ # action; contains text; Formatted inline; v4, not in v5
+ $self->{options}{'_default_translated'} .= " <action>";
+ $self->{options}{'_default_inline'} .= " <action>";
+
+ # address; contains text; Formatted as a displayed block; verbatim
+ $self->{options}{'_default_translated'} .= " W<address>";
+ $self->{options}{'_default_placeholder'} .= " <address>";
+
+ # affiliation; does not contain text; Formatted inline or as a
+ # displayed block depending on context
+ $self->{options}{'_default_untranslated'} .= " <affiliation>";
+ $self->{options}{'_default_inline'} .= " <affiliation>";
+
+ # alt; contains text; Formatted inline or as a
+ # displayed block depending on context
+ $self->{options}{'_default_translated'} .= " <alt>";
+ $self->{options}{'_default_inline'} .= " <alt>";
+
+ # anchor; does not contain text; Produces no output
+ $self->{options}{'_default_untranslated'} .= " <anchor>";
+ $self->{options}{'_default_inline'} .= " <anchor>";
+
+ # annotation; does not contain text;
+ $self->{options}{'_default_untranslated'} .= " <annotation>";
+ $self->{options}{'_default_placeholder'} .= " <annotation>";
+
+ # answer; does not contain text;
+ $self->{options}{'_default_untranslated'} .= " <answer>";
+ $self->{options}{'_default_break'} .= " <answer>";
+
+ # appendix; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <appendix>";
+ $self->{options}{'_default_break'} .= " <appendix>";
+
+ # appendixinfo; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <appendixinfo>";
+ $self->{options}{'_default_placeholder'} .= " <appendixinfo>";
+
+ # application; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <application>";
+ $self->{options}{'_default_inline'} .= " <application>";
+
+ # arc; does not contain text;
+ $self->{options}{'_default_untranslated'} .= " <arc>";
+ $self->{options}{'_default_inline'} .= " <arc>";
+
+ # area; does not contain text;
+ # NOTE: the area is not translatable as is, but the coords
+ # attribute might be.
+ $self->{options}{'_default_untranslated'} .= " <area>";
+ $self->{options}{'_default_inline'} .= " <area>";
+
+ # areaset; does not contain text;
+ # NOTE: the areaset is not translatable as is. depending on the
+ # language there might be more or less area tags inside.
+ $self->{options}{'_default_untranslated'} .= " <areaset>";
+ $self->{options}{'_default_inline'} .= " <areaset>";
+
+ # areaspec; does not contain text;
+ # NOTE: see area and areaset
+ $self->{options}{'_default_translated'} .= " <areaspec>";
+ $self->{options}{'_default_break'} .= " <areaspec>";
+
+ # arg; contains text; Formatted inline or as a
+ # displayed block depending on context
+ $self->{options}{'_default_translated'} .= " <arg>";
+ $self->{options}{'_default_inline'} .= " <arg>";
+
+ # artheader; does not contain text; renamed to articleinfo in v4.0
+ $self->{options}{'_default_untranslated'} .= " <artheader>";
+ $self->{options}{'_default_placeholder'} .= " <artheader>";
+
+ # article; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <article>";
+ $self->{options}{'_default_break'} .= " <article>";
+
+ # articleinfo; does not contain text; v4 only
+ $self->{options}{'_default_untranslated'} .= " <articleinfo>";
+ $self->{options}{'_default_placeholder'} .= " <articleinfo>";
+
+ # artpagenums; contains text; Formatted inline
+ # NOTE: could be in the break class
+ $self->{options}{'_default_translated'} .= " <artpagenums>";
+ $self->{options}{'_default_inline'} .= " <artpagenums>";
+
+ # attribution; contains text; Formatted inline or as a
+ # displayed block depending on context
+ $self->{options}{'_default_translated'} .= " <attribution>";
+ $self->{options}{'_default_inline'} .= " <attribution>";
+
+ # audiodata; does not contain text;
+ # NOTE: the attributes might be translated
+ $self->{options}{'_default_translated'} .= " <audiodata>";
+ $self->{options}{'_default_placeholder'} .= " <audiodata>";
+ $self->{options}{'_default_attributes'} .= ' <audiodata>fileref';
+
+ # audioobject; does not contain text;
+ # NOTE: might be contained in a inlinemediaobject
+ $self->{options}{'_default_translated'} .= " <audioobject>";
+ $self->{options}{'_default_placeholder'} .= " <audioobject>";
+
+ # author; does not contain text; Formatted inline or as a
+ # displayed block depending on context
+ $self->{options}{'_default_untranslated'} .= " <author>";
+ $self->{options}{'_default_inline'} .= " <author>";
+
+ # authorblurb; does not contain text; Formatted as a displayed block.
+ # v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <authorblurb>";
+ $self->{options}{'_default_placeholder'} .= " <authorblurb>";
+
+ # authorgroup; does not contain text; Formatted inline or as a
+ # displayed block depending on context
+ # NOTE: given the possible parents, it is probably very rarely
+ # inlined
+ $self->{options}{'_default_untranslated'} .= " <authorgroup>";
+ $self->{options}{'_default_break'} .= " <authorgroup>";
+
+ # authorinitials; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <authorinitials>";
+ $self->{options}{'_default_inline'} .= " <authorinitials>";
+
+ # BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+
+ # beginpage; does not contain text; v4, not in v5
+ # beginpage and indexterm are ubiquitous elements, they
+ # can be inlined or block elements. It is better to
+ # make it inline, like indexterm. A placeholder may
+ # be used too.
+ $self->{options}{'_default_untranslated'} .= " <beginpage>";
+ $self->{options}{'_default_inline'} .= " <beginpage>";
+
+ # bibliocoverage; contains text; Formatted inline
+ # NOTE: could be in the break class
+ $self->{options}{'_default_translated'} .= " <bibliocoverage>";
+ $self->{options}{'_default_inline'} .= " <bibliocoverage>";
+
+ # bibliodiv; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <bibliodiv>";
+ $self->{options}{'_default_break'} .= " <bibliodiv>";
+
+ # biblioentry; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <biblioentry>";
+ $self->{options}{'_default_break'} .= " <biblioentry>";
+
+ # bibliography; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <bibliography>";
+ $self->{options}{'_default_break'} .= " <bibliography>";
+
+ # bibliographyinfo; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <bibliographyinfo>";
+ $self->{options}{'_default_placeholder'} .= " <bibliographyinfo>";
+
+ # biblioid; contains text; Formatted inline
+ # NOTE: could be in the break class
+ $self->{options}{'_default_translated'} .= " <biblioid>";
+ $self->{options}{'_default_inline'} .= " <biblioid>";
+
+ # bibliolist; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <bibliolist>";
+ $self->{options}{'_default_break'} .= " <bibliolist>";
+
+ # bibliomisc; contains text; Formatted inline
+ # NOTE: could be in the break class
+ $self->{options}{'_default_translated'} .= " <bibliomisc>";
+ $self->{options}{'_default_inline'} .= " <bibliomisc>";
+
+ # bibliomixed; contains text; Formatted as a displayed block
+ $self->{options}{'_default_translated'} .= " <bibliomixed>";
+ $self->{options}{'_default_placeholder'} .= " <bibliomixed>";
+
+ # bibliomset; contains text; Formatted as a displayed block
+ # NOTE: content might need to be inlined, e.g. <bibliomset><title>
+ $self->{options}{'_default_translated'} .= " <bibliomset>";
+ $self->{options}{'_default_placeholder'} .= " <bibliomset>";
+
+ # biblioref; does not contain text; Formatted inline
+ $self->{options}{'_default_untranslated'} .= " <biblioref>";
+ $self->{options}{'_default_inline'} .= " <biblioref>";
+
+ # bibliorelation; does not contain text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <bibliorelation>";
+ $self->{options}{'_default_inline'} .= " <bibliorelation>";
+
+ # biblioset; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <biblioset>";
+ $self->{options}{'_default_break'} .= " <biblioset>";
+
+ # bibliosource; contains text; Formatted inline
+ # NOTE: could be in the break class
+ $self->{options}{'_default_translated'} .= " <bibliosource>";
+ $self->{options}{'_default_inline'} .= " <bibliosource>";
+
+ # blockinfo; does not contain text; v4.2, not in v5
+ $self->{options}{'_default_untranslated'} .= " <blockinfo>";
+ $self->{options}{'_default_placeholder'} .= " <blockinfo>";
+
+ # blockquote; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <blockquote>";
+ $self->{options}{'_default_break'} .= " <blockquote>";
+
+ # book; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <book>";
+ $self->{options}{'_default_break'} .= " <book>";
+
+ # bookbiblio; does not contain text; Formatted as a displayed block
+ # Removed in v4.0
+ $self->{options}{'_default_untranslated'} .= " <bookbiblio>";
+ $self->{options}{'_default_break'} .= " <bookbiblio>";
+
+ # bookinfo; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <bookinfo>";
+ $self->{options}{'_default_placeholder'} .= " <bookinfo>";
+
+ # bridgehead; contains text; Formatted as a displayed block
+ $self->{options}{'_default_translated'} .= " <bridgehead>";
+ $self->{options}{'_default_break'} .= " <bridgehead>";
+
+ # CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
+
+ # callout; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <callout>";
+ $self->{options}{'_default_break'} .= " <callout>";
+
+ # calloutlist; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <calloutlist>";
+ $self->{options}{'_default_break'} .= " <calloutlist>";
+
+ # caption; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <caption>";
+ $self->{options}{'_default_break'} .= " <caption>";
+
+ # caption (db.html.caption); contains text; Formatted as a displayed block
+ # TODO: Check if this works
+ $self->{options}{'_default_translated'} .= " <table><caption>";
+ $self->{options}{'_default_break'} .= " <table><caption>";
+
+ # caution; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <caution>";
+ $self->{options}{'_default_break'} .= " <caution>";
+
+ # chapter; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <chapter>";
+ $self->{options}{'_default_break'} .= " <chapter>";
+
+ # chapterinfo; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <chapterinfo>";
+ $self->{options}{'_default_placeholder'} .= " <chapterinfo>";
+
+ # citation; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <citation>";
+ $self->{options}{'_default_inline'} .= " <citation>";
+
+ # citebiblioid; contains text; Formatted inline
+ # NOTE: maybe untranslated?
+ $self->{options}{'_default_translated'} .= " <citebiblioid>";
+ $self->{options}{'_default_inline'} .= " <citebiblioid>";
+
+ # citerefentry; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <citerefentry>";
+ $self->{options}{'_default_inline'} .= " <citerefentry>";
+
+ # citetitle; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <citetitle>";
+ $self->{options}{'_default_inline'} .= " <citetitle>";
+
+ # city; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <city>";
+ $self->{options}{'_default_inline'} .= " <city>";
+
+ # classname; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <classname>";
+ $self->{options}{'_default_inline'} .= " <classname>";
+
+ # classsynopsis; does not contain text; may be in a para
+ # NOTE: It may contain a classsynopsisinfo, which should be
+ # verbatim
+ $self->{options}{'_default_untranslated'} .= " <classsynopsis>";
+ $self->{options}{'_default_placeholder'} .= " <classsynopsis>";
+
+ # classsynopsisinfo; contains text;
+ # NOTE: see above
+ $self->{options}{'_default_translated'} .= " W<classsynopsisinfo>";
+ $self->{options}{'_default_inline'} .= " <classsynopsisinfo>";
+
+ # cmdsynopsis; does not contain text; may be in a para
+ $self->{options}{'_default_untranslated'} .= " <cmdsynopsis>";
+ $self->{options}{'_default_placeholder'} .= " <cmdsynopsis>";
+
+ # co; does not contain text; Formatted inline
+ # XXX: tranlsated or not? (label attribute)
+ $self->{options}{'_default_translated'} .= " <co>";
+ $self->{options}{'_default_inline'} .= " <co>";
+
+ # code; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <code>";
+ $self->{options}{'_default_inline'} .= " <code>";
+
+ # col; does not contain text;
+ # NOTE: could be translated to change the layout in a translation
+ # To be done on colgroup in that case.
+ $self->{options}{'_default_untranslated'} .= " <col>";
+ $self->{options}{'_default_break'} .= " <col>";
+
+ # colgroup; does not contain text;
+ # NOTE: could be translated to change the layout in a translation
+ $self->{options}{'_default_untranslated'} .= " <colgroup>";
+ $self->{options}{'_default_break'} .= " <colgroup>";
+
+ # collab; does not contain text; Formatted inline or as a
+ # displayed block depending on context
+ # NOTE: could be in the break class
+ $self->{options}{'_default_untranslated'} .= " <collab>";
+ $self->{options}{'_default_inline'} .= " <collab>";
+
+ # collabname; contains text; Formatted inline or as a
+ # displayed block depending on context; v4, not in v5
+ $self->{options}{'_default_translated'} .= " <collabname>";
+ $self->{options}{'_default_inline'} .= " <collabname>";
+
+ # colophon; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <colophon>";
+ $self->{options}{'_default_break'} .= " <colophon>";
+
+ # colspec; does not contain text;
+ # NOTE: could be translated to change the layout in a translation
+ $self->{options}{'_default_untranslated'} .= " <colspec>";
+ $self->{options}{'_default_break'} .= " <colspec>";
+
+ # command; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <command>";
+ $self->{options}{'_default_inline'} .= " <command>";
+
+ # comment; contains text; Formatted inline or as a displayed block
+ # Renamed to remark in v4.0
+ $self->{options}{'_default_translated'} .= " <comment>";
+ $self->{options}{'_default_inline'} .= " <comment>";
+
+ # computeroutput; contains text; Formatted inline
+ # NOTE: "is not a verbatim environment, but an inline."
+ $self->{options}{'_default_translated'} .= " <computeroutput>";
+ $self->{options}{'_default_inline'} .= " <computeroutput>";
+
+ # confdates; contains text; Formatted inline or as a
+ # displayed block depending on context
+ $self->{options}{'_default_translated'} .= " <confdates>";
+ $self->{options}{'_default_inline'} .= " <confdates>";
+
+ # confgroup; does not contain text; Formatted inline or as a
+ # displayed block depending on context
+ # NOTE: could be in the break class
+ $self->{options}{'_default_untranslated'} .= " <confgroup>";
+ $self->{options}{'_default_inline'} .= " <confgroup>";
+
+ # confnum; contains text; Formatted inline or as a
+ # displayed block depending on context
+ $self->{options}{'_default_translated'} .= " <confnum>";
+ $self->{options}{'_default_inline'} .= " <confnum>";
+
+ # confsponsor; contains text; Formatted inline or as a
+ # displayed block depending on context
+ $self->{options}{'_default_translated'} .= " <confsponsor>";
+ $self->{options}{'_default_inline'} .= " <confsponsor>";
+
+ # conftitle; contains text; Formatted inline or as a
+ # displayed block depending on context
+ $self->{options}{'_default_translated'} .= " <conftitle>";
+ $self->{options}{'_default_inline'} .= " <conftitle>";
+
+ # constant; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <constant>";
+ $self->{options}{'_default_inline'} .= " <constant>";
+
+ # constraint; does not contain text;
+ # NOTE: it might be better to have the production as verbatim
+ # Keeping the constrainst inline to have it close to the
+ # lhs or rhs.
+ # The attribute is translatable
+ $self->{options}{'_default_untranslated'} .= " <constraint>";
+ $self->{options}{'_default_break'} .= " <constraint>";
+
+ # constraintdef; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <constraintdef>";
+ $self->{options}{'_default_break'} .= " <constraintdef>";
+
+ # constructorsynopsis; does not contain text; may be in a para
+ $self->{options}{'_default_untranslated'} .= " <constructorsynopsis>";
+ $self->{options}{'_default_placeholder'} .= " <constructorsynopsis>";
+
+ # contractnum; contains text; Formatted inline or as a displayed block
+ # NOTE: could be in the break class
+ $self->{options}{'_default_translated'} .= " <contractnum>";
+ $self->{options}{'_default_inline'} .= " <contractnum>";
+
+ # contractsponsor; contains text; Formatted inline or as a displayed block
+ # NOTE: could be in the break class
+ $self->{options}{'_default_translated'} .= " <contractsponsor>";
+ $self->{options}{'_default_inline'} .= " <contractsponsor>";
+
+ # contrib; contains text; Formatted inline or as a displayed block
+ $self->{options}{'_default_translated'} .= " <contrib>";
+ $self->{options}{'_default_inline'} .= " <contrib>";
+
+ # copyright; contains text; Formatted inline or as a displayed block
+ # NOTE: could be in the break class
+ $self->{options}{'_default_translated'} .= " <copyright>";
+ $self->{options}{'_default_inline'} .= " <copyright>";
+
+ # coref; does not contain text; Formatted inline
+ # XXX: tranlsated or not? (label attribute)
+ $self->{options}{'_default_translated'} .= " <coref>";
+ $self->{options}{'_default_inline'} .= " <coref>";
+
+ # corpauthor; contains text; Formatted inline or as a
+ # displayed block depending on context; v4, not in v5
+ $self->{options}{'_default_translated'} .= " <corpauthor>";
+ $self->{options}{'_default_inline'} .= " <corpauthor>";
+
+ # corpcredit; contains text; Formatted inline or as a
+ # displayed block depending on context; v4, not in v5
+ $self->{options}{'_default_translated'} .= " <corpcredit>";
+ $self->{options}{'_default_inline'} .= " <corpcredit>";
+
+ # corpname; contains text; Formatted inline or as a
+ # displayed block depending on context; v4, not in v5
+ $self->{options}{'_default_translated'} .= " <corpname>";
+ $self->{options}{'_default_inline'} .= " <corpname>";
+
+ # country; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <country>";
+ $self->{options}{'_default_inline'} .= " <country>";
+
+ # cover; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <cover>";
+ $self->{options}{'_default_break'} .= " <cover>";
+
+ # DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD
+
+ # database; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <database>";
+ $self->{options}{'_default_inline'} .= " <database>";
+
+ # date; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <date>";
+ $self->{options}{'_default_inline'} .= " <date>";
+
+ # dedication; contains text; Formatted as a displayed block
+ $self->{options}{'_default_translated'} .= " <dedication>";
+ $self->{options}{'_default_break'} .= " <dedication>";
+
+ # destructorsynopsis; does not contain text; may be in a para
+ $self->{options}{'_default_untranslated'} .= " <destructorsynopsis>";
+ $self->{options}{'_default_placeholder'} .= " <destructorsynopsis>";
+
+ # docinfo; does not contain text; removed in v4.0
+ $self->{options}{'_default_untranslated'} .= " <docinfo>";
+ $self->{options}{'_default_placeholder'} .= " <docinfo>";
+
+ # EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE
+
+ # edition; contains text; Formatted inline or as a displayed block
+ # NOTE: could be in the break class
+ $self->{options}{'_default_translated'} .= " <edition>";
+ $self->{options}{'_default_inline'} .= " <edition>";
+
+ # editor; does not contain text; Formatted inline or as a
+ # displayed block depending on context
+ $self->{options}{'_default_untranslated'} .= " <editor>";
+ $self->{options}{'_default_inline'} .= " <editor>";
+
+ # email; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <email>";
+ $self->{options}{'_default_inline'} .= " <email>";
+
+ # emphasis; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <emphasis>";
+ $self->{options}{'_default_inline'} .= " <emphasis>";
+
+ # entry; contains text;
+ $self->{options}{'_default_translated'} .= " <entry>";
+ $self->{options}{'_default_break'} .= " <entry>";
+
+ # entrytbl; does not contain text;
+ $self->{options}{'_default_untranslated'} .= " <entrytbl>";
+ $self->{options}{'_default_break'} .= " <entrytbl>";
+
+ # envar; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <envar>";
+ $self->{options}{'_default_inline'} .= " <envar>";
+
+ # epigraph; contains text; Formatted as a displayed block.
+ # NOTE: maybe contained in a para
+ $self->{options}{'_default_translated'} .= " <epigraph>";
+ $self->{options}{'_default_placeholder'} .= " <epigraph>";
+
+ # equation; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <equation>";
+ $self->{options}{'_default_break'} .= " <equation>";
+
+ # errorcode; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <errorcode>";
+ $self->{options}{'_default_inline'} .= " <errorcode>";
+
+ # errorname; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <errorname>";
+ $self->{options}{'_default_inline'} .= " <errorname>";
+
+ # errortext; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <errortext>";
+ $self->{options}{'_default_inline'} .= " <errortext>";
+
+ # errortype; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <errortype>";
+ $self->{options}{'_default_inline'} .= " <errortype>";
+
+ # example; does not contain text; Formatted as a displayed block.
+ # NOTE: maybe contained in a para
+ $self->{options}{'_default_untranslated'} .= " <example>";
+ $self->{options}{'_default_placeholder'} .= " <example>";
+
+ # exceptionname; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <exceptionname>";
+ $self->{options}{'_default_inline'} .= " <exceptionname>";
+
+ # extendedlink; does not contain text;
+ $self->{options}{'_default_untranslated'} .= " <extendedlink>";
+ $self->{options}{'_default_inline'} .= " <extendedlink>";
+
+ # FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+
+ # fax; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <fax>";
+ $self->{options}{'_default_inline'} .= " <fax>";
+
+ # fieldsynopsis; does not contain text; may be in a para
+ $self->{options}{'_default_untranslated'} .= " <fieldsynopsis>";
+ $self->{options}{'_default_inline'} .= " <fieldsynopsis>";
+
+ # figure; does not contain text; Formatted as a displayed block.
+ # NOTE: maybe contained in a para
+ $self->{options}{'_default_untranslated'} .= " <figure>";
+ $self->{options}{'_default_placeholder'} .= " <figure>";
+
+ # filename; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <filename>";
+ $self->{options}{'_default_inline'} .= " <filename>";
+
+ # firstname; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <firstname>";
+ $self->{options}{'_default_inline'} .= " <firstname>";
+
+ # firstterm; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <firstterm>";
+ $self->{options}{'_default_inline'} .= " <firstterm>";
+
+ # footnote; contains text;
+ $self->{options}{'_default_translated'} .= " <footnote>";
+ $self->{options}{'_default_placeholder'} .= " <footnote>";
+
+ # footnoteref; contains text;
+ $self->{options}{'_default_translated'} .= " <footnoteref>";
+ $self->{options}{'_default_inline'} .= " <footnoteref>";
+
+ # foreignphrase; contains text;
+ $self->{options}{'_default_translated'} .= " <foreignphrase>";
+ $self->{options}{'_default_inline'} .= " <foreignphrase>";
+
+ # formalpara; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <formalpara>";
+ $self->{options}{'_default_break'} .= " <formalpara>";
+
+ # funcdef; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <funcdef>";
+ $self->{options}{'_default_inline'} .= " <funcdef>";
+
+ # funcparams; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <funcparams>";
+ $self->{options}{'_default_inline'} .= " <funcparams>";
+
+ # funcprototype; does not contain text;
+ # NOTE: maybe contained in a funcsynopsis, contained in a para
+ $self->{options}{'_default_untranslated'} .= " <funcprototype>";
+ $self->{options}{'_default_placeholder'} .= " <funcprototype>";
+
+ # funcsynopsis; does not contain text;
+ # NOTE: maybe contained in a para
+ $self->{options}{'_default_untranslated'} .= " <funcsynopsis>";
+ $self->{options}{'_default_placeholder'} .= " <funcsynopsis>";
+
+ # funcsynopsisinfo; contains text; verbatim
+ # NOTE: maybe contained in a funcsynopsis, contained in a para
+ $self->{options}{'_default_translated'} .= " W<funcsynopsisinfo>";
+ $self->{options}{'_default_placeholder'} .= " <funcsynopsisinfo>";
+
+ # function; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <function>";
+ $self->{options}{'_default_inline'} .= " <function>";
+
+ # GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
+
+ # glossary; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <glossary>";
+ $self->{options}{'_default_break'} .= " <glossary>";
+
+ # glossaryinfo; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <glossaryinfo>";
+ $self->{options}{'_default_placeholder'} .= " <glossaryinfo>";
+
+ # glossdef; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <glossdef>";
+ $self->{options}{'_default_break'} .= " <glossdef>";
+
+ # glossdiv; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <glossdiv>";
+ $self->{options}{'_default_break'} .= " <glossdiv>";
+
+ # glossentry; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <glossentry>";
+ $self->{options}{'_default_break'} .= " <glossentry>";
+
+ # glosslist; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <glosslist>";
+ $self->{options}{'_default_break'} .= " <glosslist>";
+
+ # glosssee; contains text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " <glosssee>";
+ $self->{options}{'_default_break'} .= " <glosssee>";
+
+ # glossseealso; contains text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " <glossseealso>";
+ $self->{options}{'_default_break'} .= " <glossseealso>";
+
+ # glossterm; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <glossterm>";
+ $self->{options}{'_default_inline'} .= " <glossterm>";
+
+ # graphic; does not contain text; Formatted as a displayed block
+ # v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <graphic>";
+ $self->{options}{'_default_inline'} .= " <graphic>";
+ $self->{options}{'_default_attributes'} .= ' <graphic>fileref';
+
+ # graphicco; does not contain text; Formatted as a displayed block.
+ # v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <graphicco>";
+ $self->{options}{'_default_placeholder'} .= " <graphicco>";
+
+ # group; does not contain text; Formatted inline
+ $self->{options}{'_default_untranslated'} .= " <group>";
+ $self->{options}{'_default_inline'} .= " <group>";
+
+ # guibutton; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <guibutton>";
+ $self->{options}{'_default_inline'} .= " <guibutton>";
+
+ # guiicon; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <guiicon>";
+ $self->{options}{'_default_inline'} .= " <guiicon>";
+
+ # guilabel; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <guilabel>";
+ $self->{options}{'_default_inline'} .= " <guilabel>";
+
+ # guimenu; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <guimenu>";
+ $self->{options}{'_default_inline'} .= " <guimenu>";
+
+ # guimenuitem; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <guimenuitem>";
+ $self->{options}{'_default_inline'} .= " <guimenuitem>";
+
+ # guisubmenu; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <guisubmenu>";
+ $self->{options}{'_default_inline'} .= " <guisubmenu>";
+
+ # HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
+
+ # hardware; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <hardware>";
+ $self->{options}{'_default_inline'} .= " <hardware>";
+
+ # highlights; does not contain text; Formatted inline
+ # v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <highlights>";
+ $self->{options}{'_default_break'} .= " <highlights>";
+
+ # holder; contains text;
+ # NOTE: may depend on the copyright container
+ $self->{options}{'_default_translated'} .= " <holder>";
+ $self->{options}{'_default_inline'} .= " <holder>";
+
+ # honorific; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <honorific>";
+ $self->{options}{'_default_inline'} .= " <honorific>";
+
+ # html:button; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <html:button>";
+ $self->{options}{'_default_inline'} .= " <html:button>";
+
+ # html:fieldset; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <html:fieldset>";
+ $self->{options}{'_default_inline'} .= " <html:fieldset>";
+
+ # html:form; does not contain text;
+ $self->{options}{'_default_translated'} .= " <html:form>";
+ $self->{options}{'_default_inline'} .= " <html:form>";
+
+ # html:input; does not contain text; Formatted inline
+ # NOTE: attributes are translatable
+ $self->{options}{'_default_translated'} .= " <html:input>";
+ $self->{options}{'_default_inline'} .= " <html:input>";
+
+ # html:label; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <html:label>";
+ $self->{options}{'_default_inline'} .= " <html:label>";
+
+ # html:legend; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <html:legend>";
+ $self->{options}{'_default_inline'} .= " <html:legend>";
+
+ # html:option; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <html:option>";
+ $self->{options}{'_default_inline'} .= " <html:option>";
+
+ # html:select; does not contain text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <html:select>";
+ $self->{options}{'_default_inline'} .= " <html:select>";
+
+ # html:textarea; contains text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " <html:textarea>";
+ $self->{options}{'_default_placeholder'} .= " <html:textarea>";
+
+ # imagedata; does not contain text; May be formatted inline or
+ # as a displayed block, depending on context
+ $self->{options}{'_default_translated'} .= " <imagedata>";
+ $self->{options}{'_default_inline'} .= " <imagedata>";
+ $self->{options}{'_default_attributes'} .= ' <imagedata>fileref';
+
+ # imageobject; does not contain text; May be formatted inline or
+ # as a displayed block, depending on context
+ $self->{options}{'_default_untranslated'} .= " <imageobject>";
+ $self->{options}{'_default_inline'} .= " <imageobject>";
+
+ # imageobjectco; does not contain text; Formatted as a displayed block
+ # NOTE: may be in a inlinemediaobject
+ # TODO: check if this works when the inlinemediaobject is defined
+ # as inline
+ $self->{options}{'_default_untranslated'} .= " <imageobjectco>";
+ $self->{options}{'_default_break'} .= " <imageobjectco>";
+
+ # important; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <important>";
+ $self->{options}{'_default_break'} .= " <important>";
+
+ # index; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <index>";
+ $self->{options}{'_default_break'} .= " <index>";
+
+ # indexdiv; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <indexdiv>";
+ $self->{options}{'_default_break'} .= " <indexdiv>";
+
+ # indexentry; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <indexentry>";
+ $self->{options}{'_default_break'} .= " <indexentry>";
+
+ # indexinfo; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <indexinfo>";
+ $self->{options}{'_default_placeholder'} .= " <indexinfo>";
+
+ # indexterm; does not contain text;
+ $self->{options}{'_default_untranslated'} .= " <indexterm>";
+ $self->{options}{'_default_placeholder'} .= " <indexterm>";
+
+ # info; does not contain text;
+ $self->{options}{'_default_untranslated'} .= " <info>";
+ $self->{options}{'_default_placeholder'} .= " <info>";
+
+ # informalequation; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <informalequation>";
+ $self->{options}{'_default_placeholder'} .= " <informalequation>";
+
+ # informalexample; does not contain text; Formatted as a displayed block.
+ # NOTE: can be in a para
+ $self->{options}{'_default_untranslated'} .= " <informalexample>";
+ $self->{options}{'_default_break'} .= " <informalexample>";
+
+ # informalfigure; does not contain text; Formatted as a displayed block.
+ # NOTE: can be in a para
+ $self->{options}{'_default_untranslated'} .= " <informalfigure>";
+ $self->{options}{'_default_break'} .= " <informalfigure>";
+
+ # informaltable; does not contain text; Formatted as a displayed block.
+ # NOTE: can be in a para
+ $self->{options}{'_default_untranslated'} .= " <informaltable>";
+ $self->{options}{'_default_break'} .= " <informaltable>";
+
+ # initializer; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <initializer>";
+ $self->{options}{'_default_inline'} .= " <initializer>";
+
+ # inlineequation; does not contain text; Formatted inline
+ $self->{options}{'_default_translated'} .= " W<inlineequation>";
+ $self->{options}{'_default_placeholder'} .= " <inlineequation>";
+
+ # inlinegraphic; does not contain text; Formatted inline
+ # empty; v4, not in v5
+ $self->{options}{'_default_translated'} .= " W<inlinegraphic>";
+ $self->{options}{'_default_inline'} .= " <inlinegraphic>";
+
+ # inlinemediaobject; does not contain text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <inlinemediaobject>";
+ $self->{options}{'_default_placeholder'} .= " <inlinemediaobject>";
+
+ # interface; contains text; Formatted inline; v4, not in v5
+ $self->{options}{'_default_translated'} .= " <interface>";
+ $self->{options}{'_default_inline'} .= " <interface>";
+
+ # interfacedefinition; contains text; Formatted inline
+ # Removed in v4.0
+ $self->{options}{'_default_translated'} .= " <interfacedefinition>";
+ $self->{options}{'_default_inline'} .= " <interfacedefinition>";
+
+ # interfacename; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <interfacename>";
+ $self->{options}{'_default_inline'} .= " <interfacename>";
+
+ # invpartnumber; contains text; Formatted inline; v4, not in v5
+ $self->{options}{'_default_translated'} .= " <invpartnumber>";
+ $self->{options}{'_default_inline'} .= " <invpartnumber>";
+
+ # isbn; contains text; Formatted inline; v4, not in v5
+ $self->{options}{'_default_translated'} .= " <isbn>";
+ $self->{options}{'_default_inline'} .= " <isbn>";
+
+ # issn; contains text; Formatted inline; v4, not in v5
+ $self->{options}{'_default_translated'} .= " <issn>";
+ $self->{options}{'_default_inline'} .= " <issn>";
+
+ # issuenum; contains text; Formatted inline or as a displayed block
+ # NOTE: could be in the break class
+ $self->{options}{'_default_translated'} .= " <issuenum>";
+ $self->{options}{'_default_inline'} .= " <issuenum>";
+
+ # itemizedlist; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <itemizedlist>";
+ $self->{options}{'_default_break'} .= " <itemizedlist>";
+
+ # itermset; does not contain text;
+ # FIXME
+ $self->{options}{'_default_untranslated'} .= " <itermset>";
+ $self->{options}{'_default_inline'} .= " <itermset>";
+
+ # JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ
+
+ # jobtitle; contains text; Formatted inline or as a displayed block
+ # NOTE: can be in a para
+ $self->{options}{'_default_translated'} .= " <jobtitle>";
+ $self->{options}{'_default_inline'} .= " <jobtitle>";
+
+ # KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
+
+ # keycap; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <keycap>";
+ $self->{options}{'_default_inline'} .= " <keycap>";
+
+ # keycode; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <keycode>";
+ $self->{options}{'_default_inline'} .= " <keycode>";
+
+ # keycombo; does not contain text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <keycombo>";
+ $self->{options}{'_default_inline'} .= " <keycombo>";
+
+ # keysym; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <keysym>";
+ $self->{options}{'_default_inline'} .= " <keysym>";
+
+ # keyword; contains text;
+ # NOTE: could be inline
+ $self->{options}{'_default_translated'} .= " <keyword>";
+ $self->{options}{'_default_break'} .= " <keyword>";
+
+ # keywordset; contains text; Formatted inline or as a displayed block
+ # NOTE: could be placeholder/break
+ $self->{options}{'_default_translated'} .= " <keywordset>";
+ $self->{options}{'_default_break'} .= " <keywordset>";
+
+ # LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
+
+ # label; contains text; Formatted as a displayed block
+ $self->{options}{'_default_translated'} .= " <label>";
+ $self->{options}{'_default_break'} .= " <label>";
+
+ # legalnotice; contains text; Formatted as a displayed block
+ $self->{options}{'_default_translated'} .= " <legalnotice>";
+ $self->{options}{'_default_break'} .= " <legalnotice>";
+
+ # lhs; contains text; Formatted as a displayed block.
+ # NOTE: it might be better to have the production as verbatim
+ # Keeping the constrainst inline to have it close to the
+ # lhs or rhs.
+ $self->{options}{'_default_translated'} .= " <lhs>";
+ $self->{options}{'_default_break'} .= " <lhs>";
+
+ # lineage; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <lineage>";
+ $self->{options}{'_default_inline'} .= " <lineage>";
+
+ # lineannotation; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <lineannotation>";
+ $self->{options}{'_default_inline'} .= " <lineannotation>";
+
+ # link; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <link>";
+ $self->{options}{'_default_inline'} .= " <link>";
+
+ # listitem; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <listitem>";
+ $self->{options}{'_default_break'} .= " <listitem>";
+
+ # literal; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <literal>";
+ $self->{options}{'_default_inline'} .= " <literal>";
+
+ # literallayout; contains text; verbatim
+ $self->{options}{'_default_translated'} .= " W<literallayout>";
+ $self->{options}{'_default_placeholder'} .= " <literallayout>";
+
+ # locator; does not contain text;
+ $self->{options}{'_default_untranslated'} .= " <locator>";
+ $self->{options}{'_default_inline'} .= " <locator>";
+
+ # lot; does not contain text; Formatted as a displayed block.
+ # v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <lot>";
+ $self->{options}{'_default_break'} .= " <lot>";
+
+ # lotentry; contains text; Formatted as a displayed block.
+ # v4, not in v5
+ $self->{options}{'_default_translated'} .= " <lotentry>";
+ $self->{options}{'_default_break'} .= " <lotentry>";
+
+ # MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM
+
+ # manvolnum; contains text;
+ $self->{options}{'_default_translated'} .= " <manvolnum>";
+ $self->{options}{'_default_inline'} .= " <manvolnum>";
+
+ # markup; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <markup>";
+ $self->{options}{'_default_inline'} .= " <markup>";
+
+ # mathphrase; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <mathphrase>";
+ $self->{options}{'_default_inline'} .= " <mathphrase>";
+
+ # medialabel; contains text; Formatted inline
+ # v4, not in v5
+ $self->{options}{'_default_translated'} .= " <medialabel>";
+ $self->{options}{'_default_inline'} .= " <medialabel>";
+
+ # mediaobject; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <mediaobject>";
+ $self->{options}{'_default_placeholder'} .= " <mediaobject>";
+
+ # mediaobjectco; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <mediaobjectco>";
+ $self->{options}{'_default_placeholder'} .= " <mediaobjectco>";
+
+ # member; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <member>";
+ $self->{options}{'_default_inline'} .= " <member>";
+
+ # menuchoice; does not contain text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <menuchoice>";
+ $self->{options}{'_default_inline'} .= " <menuchoice>";
+
+ # methodname; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <methodname>";
+ $self->{options}{'_default_inline'} .= " <methodname>";
+
+ # methodparam; does not contain text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <methodparam>";
+ $self->{options}{'_default_inline'} .= " <methodparam>";
+
+ # methodsynopsis; does not contain text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <methodsynopsis>";
+ $self->{options}{'_default_inline'} .= " <methodsynopsis>";
+
+ # modifier; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <modifier>";
+ $self->{options}{'_default_inline'} .= " <modifier>";
+
+ # mousebutton; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <mousebutton>";
+ $self->{options}{'_default_inline'} .= " <mousebutton>";
+
+ # msg; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <msg>";
+ $self->{options}{'_default_break'} .= " <msg>";
+
+ # msgaud; contains text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " <msgaud>";
+ $self->{options}{'_default_break'} .= " <msgaud>";
+
+ # msgentry; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <msgentry>";
+ $self->{options}{'_default_break'} .= " <msgentry>";
+
+ # msgexplan; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <msgexplan>";
+ $self->{options}{'_default_break'} .= " <msgexplan>";
+
+ # msginfo; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <msginfo>";
+ $self->{options}{'_default_break'} .= " <msginfo>";
+
+ # msglevel; contains text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " <msglevel>";
+ $self->{options}{'_default_break'} .= " <msglevel>";
+
+ # msgmain; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <msgmain>";
+ $self->{options}{'_default_break'} .= " <msgmain>";
+
+ # msgorig; contains text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " <msgorig>";
+ $self->{options}{'_default_break'} .= " <msgorig>";
+
+ # msgrel; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <msgrel>";
+ $self->{options}{'_default_break'} .= " <msgrel>";
+
+ # msgset; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <msgset>";
+ $self->{options}{'_default_placeholder'} .= " <msgset>";
+
+ # msgsub; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <msgsub>";
+ $self->{options}{'_default_break'} .= " <msgsub>";
+
+ # msgtext; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <msgtext>";
+ $self->{options}{'_default_break'} .= " <msgtext>";
+
+ # NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+
+ # nonterminal; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <nonterminal>";
+ $self->{options}{'_default_inline'} .= " <nonterminal>";
+
+ # note; does not contain text; Formatted inline
+ # NOTE: can be in a para
+ $self->{options}{'_default_untranslated'} .= " <note>";
+ $self->{options}{'_default_inline'} .= " <note>";
+
+ # OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
+
+ # objectinfo; does not contain text; v3.1 -> v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <objectinfo>";
+ $self->{options}{'_default_placeholder'} .= " <objectinfo>";
+
+ # olink; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <olink>";
+ $self->{options}{'_default_inline'} .= " <olink>";
+
+ # ooclass; does not contain text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <ooclass>";
+ $self->{options}{'_default_inline'} .= " <ooclass>";
+
+ # ooexception; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <ooexception>";
+ $self->{options}{'_default_inline'} .= " <ooexception>";
+
+ # oointerface; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <oointerface>";
+ $self->{options}{'_default_inline'} .= " <oointerface>";
+
+ # option; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <option>";
+ $self->{options}{'_default_inline'} .= " <option>";
+
+ # optional; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <optional>";
+ $self->{options}{'_default_inline'} .= " <optional>";
+
+ # orderedlist; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <orderedlist>";
+ $self->{options}{'_default_placeholder'} .= " <orderedlist>";
+
+ # org; does not contain text; Formatted inline or as a
+ # displayed block depending on context
+ $self->{options}{'_default_untranslated'} .= " <org>";
+ $self->{options}{'_default_inline'} .= " <org>";
+
+ # orgdiv; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <orgdiv>";
+ $self->{options}{'_default_inline'} .= " <orgdiv>";
+
+ # orgname; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <orgname>";
+ $self->{options}{'_default_inline'} .= " <orgname>";
+
+ # otheraddr; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <otheraddr>";
+ $self->{options}{'_default_inline'} .= " <otheraddr>";
+
+ # othercredit; does not contain text; Formatted inline or as a
+ # displayed block depending on context
+ $self->{options}{'_default_untranslated'} .= " <othercredit>";
+ $self->{options}{'_default_inline'} .= " <othercredit>";
+
+ # othername; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <othername>";
+ $self->{options}{'_default_inline'} .= " <othername>";
+
+ # PPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP
+
+ # package; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <package>";
+ $self->{options}{'_default_inline'} .= " <package>";
+
+ # pagenums; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <pagenums>";
+ $self->{options}{'_default_inline'} .= " <pagenums>";
+
+ # para; contains text; Formatted as a displayed block
+ $self->{options}{'_default_translated'} .= " <para>";
+ $self->{options}{'_default_break'} .= " <para>";
+
+ # paramdef; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <paramdef>";
+ $self->{options}{'_default_inline'} .= " <paramdef>";
+
+ # parameter; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <parameter>";
+ $self->{options}{'_default_inline'} .= " <parameter>";
+
+ # part; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <part>";
+ $self->{options}{'_default_break'} .= " <part>";
+
+ # partinfo; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <partinfo>";
+ $self->{options}{'_default_placeholder'} .= " <partinfo>";
+
+ # partintro; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <partintro>";
+ $self->{options}{'_default_break'} .= " <partintro>";
+
+ # person; does not contain text; Formatted inline or as a
+ # displayed block depending on context
+ $self->{options}{'_default_untranslated'} .= " <person>";
+ $self->{options}{'_default_inline'} .= " <person>";
+
+ # personblurb; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <personblurb>";
+ $self->{options}{'_default_placeholder'} .= " <personblurb>";
+
+ # personname; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <personname>";
+ $self->{options}{'_default_inline'} .= " <personname>";
+
+ # phone; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <phone>";
+ $self->{options}{'_default_inline'} .= " <phone>";
+
+ # phrase; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <phrase>";
+ $self->{options}{'_default_inline'} .= " <phrase>";
+
+ # pob; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <pob>";
+ $self->{options}{'_default_inline'} .= " <pob>";
+
+ # postcode; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <postcode>";
+ $self->{options}{'_default_inline'} .= " <postcode>";
+
+ # preface; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <preface>";
+ $self->{options}{'_default_break'} .= " <preface>";
+
+ # prefaceinfo; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <prefaceinfo>";
+ $self->{options}{'_default_placeholder'} .= " <prefaceinfo>";
+
+ # primary; contains text;
+ $self->{options}{'_default_translated'} .= " <primary>";
+ $self->{options}{'_default_break'} .= " <primary>";
+
+ # primaryie; contains text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " <primaryie>";
+ $self->{options}{'_default_break'} .= " <primaryie>";
+
+ # printhistory; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <printhistory>";
+ $self->{options}{'_default_break'} .= " <printhistory>";
+
+ # procedure; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <procedure>";
+ $self->{options}{'_default_placeholder'} .= " <procedure>";
+
+ # production; doesnot contain text;
+ # NOTE: it might be better to have the production as verbatim
+ # Keeping the constrainst inline to have it close to the
+ # lhs or rhs.
+ $self->{options}{'_default_untranslated'} .= " <production>";
+ $self->{options}{'_default_break'} .= " <production>";
+
+ # productionrecap; does not contain text; like production
+ $self->{options}{'_default_untranslated'} .= " <productionrecap>";
+ $self->{options}{'_default_break'} .= " <productionrecap>";
+
+ # productionset; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <productionset>";
+ $self->{options}{'_default_placeholder'} .= " <productionset>";
+
+ # productname; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <productname>";
+ $self->{options}{'_default_inline'} .= " <productname>";
+
+ # productnumber; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <productnumber>";
+ $self->{options}{'_default_inline'} .= " <productnumber>";
+
+ # programlisting; contains text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " W<programlisting>";
+ $self->{options}{'_default_placeholder'} .= " <programlisting>";
+
+ # programlistingco; contains text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <programlistingco>";
+ $self->{options}{'_default_placeholder'} .= " <programlistingco>";
+
+ # prompt; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <prompt>";
+ $self->{options}{'_default_inline'} .= " <prompt>";
+
+ # property; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <property>";
+ $self->{options}{'_default_inline'} .= " <property>";
+
+ # pubdate; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <pubdate>";
+ $self->{options}{'_default_inline'} .= " <pubdate>";
+
+ # publisher; does not contain text; Formatted inline or as a displayed block
+ # NOTE: could be in the break class
+ $self->{options}{'_default_translated'} .= " <publisher>";
+ $self->{options}{'_default_inline'} .= " <publisher>";
+
+ # publishername; contains text; Formatted inline or as a displayed block
+ $self->{options}{'_default_translated'} .= " <publishername>";
+ $self->{options}{'_default_inline'} .= " <publishername>";
+
+ # QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ
+
+ # qandadiv; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <qandadiv>";
+ $self->{options}{'_default_break'} .= " <qandadiv>";
+
+ # qandaentry; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <qandaentry>";
+ $self->{options}{'_default_break'} .= " <qandaentry>";
+
+ # qandaset; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <qandaset>";
+ $self->{options}{'_default_break'} .= " <qandaset>";
+
+ # question; does not contain text;
+ $self->{options}{'_default_untranslated'} .= " <question>";
+ $self->{options}{'_default_break'} .= " <question>";
+
+ # quote; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <quote>";
+ $self->{options}{'_default_inline'} .= " <quote>";
+
+ # RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR
+
+ # refclass; contains text; Formatted inline or as a displayed block
+ # NOTE: could be in the inline class
+ $self->{options}{'_default_translated'} .= " <refclass>";
+ $self->{options}{'_default_break'} .= " <refclass>";
+
+ # refdescriptor; contains text; Formatted inline or as a displayed block
+ # NOTE: could be in the inline class
+ $self->{options}{'_default_translated'} .= " <refdescriptor>";
+ $self->{options}{'_default_break'} .= " <refdescriptor>";
+
+ # refentry; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <refentry>";
+ $self->{options}{'_default_break'} .= " <refentry>";
+
+ # refentryinfo; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <refentryinfo>";
+ $self->{options}{'_default_placeholder'} .= " <refentryinfo>";
+
+ # refentrytitle; contains text; Formatted as a displayed block
+ # FIXME: do not seems to be a block
+ $self->{options}{'_default_translated'} .= " <refentrytitle>";
+ $self->{options}{'_default_inline'} .= " <refentrytitle>";
+
+ # reference; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <reference>";
+ $self->{options}{'_default_break'} .= " <reference>";
+
+ # referenceinfo; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <referenceinfo>";
+ $self->{options}{'_default_placeholder'} .= " <referenceinfo>";
+
+ # refmeta; does not contains text;
+ # NOTE: could be in the inline class
+ $self->{options}{'_default_untranslated'} .= " <refmeta>";
+ $self->{options}{'_default_break'} .= " <refmeta>";
+
+ # refmiscinfo; contains text; Formatted inline or as a displayed block
+ # NOTE: could be in the inline class
+ $self->{options}{'_default_translated'} .= " <refmiscinfo>";
+ $self->{options}{'_default_break'} .= " <refmiscinfo>";
+
+ # refname; contains text; Formatted inline or as a displayed block
+ # NOTE: could be in the inline class
+ $self->{options}{'_default_translated'} .= " <refname>";
+ $self->{options}{'_default_break'} .= " <refname>";
+
+ # refnamediv; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <refnamediv>";
+ $self->{options}{'_default_break'} .= " <refnamediv>";
+
+ # refpurpose; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <refpurpose>";
+ $self->{options}{'_default_inline'} .= " <refpurpose>";
+
+ # refsect1; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <refsect1>";
+ $self->{options}{'_default_break'} .= " <refsect1>";
+
+ # refsect1info; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <refsect1info>";
+ $self->{options}{'_default_placeholder'} .= " <refsect1info>";
+
+ # refsect2; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <refsect2>";
+ $self->{options}{'_default_break'} .= " <refsect2>";
+
+ # refsect2info; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <refsect2info>";
+ $self->{options}{'_default_placeholder'} .= " <refsect2info>";
+
+ # refsect3; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <refsect3>";
+ $self->{options}{'_default_break'} .= " <refsect3>";
+
+ # refsect3info; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <refsect3info>";
+ $self->{options}{'_default_placeholder'} .= " <refsect3info>";
+
+ # refsection; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <refsection>";
+ $self->{options}{'_default_break'} .= " <refsection>";
+
+ # refsectioninfo; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <refsectioninfo>";
+ $self->{options}{'_default_placeholder'} .= " <refsectioninfo>";
+
+ # refsynopsisdiv; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <refsynopsisdiv>";
+ $self->{options}{'_default_break'} .= " <refsynopsisdiv>";
+
+ # refsynopsisdivinfo; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <refsynopsisdivinfo>";
+ $self->{options}{'_default_placeholder'} .= " <refsynopsisdivinfo>";
+
+ # releaseinfo; contains text; Formatted inline or as a displayed block
+ # NOTE: could be in the inline class
+ $self->{options}{'_default_translated'} .= " <releaseinfo>";
+ $self->{options}{'_default_break'} .= " <releaseinfo>";
+
+ # remark; contains text; Formatted inline or as a displayed block
+ $self->{options}{'_default_translated'} .= " <remark>";
+ $self->{options}{'_default_inline'} .= " <remark>";
+
+ # replaceable; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <replaceable>";
+ $self->{options}{'_default_inline'} .= " <replaceable>";
+
+ # returnvalue; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <returnvalue>";
+ $self->{options}{'_default_inline'} .= " <returnvalue>";
+
+ # revdescription; contains text; Formatted inline or as a displayed block
+ $self->{options}{'_default_translated'} .= " <revdescription>";
+ $self->{options}{'_default_break'} .= " <revdescription>";
+
+ # revhistory; does not contain text; Formatted as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <revhistory>";
+ $self->{options}{'_default_break'} .= " <revhistory>";
+
+ # revision; does not contain text;
+ $self->{options}{'_default_untranslated'} .= " <revision>";
+ $self->{options}{'_default_break'} .= " <revision>";
+
+ # revnumber; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <revnumber>";
+ $self->{options}{'_default_inline'} .= " <revnumber>";
+
+ # revremark; contains text; Formatted inline or as a displayed block
+ $self->{options}{'_default_translated'} .= " <revremark>";
+ $self->{options}{'_default_break'} .= " <revremark>";
+
+ # rhs; contains text; Formatted as a displayed block.
+ # NOTE: it might be better to have the production as verbatim
+ # Keeping the constrainst inline to have it close to the
+ # lhs or rhs.
+ $self->{options}{'_default_translated'} .= " <rhs>";
+ $self->{options}{'_default_break'} .= " <rhs>";
+
+ # row; does not contain text;
+ $self->{options}{'_default_untranslated'} .= " <row>";
+ $self->{options}{'_default_break'} .= " <row>";
+
+ # SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS
+
+ # sbr; does not contain text; line break
+ $self->{options}{'_default_untranslated'} .= " <sbr>";
+ $self->{options}{'_default_break'} .= " <sbr>";
+
+ # screen; contains text; verbatim
+ $self->{options}{'_default_translated'} .= " W<screen>";
+ $self->{options}{'_default_placeholder'} .= " <screen>";
+
+ # screenco; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <screenco>";
+ $self->{options}{'_default_placeholder'} .= " <screenco>";
+
+ # screeninfo; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <screeninfo>";
+ $self->{options}{'_default_placeholder'} .= " <screeninfo>";
+
+ # screenshot; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <screenshot>";
+ $self->{options}{'_default_placeholder'} .= " <screenshot>";
+
+ # secondary; contains text;
+ $self->{options}{'_default_translated'} .= " <secondary>";
+ $self->{options}{'_default_break'} .= " <secondary>";
+
+ # secondaryie; contains text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " <secondaryie>";
+ $self->{options}{'_default_break'} .= " <secondaryie>";
+
+ # sect1; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <sect1>";
+ $self->{options}{'_default_break'} .= " <sect1>";
+
+ # sect1info; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <sect1info>";
+ $self->{options}{'_default_placeholder'} .= " <sect1info>";
+
+ # sect2; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <sect2>";
+ $self->{options}{'_default_break'} .= " <sect2>";
+
+ # sect2info; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <sect2info>";
+ $self->{options}{'_default_placeholder'} .= " <sect2info>";
+
+ # sect3; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <sect3>";
+ $self->{options}{'_default_break'} .= " <sect3>";
+
+ # sect3info; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <sect3info>";
+ $self->{options}{'_default_placeholder'} .= " <sect3info>";
+
+ # sect4; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <sect4>";
+ $self->{options}{'_default_break'} .= " <sect4>";
+
+ # sect4info; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <sect4info>";
+ $self->{options}{'_default_placeholder'} .= " <sect4info>";
+
+ # sect5; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <sect5>";
+ $self->{options}{'_default_break'} .= " <sect5>";
+
+ # sect5info; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <sect5info>";
+ $self->{options}{'_default_placeholder'} .= " <sect5info>";
+
+ # section; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <section>";
+ $self->{options}{'_default_break'} .= " <section>";
+
+ # sectioninfo; does not contain text; v3.1 -> v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <sectioninfo>";
+ $self->{options}{'_default_placeholder'} .= " <sectioninfo>";
+
+ # see; contains text;
+ $self->{options}{'_default_translated'} .= " <see>";
+ $self->{options}{'_default_break'} .= " <see>";
+
+ # seealso; contains text;
+ $self->{options}{'_default_translated'} .= " <seealso>";
+ $self->{options}{'_default_break'} .= " <seealso>";
+
+ # seealsoie; contains text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " <seealsoie>";
+ $self->{options}{'_default_break'} .= " <seealsoie>";
+
+ # seeie; contains text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " <seeie>";
+ $self->{options}{'_default_break'} .= " <seeie>";
+
+ # seg; contains text;
+ $self->{options}{'_default_translated'} .= " <seg>";
+ $self->{options}{'_default_break'} .= " <seg>";
+
+ # seglistitem; does not contain text;
+ $self->{options}{'_default_untranslated'} .= " <seglistitem>";
+ $self->{options}{'_default_break'} .= " <seglistitem>";
+
+ # segmentedlist; does not contain text;
+ $self->{options}{'_default_untranslated'} .= " <segmentedlist>";
+ $self->{options}{'_default_break'} .= " <segmentedlist>";
+
+ # segtitle; contains text;
+ $self->{options}{'_default_translated'} .= " <segtitle>";
+ $self->{options}{'_default_break'} .= " <segtitle>";
+
+ # seriesinfo; does not contain text;
+ # Removed in v4.0
+ $self->{options}{'_default_untranslated'} .= " <seriesinfo>";
+ $self->{options}{'_default_placeholder'} .= " <seriesinfo>";
+
+ # seriesvolnums; contains text; Formatted inline
+ # NOTE: could be in the break class
+ $self->{options}{'_default_translated'} .= " <seriesvolnums>";
+ $self->{options}{'_default_inline'} .= " <seriesvolnums>";
+
+ # set; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <set>";
+ $self->{options}{'_default_break'} .= " <set>";
+
+ # setindex; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <setindex>";
+ $self->{options}{'_default_break'} .= " <setindex>";
+
+ # setindexinfo; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <setindexinfo>";
+ $self->{options}{'_default_placeholder'} .= " <setindexinfo>";
+
+ # setinfo; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <setinfo>";
+ $self->{options}{'_default_placeholder'} .= " <setinfo>";
+
+ # sgmltag; contains text; Formatted inline; v4, not in v5
+ $self->{options}{'_default_translated'} .= " <sgmltag>";
+ $self->{options}{'_default_inline'} .= " <sgmltag>";
+
+ # shortaffil; contains text; Formatted inline or as a
+ # displayed block depending on context
+ $self->{options}{'_default_translated'} .= " <shortaffil>";
+ $self->{options}{'_default_inline'} .= " <shortaffil>";
+
+ # shortcut; does not contain text; Formatted inline
+ $self->{options}{'_default_untranslated'} .= " <shortcut>";
+ $self->{options}{'_default_inline'} .= " <shortcut>";
+
+ # sidebar; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <sidebar>";
+ $self->{options}{'_default_break'} .= " <sidebar>";
+
+ # sidebarinfo; does not contain text; v4, not in v5
+ $self->{options}{'_default_untranslated'} .= " <sidebarinfo>";
+ $self->{options}{'_default_placeholder'} .= " <sidebarinfo>";
+
+ # simpara; contains text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " <simpara>";
+ $self->{options}{'_default_break'} .= " <simpara>";
+
+ # simplelist; does not contain text;
+ $self->{options}{'_default_untranslated'} .= " <simplelist>";
+ $self->{options}{'_default_inline'} .= " <simplelist>";
+
+ # simplemsgentry; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <simplemsgentry>";
+ $self->{options}{'_default_break'} .= " <simplemsgentry>";
+
+ # simplesect; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <simplesect>";
+ $self->{options}{'_default_break'} .= " <simplesect>";
+
+ # spanspec; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <spanspec>";
+ $self->{options}{'_default_break'} .= " <spanspec>";
+
+ # state; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <state>";
+ $self->{options}{'_default_inline'} .= " <state>";
+
+ # step; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <step>";
+ $self->{options}{'_default_break'} .= " <step>";
+
+ # stepalternatives; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <stepalternatives>";
+ $self->{options}{'_default_break'} .= " <stepalternatives>";
+
+ # street; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <street>";
+ $self->{options}{'_default_inline'} .= " <street>";
+
+ # structfield; contains text; Formatted inline; v4, not in v5
+ $self->{options}{'_default_translated'} .= " <structfield>";
+ $self->{options}{'_default_inline'} .= " <structfield>";
+
+ # structname; contains text; Formatted inline; v4, not in v5
+ $self->{options}{'_default_translated'} .= " <structname>";
+ $self->{options}{'_default_inline'} .= " <structname>";
+
+ # subject; does not contain text; Formatted inline or as a displayed block
+ # NOTE: could be in the inline class
+ $self->{options}{'_default_untranslated'} .= " <subject>";
+ $self->{options}{'_default_break'} .= " <subject>";
+
+ # subjectset; does not contain text; Formatted inline or as a displayed block
+ # NOTE: could be in the inline class
+ $self->{options}{'_default_untranslated'} .= " <subjectset>";
+ $self->{options}{'_default_break'} .= " <subjectset>";
+
+ # subjectterm; contains text; Formatted inline or as a displayed block
+ # NOTE: could be in the inline class
+ $self->{options}{'_default_translated'} .= " <subjectterm>";
+ $self->{options}{'_default_break'} .= " <subjectterm>";
+
+ # subscript; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <subscript>";
+ $self->{options}{'_default_inline'} .= " <subscript>";
+
+ # substeps; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <substeps>";
+ $self->{options}{'_default_break'} .= " <substeps>";
+
+ # subtitle; contains text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " <subtitle>";
+ $self->{options}{'_default_break'} .= " <subtitle>";
+
+ # superscript; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <superscript>";
+ $self->{options}{'_default_inline'} .= " <superscript>";
+
+ # surname; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <surname>";
+ $self->{options}{'_default_inline'} .= " <surname>";
+
+ #svg:svg
+
+ # symbol; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <symbol>";
+ $self->{options}{'_default_inline'} .= " <symbol>";
+
+ # synopfragment; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <synopfragment>";
+ $self->{options}{'_default_placeholder'} .= " <synopfragment>";
+
+ # synopfragmentref; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <synopfragmentref>";
+ $self->{options}{'_default_inline'} .= " <synopfragmentref>";
+
+ # synopsis; contains text; verbatim
+ $self->{options}{'_default_translated'} .= " W<synopsis>";
+ $self->{options}{'_default_placeholder'} .= " <synopsis>";
+
+ # systemitem; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <systemitem>";
+ $self->{options}{'_default_inline'} .= " <systemitem>";
+
+ # TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+
+ # table; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <table>";
+ $self->{options}{'_default_placeholder'} .= " <table>";
+
+ # tag; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <tag>";
+ $self->{options}{'_default_inline'} .= " <tag>";
+
+ # task; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <task>";
+ $self->{options}{'_default_placeholder'} .= " <task>";
+
+ # taskprerequisites; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <taskprerequisites>";
+ $self->{options}{'_default_break'} .= " <taskprerequisites>";
+
+ # taskrelated; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <taskrelated>";
+ $self->{options}{'_default_break'} .= " <taskrelated>";
+
+ # tasksummary; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <tasksummary>";
+ $self->{options}{'_default_break'} .= " <tasksummary>";
+
+ # tbody; does not contain text;
+ $self->{options}{'_default_untranslated'} .= " <tbody>";
+ $self->{options}{'_default_break'} .= " <tbody>";
+
+ # td; contains text;
+ $self->{options}{'_default_translated'} .= " <td>";
+ $self->{options}{'_default_break'} .= " <td>";
+
+ # term; contains text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " <term>";
+ $self->{options}{'_default_break'} .= " <term>";
+
+ # termdef; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <termdef>";
+ $self->{options}{'_default_inline'} .= " <termdef>";
+
+ # tertiary; contains text; Suppressed
+ $self->{options}{'_default_translated'} .= " <tertiary>";
+ $self->{options}{'_default_placeholder'} .= " <tertiary>";
+
+ # tertiaryie; contains text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " <tertiaryie>";
+ $self->{options}{'_default_break'} .= " <tertiaryie>";
+
+ # textdata; does not contain text; Formatted inline or as a displayed block
+ # NOTE: could be in the inline class
+ $self->{options}{'_default_untranslated'} .= " <textdata>";
+ $self->{options}{'_default_break'} .= " <textdata>";
+ $self->{options}{'_default_attributes'} .= ' <textdata>fileref';
+
+ # textobject; does not contain text; Formatted inline or as a displayed block
+ # NOTE: could be in the inline class
+ $self->{options}{'_default_untranslated'} .= " <textobject>";
+ $self->{options}{'_default_break'} .= " <textobject>";
+
+ # tfoot; does not contain text;
+ $self->{options}{'_default_untranslated'} .= " <tfoot>";
+ $self->{options}{'_default_break'} .= " <tfoot>";
+
+ # tgroup; does not contain text;
+ $self->{options}{'_default_untranslated'} .= " <tgroup>";
+ $self->{options}{'_default_break'} .= " <tgroup>";
+
+ # th; contains text;
+ $self->{options}{'_default_translated'} .= " <th>";
+ $self->{options}{'_default_break'} .= " <th>";
+
+ # thead; does not contain text;
+ $self->{options}{'_default_untranslated'} .= " <thead>";
+ $self->{options}{'_default_break'} .= " <thead>";
+
+ # tip; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <tip>";
+ $self->{options}{'_default_break'} .= " <tip>";
+
+ # title; contains text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " <title>";
+ $self->{options}{'_default_break'} .= " <title>";
+
+ # titleabbrev; contains text; Formatted inline or as a displayed block
+ # NOTE: could be in the inline class
+ $self->{options}{'_default_translated'} .= " <titleabbrev>";
+ $self->{options}{'_default_break'} .= " <titleabbrev>";
+
+ # toc; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <toc>";
+ $self->{options}{'_default_break'} .= " <toc>";
+
+ # tocback; contains text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " <tocback>";
+ $self->{options}{'_default_break'} .= " <tocback>";
+
+ # tocchap; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " <tocchap>";
+ $self->{options}{'_default_break'} .= " <tocchap>";
+
+ # tocdiv; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <tocdiv>";
+ $self->{options}{'_default_break'} .= " <tocdiv>";
+
+ # tocentry; contains text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " <tocentry>";
+ $self->{options}{'_default_break'} .= " <tocentry>";
+
+ # tocfront; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_translated'} .= " <tocfront>";
+ $self->{options}{'_default_break'} .= " <tocfront>";
+
+ # toclevel1; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <toclevel1>";
+ $self->{options}{'_default_break'} .= " <toclevel1>";
+
+ # toclevel2; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <toclevel2>";
+ $self->{options}{'_default_break'} .= " <toclevel2>";
+
+ # toclevel3; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <toclevel3>";
+ $self->{options}{'_default_break'} .= " <toclevel3>";
+
+ # toclevel4; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <toclevel4>";
+ $self->{options}{'_default_break'} .= " <toclevel4>";
+
+ # toclevel5; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <toclevel5>";
+ $self->{options}{'_default_break'} .= " <toclevel5>";
+
+ # tocpart; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <tocpart>";
+ $self->{options}{'_default_break'} .= " <tocpart>";
+
+ # token; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <token>";
+ $self->{options}{'_default_inline'} .= " <token>";
+
+ # tr; does not contain text;
+ $self->{options}{'_default_untranslated'} .= " <tr>";
+ $self->{options}{'_default_break'} .= " <tr>";
+
+ # trademark; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <trademark>";
+ $self->{options}{'_default_inline'} .= " <trademark>";
+
+ # type; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <type>";
+ $self->{options}{'_default_inline'} .= " <type>";
+
+ # UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU
+
+ # ulink; contains text; Formatted inline; v4, not in v5
+ $self->{options}{'_default_translated'} .= " <ulink>";
+ $self->{options}{'_default_inline'} .= " <ulink>";
+
+ # uri; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <uri>";
+ $self->{options}{'_default_inline'} .= " <uri>";
+
+ # userinput; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <userinput>";
+ $self->{options}{'_default_inline'} .= " <userinput>";
+
+ # VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV
+
+ # varargs; empty element;
+ $self->{options}{'_default_untranslated'} .= " <varargs>";
+ $self->{options}{'_default_inline'} .= " <varargs>";
+
+ # variablelist; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <variablelist>";
+ $self->{options}{'_default_placeholder'} .= " <variablelist>";
+
+ # varlistentry; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <varlistentry>";
+ $self->{options}{'_default_break'} .= " <varlistentry>";
+
+ # varname; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <varname>";
+ $self->{options}{'_default_inline'} .= " <varname>";
+
+ # videodata; contains text; Formatted inline or as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <videodata>";
+ $self->{options}{'_default_break'} .= " <videodata>";
+ $self->{options}{'_default_attributes'} .= ' <videodata>fileref';
+
+ # videoobject; contains text; Formatted inline or as a displayed block
+ $self->{options}{'_default_untranslated'} .= " <videoobject>";
+ $self->{options}{'_default_break'} .= " <videoobject>";
+
+ # void; empty element;
+ $self->{options}{'_default_untranslated'} .= " <void>";
+ $self->{options}{'_default_inline'} .= " <void>";
+
+ # volumenum; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <volumenum>";
+ $self->{options}{'_default_inline'} .= " <volumenum>";
+
+ # WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW
+
+ # warning; does not contain text; Formatted as a displayed block.
+ $self->{options}{'_default_untranslated'} .= " <warning>";
+ $self->{options}{'_default_break'} .= " <warning>";
+
+ # wordasword; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <wordasword>";
+ $self->{options}{'_default_inline'} .= " <wordasword>";
+
+ # XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+ # xref; empty element;
+ $self->{options}{'_default_untranslated'} .= " <xref>";
+ $self->{options}{'_default_inline'} .= " <xref>";
+
+ # YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY
+
+ # year; contains text; Formatted inline
+ $self->{options}{'_default_translated'} .= " <year>";
+ $self->{options}{'_default_inline'} .= " <year>";
+
+ # xreflabel is the label to use when referencing an element that has no title
+ $self->{options}{'_default_attributes'} .= " xreflabel";
+
+ # ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ
+
+ $self->{options}{'_default_attributes'} .= '
+ lang
+ xml:lang';
+
+ print "Call treat_options\n"
+ if $self->{options}{'debug'};
+ $self->treat_options;
+}
diff --git a/lib/Locale/Po4a/Gemtext.pm b/lib/Locale/Po4a/Gemtext.pm
new file mode 100644
index 0000000..7be810f
--- /dev/null
+++ b/lib/Locale/Po4a/Gemtext.pm
@@ -0,0 +1,193 @@
+#!/usr/bin/env perl -w
+
+# Po4a::Gemtext.pm
+#
+# extract and translate translatable strings from a Gemtext documents
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+########################################################################
+
+require Exporter;
+
+package Locale::Po4a::Gemtext;
+
+use 5.006;
+use strict;
+use warnings;
+
+use vars qw(@ISA @EXPORT @AUTOLOAD);
+@ISA = qw(Locale::Po4a::TransTractor);
+@EXPORT = qw();
+
+use Locale::Po4a::TransTractor qw(process new);
+use Locale::Po4a::Common;
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Gemtext - convert Gettext documents from/to PO files.
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::Gemtext is a module to help the translation of Gemtext documents into
+other [human] languages.
+
+=cut
+
+sub initialize { }
+
+sub parse {
+ my $self = shift;
+
+ my ( $line, $ref ) = $self->shiftline();
+
+ while ( defined($line) ) {
+ chomp($line);
+
+ $self->parse_heading( $line, $ref )
+ or $self->parse_preformatted_text( $line, $ref )
+ or $self->parse_list( $line, $ref )
+ or $self->parse_quote( $line, $ref )
+ or $self->parse_link( $line, $ref )
+ or $self->pushline( $self->translate( $line, $ref, "paragraph" ) . "\n" );
+
+ ( $line, $ref ) = $self->shiftline();
+ }
+}
+
+sub parse_heading() {
+ my $self = shift;
+ my $line = shift;
+ my $ref = shift;
+
+ $line =~ m/^(#{1,3}) *(.+)/ or return;
+ my $level = $1;
+ my $content = $2;
+
+ $self->pushline( "$level " . $self->translate( $content, $ref, "heading $level" ) . "\n" );
+
+ return 1;
+}
+
+sub parse_preformatted_text() {
+ my $self = shift;
+ my $line = shift;
+ my $ref = shift;
+
+ $line =~ m/^(``` *)(.*)/ or return;
+ my $prefix = $1;
+ my $content = $2;
+
+ my $toggle_line = $prefix;
+ $toggle_line .= $self->translate( $content, $ref, "alt text" ) if $content;
+ $self->pushline("$toggle_line\n");
+
+ my $paragraph;
+ ( $line, $ref ) = $self->shiftline();
+
+ while ( defined($line) ) {
+ chomp($line);
+
+ if ( $line =~ m/^```/ ) {
+ $self->pushline( $self->translate( $paragraph, $ref, "preformatted text" ) . "\n" );
+ $self->pushline("$line\n");
+
+ return 1;
+ }
+
+ if ($paragraph) {
+ $paragraph .= "\n$line";
+ } else {
+ $paragraph = $line;
+ }
+
+ ( $line, $ref ) = $self->shiftline();
+ }
+}
+
+sub parse_list() {
+ my $self = shift;
+ my $line = shift;
+ my $ref = shift;
+
+ $line =~ m/^\* (.+)/ or return;
+ my $content = $1;
+
+ $self->pushline( "* " . $self->translate( $content, $ref, "list" ) . "\n" );
+
+ return 1;
+}
+
+sub parse_quote() {
+ my $self = shift;
+ my $line = shift;
+ my $ref = shift;
+
+ $line =~ m/^(> *)(.+)/ or return;
+ my $prefix = $1;
+ my $content = $2;
+
+ $self->pushline( $prefix . $self->translate( $content, $ref, "quote" ) . "\n" );
+
+ return 1;
+}
+
+sub parse_link() {
+ my $self = shift;
+ my $line = shift;
+ my $ref = shift;
+
+ $line =~ m/^(=>[ \t]+[^ \t]+)(?:([ \t]+)(.*))?/ or return;
+ my $prefix = $1;
+ my $separator = $2;
+ my $content = $3;
+
+ my $result = $prefix;
+ $result .= $separator . $self->translate( $content, $ref, "link" )
+ if $content;
+ $self->pushline( $result . "\n" );
+
+ return 1;
+}
+
+1;
+
+=head1 STATUS OF THIS MODULE
+
+Tested successfully on simple Gemtext files, such as the official Gemtext documentation.
+
+=head1 SEE ALSO
+
+L<Locale::Po4a::TransTractor(3pm)>, L<po4a(7)|po4a.7>
+
+=head1 AUTHORS
+
+ gemmaro <gemmaro.dev@gmail.com>
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright © 2024 gemmaro <gemmaro.dev@gmail.com>.
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
+
+=cut
diff --git a/lib/Locale/Po4a/Guide.pm b/lib/Locale/Po4a/Guide.pm
new file mode 100644
index 0000000..c3b3a83
--- /dev/null
+++ b/lib/Locale/Po4a/Guide.pm
@@ -0,0 +1,153 @@
+#!/usr/bin/perl
+
+# Po4a::Guide.pm
+#
+# extract and translate translatable strings from Guide XML documents.
+#
+# This code extracts plain text from tags and attributes on Guide XML
+# documents.
+#
+# Copyright © 2004 Jordi Vilalta <jvprat@gmail.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+########################################################################
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Guide - convert Guide XML documents from/to PO files
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::Guide is a module to help in the translation of the Gentoo
+Linux documentation in the Guide XML format into other [human] languages.
+
+This format is documented here: http://www.gentoo.org/doc/en/xml-guide.xml
+
+=head1 STATUS OF THIS MODULE
+
+This module is fully functional, as it relies in the L<Locale::Po4a::Xml>
+module. This only defines the translatable tags and attributes.
+
+The only known issue is that it doesn't include files with the <include
+href="..."> tag, but you can translate all those files alone, and it's usually
+better to have them separated.
+
+=head1 SEE ALSO
+
+L<Locale::Po4a::TransTractor(3pm)>, L<Locale::Po4a::Xml(3pm)>, L<po4a(7)|po4a.7>
+
+=head1 AUTHORS
+
+ Jordi Vilalta <jvprat@gmail.com>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright © 2004 Jordi Vilalta <jvprat@gmail.com>
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
+
+=cut
+
+package Locale::Po4a::Guide;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+use Locale::Po4a::Common;
+use Locale::Po4a::Xml;
+
+use vars qw(@ISA);
+@ISA = qw(Locale::Po4a::Xml);
+
+sub initialize {
+ my $self = shift;
+ my %options = @_;
+
+ #TODO: <include href="..."> includes a file
+ $self->SUPER::initialize(%options);
+ $self->{options}{'_default_translated'} .= '
+ w<abstract>
+ <author>
+ <b>
+ <brite>
+ <c>
+ <codenote>
+ <comment>
+ <const>
+ <date>
+ w<dd>
+ w<dt>
+ <e>
+ <i>
+ <ident>
+ w<impo>
+ <keyword>
+ w<li>
+ <mail>
+ w<note>
+ w<p>
+ <path>
+ W<pre>
+ <stmt>
+ <sub>
+ w<subtitle>
+ w<summary>
+ <sup>
+ w<th>
+ w<ti>
+ w<title>
+ <uri>
+ <var>
+ <version>
+ w<warn>';
+ $self->{options}{'_default_attributes'} .= '
+ <author>title
+ <figure>caption
+ <figure>link
+ <figure>short
+ <guide>lang
+ <guide>link
+ <p>by
+ <pre>caption';
+ $self->{options}{'_default_inline'} .= '
+ <b>
+ <brite>
+ <c>
+ <const>
+ <e>
+ <i>
+ <ident>
+ <img>
+ <keyword>
+ <mail>
+ <path>
+ <stmt>
+ <sub>
+ <sup>
+ <uri>
+ <var>';
+ print "Call treat_options\n" if $self->{options}{'debug'};
+ $self->treat_options;
+}
diff --git a/lib/Locale/Po4a/Halibut.pm b/lib/Locale/Po4a/Halibut.pm
new file mode 100644
index 0000000..0e6926e
--- /dev/null
+++ b/lib/Locale/Po4a/Halibut.pm
@@ -0,0 +1,448 @@
+#!/usr/bin/perl -w
+
+# Copyright © 2004-2008 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>
+#
+# This file is part of po4a.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with po4a; if not, write to the Free Software
+# Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+########################################################################
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Halibut - convert Halibut documents and derivates from/to PO files
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::Halibut is a module to help the translation of Halibut documents into
+other [human] languages.
+
+This module contains the definitions of common Halibut commands and
+environments.
+
+=head1 STATUS OF THIS MODULE
+
+This module is still beta.
+Please send feedback and feature requests.
+
+=head1 CAVEAT
+
+Some constructs are badly supported. The known ones are documented below.
+
+=head2 Verbatim blocks
+
+ \c foo
+ \c bar
+
+The verbatim block is not considered as a whole. Each line will be
+translated separately.
+
+=head1 SEE ALSO
+
+L<Locale::Po4a::TeX(3pm)|Locale::Po4a::TeX>,
+L<Locale::Po4a::TransTractor(3pm)|Locale::Po4a::TransTractor>,
+L<po4a(7)|po4a.7>
+
+=head1 AUTHORS
+
+ Nicolas François <nicolas.francois@centraliens.net>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright © 2004-2008 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
+
+=cut
+
+package Locale::Po4a::Halibut;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+require Exporter;
+use vars qw($VERSION @ISA @EXPORT);
+$VERSION = $Locale::Po4a::TeX::VERSION;
+@ISA = qw(Locale::Po4a::TeX);
+@EXPORT = qw();
+
+use Locale::Po4a::Common;
+use Locale::Po4a::TeX;
+use subs qw(&parse_definition_file
+ &register_generic_command &is_closed &translate_buffer
+ &register_verbatim_environment
+ &generic_command
+ &in_verbatim
+ &get_leading_command);
+*parse_definition_file = \&Locale::Po4a::TeX::parse_definition_file;
+*get_leading_command = \&Locale::Po4a::TeX::get_leading_command;
+*register_generic_command = \&Locale::Po4a::TeX::register_generic_command;
+*register_verbatim_environment = \&Locale::Po4a::TeX::register_verbatim_environment;
+*generic_command = \&Locale::Po4a::TeX::generic_command;
+*is_closed = \&Locale::Po4a::TeX::is_closed;
+*in_verbatim = \&Locale::Po4a::TeX::in_verbatim;
+*translate_buffer = \&Locale::Po4a::TeX::translate_buffer;
+use vars qw($RE_ESCAPE $ESCAPE
+ $RE_VERBATIM
+ $RE_COMMENT $RE_PRE_COMMENT
+ $no_wrap_environments $separated_commands
+ %commands %environments
+ %command_categories %separated
+ %env_separators %debug
+ %translate_buffer_env
+ @exclude_include @comments);
+*RE_ESCAPE = \$Locale::Po4a::TeX::RE_ESCAPE;
+*ESCAPE = \$Locale::Po4a::TeX::ESCAPE;
+*RE_VERBATIM = \$Locale::Po4a::TeX::RE_VERBATIM;
+*RE_COMMENT = \$Locale::Po4a::TeX::RE_COMMENT;
+*RE_PRE_COMMENT = \$Locale::Po4a::TeX::RE_PRE_COMMENT;
+*no_wrap_environments = \$Locale::Po4a::TeX::no_wrap_environments;
+*separated_commands = \$Locale::Po4a::TeX::separated_commands;
+*commands = \%Locale::Po4a::TeX::commands;
+*environments = \%Locale::Po4a::TeX::environments;
+*command_categories = \%Locale::Po4a::TeX::command_categories;
+*separated = \%Locale::Po4a::TeX::separated;
+*env_separators = \%Locale::Po4a::TeX::env_separators;
+*debug = \%Locale::Po4a::TeX::debug;
+*translate_buffer_env = \%Locale::Po4a::TeX::translate_buffer_env;
+*exclude_include = \@Locale::Po4a::TeX::exclude_include;
+*comments = \@Locale::Po4a::TeX::comments;
+
+#$ESCAPE = "\\";
+#$RE_ESCAPE = "\\\\";
+#$RE_VERBATIM = "\@example";
+$RE_VERBATIM = "PO4A_FAKE_VERBATIM";
+
+#$RE_COMMENT = "\\\@(?:c|comment)\\b";
+$RE_COMMENT = "PO4A_FAKE_COMMENT";
+
+sub docheader {
+ return "\\# This file was generated with po4a. Translate the source file.\n" . "\n";
+}
+
+my %break_line = ();
+
+# translate_line_command indicate if the arguments to the command handled
+# by line_command() should be translated:
+# undefined: arguments are not translated
+# 0: there should be no arguments
+# 1: arguments should be translated
+my %translate_line_command = ();
+
+sub parse {
+ my $self = shift;
+ my ( $line, $ref );
+ my $paragraph = ""; # Buffer where we put the paragraph while building
+ my @env = (); # environment stack
+ my $t = "";
+
+ # $docheader_pushed = 0;
+
+ LINE:
+ undef $self->{type};
+ ( $line, $ref ) = $self->shiftline();
+
+ while ( defined($line) ) {
+ chomp($line);
+ $self->{ref} = "$ref";
+
+ if ( $line =~ /^\s*\\\s*po4a\s*:/ ) {
+ parse_definition_line( $self, $line );
+ goto LINE;
+ }
+
+ my $t;
+ ( $paragraph, $t, @env ) = parse_line( $self, $line, $paragraph, \@env );
+ $self->pushline($t);
+
+ # Reinit the loop
+ ( $line, $ref ) = $self->shiftline();
+ undef $self->{type};
+ }
+
+ if ( length($paragraph) ) {
+ ( $t, @env ) = translate_buffer( $self, $paragraph, undef, @env );
+ $self->pushline($t);
+ $paragraph = "";
+ }
+} # end of parse
+
+sub parse_line {
+ my $self = shift;
+ my $line = shift;
+ my $paragraph = shift;
+ my $env = shift;
+ my @e = @$env;
+ my $translated = "";
+
+ my $closed = 1;
+ if ( !in_verbatim(@e) ) {
+ $closed = is_closed($paragraph);
+ }
+
+ # if (not $closed) {
+ # print "not closed. line: '$line'\n para: '$paragraph'\n";
+ # }
+
+ #warn "closed'$closed'$line'$paragraph'\n";
+ if ( $closed and $line =~ /^\s*$/ ) {
+
+ # An empty line. This indicates the end of the current
+ # paragraph.
+ $paragraph .= $line . "\n";
+ if ( length($paragraph) ) {
+ ( $translated, @e ) = translate_buffer( $self, $paragraph, undef, @e );
+ $paragraph = "";
+ }
+ } elsif ( $line =~ m/^\\input / ) {
+ if ( length($paragraph) ) {
+ ( $translated, @e ) = translate_buffer( $self, $paragraph, undef, @e );
+ $paragraph = "";
+ }
+ $translated .= $line . "\n";
+ } elsif ( $line =~ m/^$RE_COMMENT/ ) {
+ $translated = $line . "\n";
+ } elsif ( $closed
+ and ( is_closed($line) or $line =~ /^\\[ce] / )
+ and ( $line =~ /^\\([^ ]*?)( +.*)?$/ ) )
+ {
+ my ( $command, $variant, $args, $buffer );
+ if ( $break_line{$1} ) {
+ my @a = ();
+ $variant = "";
+ $args = \@a;
+ $command = $1;
+ $buffer = $2 || "";
+ } else {
+ ( $command, $variant, $args, $buffer ) = get_leading_command( $self, $line );
+ }
+ if (
+ $break_line{$command}
+ and not( ( $command eq "c" or $command eq "e" )
+ and defined $args->[0] )
+ )
+ {
+ # NOTE: This is just a workaround: "\c " is a verbatim line
+ # and \c{...} is just a verbatim block
+ my $t;
+ if ( length($paragraph) ) {
+ ( $t, @e ) = translate_buffer( $self, $paragraph, undef, @e );
+ $translated .= $t;
+ $paragraph = "";
+ }
+ ( $t, @e ) = generic_command( $self, $command, $variant, $args, \@e );
+ $translated .= $t;
+
+ my $arg = $buffer;
+ my @args = ();
+ if ( defined $arg and length $arg ) {
+
+ # FIXME: keep the spaces ?
+ $arg =~ s/\s*$//s;
+ @args = ( " ", $arg );
+ }
+ ( $t, @e ) = line_command( $self, $command, "", \@args, \@e, 1 );
+ $translated .= $t . "\n";
+ } else {
+
+ # continue the same paragraph
+ $paragraph .= $line . "\n";
+ }
+ } else {
+
+ # continue the same paragraph
+ $paragraph .= $line . "\n";
+ }
+
+ return ( $paragraph, $translated, @e );
+}
+
+sub line_command {
+ my $self = shift;
+ my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift );
+ my $no_wrap = shift;
+ print "line_command($command,$variant,@$args,@$env,$no_wrap)="
+ if ( $debug{'commands'} );
+
+ my $translated = ""; # $ESCAPE.$command;
+ my $line = $args->[1];
+
+ #warn "line_command: '$line'\n";
+ if ( defined $line and length $line ) {
+ if ( defined $translate_line_command{$command}
+ and $translate_line_command{$command} )
+ {
+ # $no_wrap could be forced to 1, but it should already be set
+ $no_wrap = 1;
+ $line =~ s/^(\s*)//;
+ my $spaces = $1 || "";
+ my ( $t, $e ) = $self->translate_buffer( $line, $no_wrap, @$env, $command );
+
+ #warn "line_command: '$t'\n";
+ $translated .= $spaces . $t;
+ } else {
+ $translated .= $line;
+ }
+ }
+ print "($translated,@$env)\n"
+ if ( $debug{'commands'} );
+ return ( $translated, @$env );
+}
+
+# 3.2 Simple inline formatting commands
+# 3.2.1 `\e': Emphasising text
+# inline. extract only if alone
+register_generic_command("-e,{_}");
+$translate_line_command{e} = 1;
+$break_line{e} = 1;
+
+# 3.2.2 `\c' and `\cw': Displaying computer code inline
+# inline. extract only if alone
+# NOTE: \c and \c{...} differs.
+# \c is marked as a break_line command, but this is reversed in
+# parse_line when the \c{...} form is used.
+register_generic_command("-c,{_}");
+$translate_line_command{c} = 1;
+$break_line{c} = 1;
+register_generic_command("-cw,{_}");
+
+# 3.2.3 `\q': Quotation marks
+# inline. extract only if alone
+register_generic_command("-q,{_}");
+
+# 3.2.4 `\-' and `\_': Non-breaking hyphens and spaces
+# inline.
+
+# 3.2.5 `\date': Automatic date generation
+# inline.
+
+# 3.2.6 `\W': WWW hyperlinks
+# inline. extract only if alone
+register_generic_command("-W,{_}");
+
+# 3.2.7 `\u': Specifying arbitrary Unicode characters
+# inline.
+
+# 3.2.8 `\k' and `\K': Cross-references to other sections
+# inline. They should not be translated. extract only if alone
+# FIXME: it will expand to "Section ..." or "section ..."
+# Section and section should be translated.
+register_generic_command("-k,{}");
+register_generic_command("-K,{}");
+
+# 3.2.9 `\#': Inline comments
+# inline. But can be removed from the head or tail.
+register_generic_command("-#,{}");
+$translate_line_command{"#"} = 0;
+$break_line{"#"} = 1;
+
+# 3.3 Paragraph-level commands
+# 3.3.1 `\c': Displaying whole paragraphs of computer code
+# see above
+# 3.3.2 `\b', `\n', `\dt', `\dd', `\lcont': Lists
+register_generic_command("*b,");
+register_generic_command("*n,"); # FIXME: \n{this-one} not supported?
+register_generic_command("*dd,");
+register_generic_command("*dt,");
+
+# 3.3.2.4 Continuing list items into further paragraphs
+register_generic_command("*lcont,{_}"); # registered, but redefined
+$commands{lcont} = sub {
+ my $self = shift;
+ my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift );
+ my $no_wrap = shift;
+ my ( $t, @e ) = ( "", @$env );
+ my $translated = $ESCAPE . $command . $variant . "{";
+ my $text = $args->[1];
+ my $paragraph = "";
+ while ($text =~ s/^(.*?)\n(.*)$/$2/s
+ or $text =~ s/^([^\n]+)$//s )
+ {
+ ( $paragraph, $t, @e ) = parse_line( $self, $1, $paragraph, \@e );
+ $translated .= $t;
+ }
+ ( $t, @e ) = translate_buffer( $self, $paragraph, $no_wrap, @e );
+ $translated .= $t;
+ $translated .= "}";
+
+ return ( $translated, @$env );
+};
+
+# 3.3.3 `\rule': Horizontal rules
+register_generic_command("rule,"); # TODO: TBC does it break paragraphs
+
+# 3.3.4 `\quote': Indenting multiple paragraphs as a long quotation
+register_generic_command("*quote,{_}"); # TODO: TBC
+
+# 3.3.5 `\C', `\H', `\S', `\A', `\U': Chapter and section headings
+# FIXME: What happens if the line is rewrapped?
+# NOTE: The name of the section is not translated.
+register_generic_command("*C,{}");
+register_generic_command("*S0,{}"); # Synonym for \H
+register_generic_command("*H,{}");
+register_generic_command("*S,{}");
+register_generic_command("*S1,{}"); # Synonym for \S
+register_generic_command("*S2,{}");
+register_generic_command("*S3,{}"); # FIXME: and so on
+
+# FIXME: \S{question-about-fish}{Question}
+register_generic_command("*A,{}");
+register_generic_command("*U,{}");
+
+# 3.3.6 `\copyright', `\title', `\versionid': Miscellaneous blurb commands
+register_generic_command("*title,");
+register_generic_command("*copyright,");
+register_generic_command("*versionid,");
+
+# 3.4 Creating a bibliography
+# nocite
+register_generic_command("*nocite,{}");
+
+# B
+register_generic_command("*B,{}");
+
+# BR
+register_generic_command("*BR,{}"); # FIXME: \BR{freds-book} [Fred1993]
+
+# 3.5 Creating an index
+# 3.5.1 Simple indexing
+# \i: inline \i{index} or \i\x{grep}
+# \ii
+register_generic_command("-ii,{_}");
+
+# \IM: inline. Variable number of arguments
+register_generic_command("*IM,{_}");
+$translate_line_command{IM} = 1;
+$break_line{IM} = 1;
+
+# 3.6 Configuring Halibut
+# \cfg
+register_generic_command("+cfg,{}{_}"); # NOTE: the new command is not registered
+
+# 3.7 Defining macros
+register_generic_command("*define,{}"); # FIXME: line
+$translate_line_command{define} = 1;
+$break_line{define} = 1;
+
+1;
diff --git a/lib/Locale/Po4a/InProgress/Debconf.pm b/lib/Locale/Po4a/InProgress/Debconf.pm
new file mode 100644
index 0000000..ab5d7c2
--- /dev/null
+++ b/lib/Locale/Po4a/InProgress/Debconf.pm
@@ -0,0 +1,226 @@
+#!/usr/bin/perl -w
+
+# Po4a::Debconf.pm
+#
+# extract and translate translatable strings from debconf templates
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+########################################################################
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Debconf - convert debconf templates from/to PO files
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::Debconf is a module to help the translation of the debconf
+templates into other [human] languages.
+
+=head1 OPTIONS ACCEPTED BY THIS MODULE
+
+NONE.
+
+=head1 STATUS OF THIS MODULE
+
+Not tested.
+
+
+DO NOT USE THIS MODULE TO PRODUCE TEMPLATES. It's only good to extract data.
+
+
+=cut
+
+# Note that the following works. It may help to write a multi-translate
+
+# sub toto {
+# do shift;
+# }
+# toto({print "ok"});
+
+package Locale::Po4a::Debconf;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+require Exporter;
+use vars qw(@ISA @EXPORT);
+@ISA = qw(Locale::Po4a::TransTractor);
+@EXPORT = qw();
+
+use Locale::Po4a::TransTractor;
+use Locale::Po4a::Common;
+
+sub initialize { }
+
+sub parse {
+ my $self = shift;
+
+ my ( $line, $lref );
+
+ my ( $field, $value, $extended, $ref, $type ) = ( '', '', '', '', '' );
+ my $verb = 0; # whether we are in verbatim mode
+
+ my $escape = sub {
+ my $str = shift;
+ $str =~ s/"/\\"/g;
+ return $str;
+ };
+
+ # function in charge of pushing the accumulated material to output
+ my $handle_field = sub {
+ my $field = shift;
+ my $value = shift;
+ my $extended = shift;
+ my $ref = shift;
+ my $type = shift;
+
+ $field =~ s/^(_*)(.*)$/$2/;
+ my $undercount = length($1) || 0; # number of _ leading the field name
+
+ # Only one leading _: regular translated field
+ if ( $undercount == 1 ) {
+
+ # the untranslated field
+ $self->pushline("$field: $value");
+ map { $self->pushline( ' ' . ( $_ || '.' ) ) } split( /\n/, $extended );
+
+ my $eval = '$self->pushline("' . $field . '[FIXME:LANGCODE.ENCODING]: "'; # what to multi-eval
+ $eval .=
+ '.$self->translate("' . $escape->($value) . "\",\"$ref\",\"$type/$field\",wrap=>1)" . '."\n".' . "\n";
+
+ my $count = 0;
+ foreach my $para ( split( /\n\n/, $extended ) ) {
+ my $wrap = 1;
+ if ( $para =~ /(^|\n)\s/m ) {
+ $wrap = 0;
+ }
+ $eval .= ( $count ? '.' : '' );
+ $count++;
+ $eval .=
+ '$self->translate("'
+ . $escape->($para)
+ . "\",\"$ref\",\"$type/$field\[$count\]\",wrap=>$wrap)" . "\n";
+ }
+
+ $eval .= ")\n";
+ print STDERR $eval if $self->{options}{'debug'};
+ eval $eval;
+ print STDERR "XXXXXXXXXXXXXXXXX\n" if $self->{options}{'debug'};
+
+ # two leading _: split on coma and multi-translate each part. No extended value.
+ } elsif ( $undercount == 2 ) {
+ $self->pushline("$field: $value"); # the untranslated field
+
+ my $eval = '$self->pushline("' . $field . 'FIXME[LANGCODE]: "'; # what to multi-eval
+
+ my $first = 1;
+ for my $part ( split( /(?<!\\), */, $value, 0 ) ) {
+ $part =~ s/\\,/,/g;
+ $eval .=
+ ( $first ? '' : '.", "' )
+ . '.$self->translate("'
+ . $escape->($part)
+ . "\",\"$ref\",\"$type/$field chunk\",wrap=>1)";
+ $first = 0;
+ }
+ $eval .= ")\n";
+
+ print $eval if $self->{options}{'debug'};
+ eval $eval;
+
+ # no leading _: don't touch it
+ } else {
+ $self->pushline("$field: $value");
+ map { $self->pushline( ' ' . ( $_ || '.' ) ) } split( /\n/, $extended );
+ }
+ };
+
+ # main loop
+ ( $line, $lref ) = $self->shiftline();
+
+ while ( defined($line) ) {
+
+ # a new field (within a stanza)
+ if ( $line =~ /^([-_.A-Za-z0-9]*):\s?(.*)/ ) {
+
+ $handle_field->( $field, $value, $extended, $ref, $type ); # deal with previously accumulated
+ ( $field, $value, $extended, $verb ) = ( '', '', '', 0 );
+
+ $field = $1;
+ $value = $2;
+ $value =~ s/\s*$//;
+ $extended = '';
+ $ref = $lref;
+
+ $type = $value if $field eq 'Type';
+
+ die wrap_mod( "po4a::debconf", dgettext( "po4a", "Translated field in master document: %s" ), $field )
+ if $field =~ m/-/;
+
+ # paragraph separator within extended value
+ } elsif ( $line =~ /^\s\.$/ ) {
+ $extended .= "\n\n";
+
+ # continuation of extended value
+ } elsif ( $line =~ /^\s(.*)/ ) {
+
+ my $bit = $1;
+ $verb = 1 if ( $bit =~ m/^\s/ );
+
+ $bit =~ s/\s*$//;
+
+ $extended .= ( $verb ? "\n" : ' ' ) if length $extended && $extended !~ /[\n ]$/;
+ $extended .= $bit . ( $verb ? "\n" : "" );
+
+ # this may be an empty line closing the stanza, a comment or even a parse error (if file not DebConf-clean).
+ } else {
+
+ $handle_field->( $field, $value, $extended, $ref, $type );
+ ( $field, $value, $extended, $verb ) = ( '', '', '', 0 );
+
+ $self->pushline($line);
+
+ }
+
+ ( $line, $lref ) = $self->shiftline();
+ }
+
+ $handle_field->( $field, $value, $extended, $ref, $type );
+}
+
+1;
+
+=head1 AUTHORS
+
+This module is loosely inspired from both po-debconf and debconf code. The
+adaptation for po4a was done by:
+
+ Martin Quinson (mquinson#debian.org)
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright © 2005 SPI, Inc.
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
diff --git a/lib/Locale/Po4a/InProgress/NewsDebian.pm b/lib/Locale/Po4a/InProgress/NewsDebian.pm
new file mode 100644
index 0000000..167aa1b
--- /dev/null
+++ b/lib/Locale/Po4a/InProgress/NewsDebian.pm
@@ -0,0 +1,155 @@
+#!/usr/bin/perl -w
+
+# Po4a::NewsDebian.pm
+#
+# extract and translate translatable strings from a NEWS.Debian documents
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+########################################################################
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::NewsDebian - convert NEWS.Debian documents from/to PO files
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::NewsDebian is a module to help the translation of the
+NEWS.Debian files into other [human] languages. Those files are where
+maintainer are supposed to write the important news about their package.
+
+=head1 OPTIONS ACCEPTED BY THIS MODULE
+
+NONE.
+
+=head1 STATUS OF THIS MODULE
+
+Not tested.
+
+A finer split of the entries may be preferable (search for /^ */, for
+example), but this version is more robust and NEWS.Debian entries are not
+supposed to change that often.
+
+=cut
+
+package Locale::Po4a::NewsDebian;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+require Exporter;
+use vars qw(@ISA @EXPORT);
+@ISA = qw(Locale::Po4a::TransTractor);
+@EXPORT = qw();
+
+use Locale::Po4a::TransTractor;
+use Locale::Po4a::Common;
+
+sub initialize { }
+
+sub parse {
+ my $self = shift;
+
+ my ($blanklines) = (""); # We want to preserve the blank lines inside the entry, and strip the extrem ones
+
+ my ($body) = ""; # the accumulated paragraph
+ my ($bodyref) = "";
+ my ($bodytype) = "";
+
+ my ( $line, $lref );
+
+ # main loop
+ ( $line, $lref ) = $self->shiftline();
+ print "seen >>$line<<\n" if $self->{options}{'debug'};
+ while ( defined($line) ) {
+
+ # Begining of an entry
+ if ( $line =~ m/^(\w[-+0-9a-z.]*) \(([^\(\) \t]+)\)((\s+[-0-9a-z]+)+)\;/i ) {
+
+ die wrap_ref_mod( $lref, "po4a::newsdebian",
+ dgettext( "po4a", "Begin of a new entry before the end of previous one" ) )
+ if ( length($body) );
+
+ $self->pushline( $line . "\n" );
+
+ # Signature of this entry
+ $bodyref = $lref;
+ $bodytype = $line;
+
+ # eat all leading empty lines
+ ( $line, $lref ) = $self->shiftline();
+ while ( defined($line) && $line =~ m/^\s*$/ ) {
+ print "Eat >>$line<<\n" if $self->{options}{'debug'};
+ ( $line, $lref ) = $self->shiftline();
+ }
+
+ # ups, ate one line too much. Put it back.
+ $self->unshiftline( $line, $lref );
+
+ # get ready to read the entry (cleanups)
+ $blanklines = "";
+
+ # End of current entry
+ } elsif ( $line =~ m/^ \-\- (.*) <(.*)> .*$/ )
+ { #((\w+\,\s*)?\d{1,2}\s+\w+\s+\d{4}\s+\d{1,2}:\d\d:\d\d\s+[-+]\d{4}(\s+\([^\\\(\)]\))?) *$/) {
+
+ $self->translate( $body, $bodyref, $bodytype, wrap => 0 );
+ $body = "";
+
+ # non-specific line
+ } else {
+
+ if ( $line =~ /^\s*$/ ) {
+ $blanklines .= "$line";
+ } else {
+ $body .= $blanklines . $line;
+ $blanklines = "";
+ }
+ }
+
+ ( $line, $lref ) = $self->shiftline();
+ print "seen >>" . ( $line || '' ) . "<<\n" if $self->{options}{'debug'};
+ }
+}
+
+1;
+
+=head1 AUTHORS
+
+This module is loosely inspired from /usr/lib/dpkg/parsechangelog/debian, which is:
+
+ Copyright © 1996 Ian Jackson. This is free software; see the GNU
+ General Public License version 2 or later for copying conditions. There
+ is NO warranty.
+
+The adaptation for po4a was done by:
+
+ Martin Quinson (mquinson#debian.org)
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright © 1996 Ian Jackson.
+ Copyright © 2005 SPI, Inc.
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
diff --git a/lib/Locale/Po4a/Ini.pm b/lib/Locale/Po4a/Ini.pm
new file mode 100644
index 0000000..f29cd11
--- /dev/null
+++ b/lib/Locale/Po4a/Ini.pm
@@ -0,0 +1,120 @@
+# Locale::Po4a::Ini -- Convert ini files to PO file, for translation.
+#
+# This program is free software; you may redistribute it and/or modify it
+# under the terms of GPL v2.0 or later (see COPYING).
+#
+
+############################################################################
+# Modules and declarations
+############################################################################
+
+use Locale::Po4a::TransTractor qw(process new);
+use Locale::Po4a::Common;
+
+package Locale::Po4a::Ini;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+require Exporter;
+
+use vars qw(@ISA @EXPORT $AUTOLOAD);
+@ISA = qw(Locale::Po4a::TransTractor);
+@EXPORT = qw();
+
+my $debug = 0;
+
+sub initialize { }
+
+sub parse {
+ my $self = shift;
+ my ( $line, $ref );
+ my $par;
+
+ LINE:
+ ( $line, $ref ) = $self->shiftline();
+
+ while ( defined($line) ) {
+ chomp($line);
+ print STDERR "begin\n" if $debug;
+
+ if ( $line =~ /\"/ ) {
+ print STDERR "Start of line containing \".\n" if $debug;
+
+ # Text before the first quote
+ $line =~ m/(^[^"\r\n]*)"/;
+ my $pre_text = $1;
+ print STDERR " PreText=" . $pre_text . "\n" if $debug;
+
+ # The text for translation
+ $line =~ m/"([^\r\n]*)"/;
+ my $quoted_text = $1;
+ print STDERR " QuotedText=" . $quoted_text . "\n" if $debug;
+
+ # Text after last quote
+ $line =~ m/"([^"\n]*$)/;
+ my $post_text = $1;
+ print STDERR " PostText=" . $post_text . "\n" if $debug;
+
+ # Translate the string it
+ $par = $self->translate( $quoted_text, $ref, $pre_text );
+
+ # Escape the \n characters
+ $par =~ s/\n/\\n/g;
+
+ # Now push the result
+ $self->pushline( $pre_text . '"' . $par . '"' . $post_text . "\n" );
+ print STDERR "End of line containing \".\n" if $debug;
+ } else {
+ print STDERR "Other stuff\n" if $debug;
+ $self->pushline("$line\n");
+ }
+
+ # Reinit the loop
+ ( $line, $ref ) = $self->shiftline();
+ }
+}
+
+##############################################################################
+# Module return value and documentation
+##############################################################################
+
+1;
+__END__
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Ini - convert INI files from/to PO files
+
+=head1 DESCRIPTION
+
+Locale::Po4a::Ini is a module to help the translation of INI files into other
+[human] languages.
+
+The module searches for lines of the following format and extracts the quoted
+text:
+
+identificator="text than can be translated"
+
+NOTE: If the text is not quoted, it will be ignored.
+
+=head1 SEE ALSO
+
+L<Locale::Po4a::TransTractor(3pm)>, L<po4a(7)|po4a.7>
+
+=head1 AUTHORS
+
+ Razvan Rusu <rrusu@bitdefender.com>
+ Costin Stroie <cstroie@bitdefender.com>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright © 2006 BitDefender
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
+
+=cut
diff --git a/lib/Locale/Po4a/KernelHelp.pm b/lib/Locale/Po4a/KernelHelp.pm
new file mode 100644
index 0000000..96cd21f
--- /dev/null
+++ b/lib/Locale/Po4a/KernelHelp.pm
@@ -0,0 +1,170 @@
+# Locale::Po4a::KernelHelp -- Convert kernel configuration help from/to PO files
+#
+# This program is free software; you may redistribute it and/or modify it
+# under the terms of GPL v2.0 or later (see COPYING).
+#
+# See gettext documentation for more info about PO files.
+
+############################################################################
+# Modules and declarations
+############################################################################
+
+use Pod::Parser;
+use Locale::Po4a::TransTractor qw(process new);
+use Locale::Po4a::Common;
+
+package Locale::Po4a::KernelHelp;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+require Exporter;
+
+use vars qw(@ISA @EXPORT $AUTOLOAD);
+@ISA = qw(Locale::Po4a::TransTractor);
+@EXPORT = qw(); # new process write read writepo readpo);
+
+my $debug = 0;
+
+sub initialize { }
+
+sub parse {
+ my $self = shift;
+ my ( $line, $ref );
+ my $paragraph = ""; # Buffer where we put the paragraph while building
+ my ($status) = 0; # Syntax of KH is:
+ # description<nl>variable<nl>help text<nl><nl>
+ # Status will be:
+ # 0 1 2 3 0
+
+ my ( $desc, $variable );
+
+ LINE:
+ ( $line, $ref ) = $self->shiftline();
+
+ while ( defined($line) ) {
+ chomp($line);
+ print STDERR "status=$status;Seen >>$line<<:" if $debug;
+
+ if ( $line =~ /^\#/ ) {
+ print STDERR "comment.\n" if $debug;
+ $self->pushline("$line\n");
+ } elsif ( $status == 0 ) {
+ if ( $line =~ /\S/ ) {
+ print STDERR "short desc.\n" if $debug;
+ $desc = $line;
+ $status++;
+ } else {
+ print STDERR "empty line.\n" if $debug;
+ $self->pushline("$line\n");
+ }
+ } elsif ( $status == 1 ) {
+ print STDERR "var name.\n" if $debug;
+ $variable = $line;
+ $status++;
+
+ $self->pushline( $self->translate( $desc, $ref, "desc_$variable" ) . "\n$variable\n" );
+
+ } elsif ( $status == 2 ) {
+ $line =~ s/^ //;
+ if ( $line =~ /\S/ ) {
+ print STDERR "paragraph line.\n" if $debug;
+ $paragraph .= $line . "\n";
+ } else {
+ print STDERR "end of paragraph.\n" if $debug;
+ $status++;
+ $paragraph = $self->translate( $paragraph, $ref, "helptxt_$variable" );
+ $paragraph =~ s/^/ /gm;
+ $self->pushline("$paragraph\n");
+ $paragraph = "";
+ }
+ } elsif ( $status == 3 ) {
+ if ( $line =~ s/^ // ) {
+ if ( $line =~ /\S/ ) {
+ print "begin of paragraph.\n" if $debug;
+ $paragraph = $line . "\n";
+ $status--;
+ } else {
+ print "end of config option.\n" if $debug;
+ $status = 0;
+ $self->pushline("\n");
+ }
+ } else {
+ $self->unshiftline( $line, $ref );
+ $status = 0;
+ }
+ } else {
+ die wrap_ref_mod( $ref, "po4a::kernelhelp", gettext("Syntax error") );
+ }
+
+ # Reinit the loop
+ ( $line, $ref ) = $self->shiftline();
+ }
+}
+
+sub docheader {
+ return <<EOT;
+#
+# *****************************************************
+# * GENERATED FILE, DO NOT EDIT *
+# * THIS IS NO SOURCE FILE, BUT RESULT OF COMPILATION *
+# *****************************************************
+#
+# This file was generated by po4a(7). Do not store it (in VCS, for example),
+# but store the PO file used as source file by pod-translate.
+#
+# In fact, consider this as a binary, and the PO file as a regular .c file:
+# If the PO get lost, keeping this translation up-to-date will be harder.
+#
+EOT
+}
+1;
+
+##############################################################################
+# Module return value and documentation
+##############################################################################
+
+1;
+__END__
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::KernelHelp - convert kernel configuration help from/to PO files
+
+=head1 DESCRIPTION
+
+Locale::Po4a::KernelHelp is a module to help the translation of
+documentation for the Linux kernel configuration options into other [human]
+languages.
+
+=head1 STATUS OF THIS MODULE
+
+This module is just written, and needs more tests. Most of the needed work
+will concern the tools used to parse this file (and configure the kernel),
+so that they accept to read the documentation from another (translated)
+file.
+
+=head1 SEE ALSO
+
+L<Pod::Parser>,
+L<Locale::Po4a::Man(3pm)>,
+L<Locale::Po4a::Pod(3pm)>,
+L<Locale::Po4a::TransTractor(3pm)>,
+L<po4a(7)|po4a.7>
+
+=head1 AUTHORS
+
+ Denis Barbier <barbier@linuxfr.org>
+ Martin Quinson (mquinson#debian.org)
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright © 2002 SPI, Inc.
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
+
+=cut
diff --git a/lib/Locale/Po4a/LaTeX.pm b/lib/Locale/Po4a/LaTeX.pm
new file mode 100644
index 0000000..c175110
--- /dev/null
+++ b/lib/Locale/Po4a/LaTeX.pm
@@ -0,0 +1,397 @@
+#!/usr/bin/perl -w
+
+# Copyright © 2004, 2005 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>
+#
+# This file is part of po4a.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with po4a; if not, write to the Free Software
+# Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+########################################################################
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::LaTeX - convert LaTeX documents and derivates from/to PO files
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::LaTeX is a module to help the translation of LaTeX documents into
+other [human] languages. It can also be used as a base to build modules for
+LaTeX-based documents.
+
+This module contains the definitions of common LaTeX commands and
+environments.
+
+See the L<Locale::Po4a::TeX(3pm)|Locale::Po4a::TeX> manpage for the list
+of recognized options.
+
+=head1 SEE ALSO
+
+L<Locale::Po4a::TeX(3pm)|Locale::Po4a::TeX>,
+L<Locale::Po4a::TransTractor(3pm)|Locale::Po4a::TransTractor>,
+L<po4a(7)|po4a.7>
+
+=head1 AUTHORS
+
+ Nicolas François <nicolas.francois@centraliens.net>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright © 2004, 2005 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
+
+=cut
+
+package Locale::Po4a::LaTeX;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+require Exporter;
+use vars qw($VERSION @ISA @EXPORT);
+$VERSION = $Locale::Po4a::TeX::VERSION;
+@ISA = qw(Locale::Po4a::TeX);
+@EXPORT = qw();
+
+use Locale::Po4a::TeX;
+use subs qw(&generic_command
+ &parse_definition_file
+ &register_generic_command
+ &register_generic_environment);
+*parse_definition_file = \&Locale::Po4a::TeX::parse_definition_file;
+*generic_command = \&Locale::Po4a::TeX::generic_command;
+*register_generic_command = \&Locale::Po4a::TeX::register_generic_command;
+*register_generic_environment = \&Locale::Po4a::TeX::register_generic_environment;
+use vars qw($RE_ESCAPE $ESCAPE
+ $no_wrap_environments
+ %commands %environments
+ %separated_command %separated_environment
+ %command_parameters %environment_parameters
+ %env_separators
+ @exclude_include);
+*RE_ESCAPE = \$Locale::Po4a::TeX::RE_ESCAPE;
+*ESCAPE = \$Locale::Po4a::TeX::ESCAPE;
+*no_wrap_environments = \$Locale::Po4a::TeX::no_wrap_environments;
+*commands = \%Locale::Po4a::TeX::commands;
+*environments = \%Locale::Po4a::TeX::environments;
+*separated_command = \%Locale::Po4a::TeX::separated_command;
+*separated_environment = \%Locale::Po4a::TeX::separated_environment;
+*env_separators = \%Locale::Po4a::TeX::env_separators;
+*exclude_include = \@Locale::Po4a::TeX::exclude_include;
+*command_parameters = \%Locale::Po4a::TeX::command_parameters;
+*environment_parameters = \%Locale::Po4a::TeX::environment_parameters;
+
+# documentclass:
+# Only read the documentclass in order to find some po4a directives.
+# FIXME: The documentclass could contain translatable strings.
+# Maybe it should be implemented as \include{}.
+register_generic_command("*documentclass,[]{}");
+
+# We use register_generic_command to define the number and types of
+# parameters. The function is then overwritten:
+$commands{'documentclass'} = sub {
+ my $self = shift;
+ my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift );
+ my $no_wrap = shift;
+
+ # Only try to parse the file. We don't want to fail or parse this file
+ # if it is a standard documentclass.
+ my $name = ( $args->[0] eq '[' ) ? $args->[3] : $args->[1];
+ parse_definition_file( $self, $name . ".cls", 1 );
+
+ my ( $t, @e ) = generic_command( $self, $command, $variant, $args, $env, $no_wrap );
+
+ return ( $t, @$env );
+};
+
+# LaTeX 2
+# I chose not to translate files, counters, lengths
+register_generic_command("*addcontentsline,{}{}{_}");
+register_generic_command("address,{_}"); # lines are seperated by \\
+register_generic_command("*addtocontents,{}{_}");
+register_generic_command("*addtocounter,{}{}");
+register_generic_command("*addtolength,{}{}");
+register_generic_command("*addvspace,{}");
+register_generic_command("alph,{}"); # another language may not want this alphabet
+register_generic_command("arabic,{}"); # another language may not want an arabic numbering
+register_generic_command("*author,{_}"); # authors are separated by \and
+register_generic_command("bibitem,[]{}");
+register_generic_command("*bibliographystyle,{}"); # BibTeX
+register_generic_command("*bibliography,{}"); # BibTeX
+register_generic_command("*centerline,{_}");
+register_generic_command("*caption,[]{_}");
+register_generic_command("cc,{_}");
+register_generic_command("circle,[]{}");
+register_generic_command("cite,[_]{}");
+register_generic_command("cline,{}");
+register_generic_command("closing,{_}");
+register_generic_command("dashbox,{}"); # followed by a (w,h) argument
+register_generic_command("date,{_}");
+register_generic_command("*enlargethispage,{}");
+register_generic_command("ensuremath,{_}");
+register_generic_command("*fbox,{_}");
+register_generic_command("fnsymbol,{}");
+register_generic_command("*footnote,[]{_}");
+register_generic_command("*footnotemark,[]");
+register_generic_command("*footnotetext,[]{_}");
+register_generic_command("frac,{_}{_}");
+register_generic_command("*frame,{_}");
+register_generic_command("*framebox,[][]{_}"); # There is another form in picture environment
+register_generic_command("*hbox,{}");
+register_generic_command("*hspace,[]{}");
+register_generic_command("*hyphenation,{_}"); # Translators may wish to add/remove words
+register_generic_command("*include,{}");
+
+#register_generic_command("includeonly,{}"); # should not be supported for now
+register_generic_command("*index,{_}");
+register_generic_command("*input,{}");
+register_generic_command("*item,[_]");
+register_generic_command("*label,{}");
+register_generic_command("lefteqn,{_}");
+register_generic_command("line,"); # The first argument is (x,y)
+register_generic_command("*linebreak,[]");
+register_generic_command("linethickness,{}");
+register_generic_command("location,{_}");
+register_generic_command("makebox,[][]{_}"); # There's another form in picture environment
+register_generic_command("makelabels,{}");
+register_generic_command("*markboth,[]{_}{_}");
+register_generic_command("*markright,{_}");
+register_generic_command("mathcal,{_}");
+register_generic_command("mathop,{_}");
+register_generic_command("mbox,{_}");
+register_generic_command("multicolumn,{}{}{_}");
+register_generic_command("multiput,"); # The first arguments are (x,y)(dx,dy)
+register_generic_command("name,{_}");
+register_generic_command("*newcommand,{}[][]{_}");
+register_generic_command("*newcounter,{}[]");
+register_generic_command("*newenvironment,{}[]{_}{_}");
+register_generic_command("*newfont,{}{}");
+register_generic_command("*newlength,{}");
+register_generic_command("*newsavebox,{}");
+register_generic_command("*newtheorem,{}[]{_}[]"); # Two forms, {}[]{_} or {}{_}[]
+register_generic_command("nocite,{}");
+register_generic_command("nolinebreak,[]");
+register_generic_command("*nopagebreak,[]");
+register_generic_command("opening,{_}");
+register_generic_command("oval,"); # The first argument is (w,h)
+register_generic_command("overbrace,{_}");
+register_generic_command("overline,{_}");
+register_generic_command("*pagebreak,[]");
+register_generic_command("*pagenumbering,{_}");
+register_generic_command("pageref,{}");
+register_generic_command("*pagestyle,{}");
+register_generic_command("*parbox,[][][]{}{_}");
+register_generic_command("providecommand,{}[][]{_}");
+register_generic_command("put,"); # The first argument is (x,y)
+register_generic_command("raisebox,{}[][]{_}");
+register_generic_command("ref,{}");
+register_generic_command("*refstepcounter,{}");
+register_generic_command("*renewcommand,{}[][]{_}");
+register_generic_command("*renewenvironment,{}[]{_}{_}");
+register_generic_command("roman,{}"); # another language may not want a roman numbering
+register_generic_command("rule,[]{}{}");
+register_generic_command("savebox,{}"); # Optional arguments in 2nd & 3rd position
+register_generic_command("sbox,{}{_}");
+register_generic_command("*setcounter,{}{}");
+register_generic_command("*setlength,{}{}");
+register_generic_command("*settodepth,{}{_}");
+register_generic_command("*settoheight,{}{_}");
+register_generic_command("*settowidth,{}{_}");
+register_generic_command("shortstack,[]{_}");
+register_generic_command("signature,{_}");
+register_generic_command("sqrt,[_]{_}");
+register_generic_command("stackrel,{_}{_}");
+register_generic_command("stepcounter,{}");
+register_generic_command("*subfigure,[_]{_}");
+register_generic_command("symbol,{_}");
+register_generic_command("telephone,{_}");
+register_generic_command("thanks,{_}");
+register_generic_command("*thispagestyle,{}");
+register_generic_command("*title,{_}");
+register_generic_command("typeout,{_}");
+register_generic_command("typein,[]{_}");
+register_generic_command("twocolumn,[_]");
+register_generic_command("underbrace,{_}");
+register_generic_command("underline,{_}");
+register_generic_command("*usebox,{}");
+register_generic_command("usecounter,{}");
+register_generic_command("*usepackage,[]{}");
+register_generic_command("value,{}");
+register_generic_command("vector,"); # The first argument is (x,y)
+register_generic_command("vphantom,{_}");
+register_generic_command("*vspace,[]{}");
+register_generic_command("*vbox,{}");
+register_generic_command("*vcenter,{}");
+
+register_generic_command("*part,[_]{_}");
+register_generic_command("*chapter,[_]{_}");
+register_generic_command("*section,[_]{_}");
+register_generic_command("*subsection,[_]{_}");
+register_generic_command("*subsubsection,[_]{_}");
+register_generic_command("*paragraph,[_]{_}");
+register_generic_command("*subparagraph,[_]{_}");
+
+register_generic_command("textrm,{_}");
+register_generic_command("textit,{_}");
+register_generic_command("emph,{_}");
+register_generic_command("textmd,{_}");
+register_generic_command("textbf,{_}");
+register_generic_command("textup,{_}");
+register_generic_command("textsl,{_}");
+register_generic_command("textsf,{_}");
+register_generic_command("textsc,{_}");
+register_generic_command("texttt,{_}");
+register_generic_command("textnormal,{_}");
+register_generic_command("mathrm,{_}");
+register_generic_command("mathsf,{_}");
+register_generic_command("mathtt,{_}");
+register_generic_command("mathit,{_}");
+register_generic_command("mathnormal,{_}");
+register_generic_command("mathversion,{}");
+
+register_generic_command("*contentspage,");
+register_generic_command("*tablelistpage,");
+register_generic_command("*figurepage,");
+
+register_generic_command("*PassOptionsToPackage,{}{}");
+
+register_generic_command("*ifthenelse,{}{_}{_}");
+
+# graphics
+register_generic_command("*includegraphics,[]{}");
+register_generic_command("*graphicspath,{}");
+register_generic_command("*resizebox,{}{}{_}");
+register_generic_command("*scalebox,{}{_}");
+register_generic_command("*rotatebox,{}{_}");
+
+# url
+register_generic_command("UrlFont,{}");
+register_generic_command("*urlstyle,{}");
+
+# hyperref
+register_generic_command("href,{}{_}"); # 1:URL
+register_generic_command("url,{}"); # URL
+register_generic_command("nolinkurl,{}"); # URL
+register_generic_command("hyperbaseurl,{}"); # URL
+register_generic_command("hyperimage,{}"); # URL
+register_generic_command("hyperdef,{}{}{_}"); # 1:category, 2:name
+register_generic_command("hyperref,{}{}{}{_}"); # 1:URL, 2:category, 3:name
+register_generic_command("hyperlink,{}{_}"); # 1:name
+register_generic_command("*hypersetup,{_}");
+register_generic_command("hypertarget,{}{_}"); # 1:name
+register_generic_command("autoref,{}"); # 1:label
+
+register_generic_command("*selectlanguage,{}");
+
+# color
+register_generic_command("*definecolor,{}{}{}");
+register_generic_command("*textcolor,{}{_}");
+register_generic_command("*colorbox,{}{_}");
+register_generic_command("*fcolorbox,{}{}{_}");
+register_generic_command("*pagecolor,{_}");
+register_generic_command("*color,{}");
+
+# equations/theorems
+register_generic_command("*qedhere,");
+register_generic_command("*qedsymbol,");
+register_generic_command("*theoremstyle,{}");
+register_generic_command("*proclaim,{_}");
+register_generic_command("*endproclaim,");
+register_generic_command("*shoveleft,{_}");
+register_generic_command("*shoveright,{_}");
+
+# commands without arguments. This is better than untranslated or
+# translate_joined because the number of arguments will be checked.
+foreach (
+ qw(a *appendix *backmatter backslash *baselineskip *baselinestretch bf
+ *bigskip boldmath cal cdots *centering *cleardoublepage *clearpage
+ ddots dotfill em flushbottom *footnotesize frenchspacing
+ *frontmatter *glossary *hfill *hline hrulefill huge Huge indent it
+ kill large Large LARGE ldots left linewidth listoffigures
+ listoftables *mainmatter *makeatletter *makeglossary *makeindex
+ *maketitle *medskip *newline *newpage noindent nonumber *normalsize
+ not *null *onecolumn *par parindent *parskip *printindex protect ps
+ pushtabs *qquad *quad raggedbottom raggedleft raggedright right rm
+ sc scriptsize sf sl small *smallskip *startbreaks *stopbreaks
+ *tableofcontents textwidth textheight tiny today tt unitlength
+ vdots verb *vfill *vline *fussy *sloppy
+
+ aleph hbar imath jmath ell wp Re Im prime nabla surd angle forall
+ exists partial infty triangle Box Diamond flat natural sharp
+ clubsuit diamondsuit heartsuit spadesuit dag ddag S P copyright
+ pounds Delta ASCII
+
+ rmfamily itshape mdseries bfseries upshape slshape sffamily scshape
+ ttfamily *normalfont width height depth totalheight
+
+ *fboxsep *fboxrule
+ *itemi *itemii *itemiii *itemiv
+ *theitemi *theitemii *theitemiii *theitemiv)
+ )
+{
+ register_generic_command("$_,");
+}
+
+# standard environments.
+# FIXME: All these definitions should be re-checked
+foreach (
+ qw(abstract align align* cases center description displaymath document enumerate
+ eqnarray eqnarray* equation equation* flushleft flushright footnotesize itemize
+ letter lrbox multline multline* proof quotation quote
+ sloppypar tabbing theorem titlepage
+ trivlist verbatim verbatim* verse wrapfigure)
+ )
+{
+ register_generic_environment("$_,");
+}
+register_generic_environment("tabular,[]{}");
+register_generic_environment("tabular*,{}{}");
+register_generic_environment("tabularx,{}{}");
+register_generic_environment("multicols,{}");
+register_generic_environment("list,{_}{}");
+register_generic_environment("array,[]{}");
+register_generic_environment("figure,[]");
+register_generic_environment("minipage,[]{}");
+register_generic_environment("picture,{}{}");
+register_generic_environment("table,[]");
+register_generic_environment("thebibliography,{_}");
+
+# Commands and environments with separators.
+
+# & is the cell separator, \\ is the line separator
+# '\' is escaped twice
+$env_separators{'array'} = $env_separators{'tabular'} = $env_separators{'tabularx'} = "(?:&|\\\\\\\\|\\\\hline)";
+
+$env_separators{'trivlist'} = $env_separators{'list'} = $env_separators{'description'} = $env_separators{'enumerate'} =
+ $env_separators{'itemize'} = "\\\\item";
+
+$env_separators{'thebibliography'} = "\\\\bibitem";
+
+$env_separators{'displaymath'} = $env_separators{'eqnarray'} = $env_separators{'eqnarray*'} =
+ $env_separators{'flushleft'} = $env_separators{'flushright'} = $env_separators{'center'} =
+ $env_separators{'author{#1}'} = $env_separators{'title{#1}'} = "\\\\\\\\";
+
+# tabbing
+
+1;
diff --git a/lib/Locale/Po4a/Man.pm b/lib/Locale/Po4a/Man.pm
new file mode 100644
index 0000000..ff4157d
--- /dev/null
+++ b/lib/Locale/Po4a/Man.pm
@@ -0,0 +1,2904 @@
+#!/usr/bin/perl -w
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Man - convert manual pages from/to PO files
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::Man is a module to help the translation of documentation in
+the nroff format (the language of manual pages) into other [human]
+languages.
+
+=head1 TRANSLATING WITH PO4A::MAN
+
+This module tries pretty hard to make translator's life easier. For that,
+the text presented to translators isn't a verbatim copy of the text found
+in the man page. Indeed, the cruder parts of the nroff format are hidden, so
+that translators can't mess up with them.
+
+=head2 Text wrapping
+
+Unindented paragraphs are automatically rewrapped for the translator. This
+can lead to some minor difference in the generated output, since the
+rewrapping rules used by groff aren't very clear. For example, two spaces
+after a parenthesis are sometimes preserved.
+
+Anyway, the difference will only be about the position of the extra spaces
+in wrapped paragraph, and I think it's worth.
+
+=head2 Font specification
+
+The first change is about font change specifications. In nroff, there are
+several ways to specify if a given word should be written in small, bold or
+italics. In the text to translate, there is only one way, borrowed from the
+POD (Perl online documentation) format:
+
+=over
+
+=item IE<lt>textE<gt> -- italic text
+
+equivalent to \fItext\fP or ".I text"
+
+=item BE<lt>textE<gt> -- bold text
+
+equivalent to \fBtext\fP or ".B text"
+
+=item RE<lt>textE<gt> -- roman text
+
+equivalent to \fRtext\fP
+
+=item CWE<lt>textE<gt> -- constant width text
+
+equivalent to \f(CWtext\fP or ".CW text"
+
+=back
+
+Remark: The CW face is not available for all groff devices. It is not
+recommended to use it. It is provided for your convenience.
+
+=head2 Automatic characters transliteration
+
+Po4a automatically transliterate some characters to ease the translation
+or the review of the translation.
+Here is the list of the transliterations:
+
+=over
+
+=item hyphens
+
+Hyphens (-) and minus signs (\-) in man pages are all transliterated
+as simple dashes (-) in the PO file. Then all dash are transliterated into
+roff minus signs (\-) when the translation is inserted into the output
+document.
+
+Translators can force an hyphen by using the roff glyph '\[hy]' in their
+translations.
+
+=item non-breaking spaces
+
+Translators can use non-breaking spaces in their translations. These
+non-breaking spaces (0xA0 in latin1) will be transliterated into a roff
+non-breaking space ('\ ').
+
+=item quotes transliterations
+
+`` and '' are respectively tranliterated into \*(lq and \*(rq.
+
+To avoid these transliterations, translators can insert a zero width roff
+character (i.e., using `\&` or '\&' respectively).
+
+=back
+
+=head2 Putting 'E<lt>' and 'E<gt>' in translations
+
+Since these chars are used to delimit parts under font modification, you
+can't use them verbatim. Use EE<lt>ltE<gt> and EE<lt>gtE<gt> instead (as in
+POD, one more time).
+
+=head1 OPTIONS ACCEPTED BY THIS MODULE
+
+These are this module's particular options:
+
+=over
+
+=item B<debug>
+
+Activate debugging for some internal mechanisms of this module.
+Use the source to see which parts can be debugged.
+
+=item B<verbose>
+
+Increase verbosity.
+
+=item B<groff_code>
+
+This option controls the behavior of the module when it encounter
+a .de, .ie or .if section. It can take the following values:
+
+=over
+
+=item I<fail>
+
+This is the default value.
+The module will fail when a .de, .ie or .if section is encountered.
+
+=item I<verbatim>
+
+Indicates that the .de, .ie or .if sections must be copied as is
+from the original to the translated document.
+
+=item I<translate>
+
+Indicates that the .de, .ie or .if sections will be proposed for the
+translation.
+You should only use this option if a translatable string is
+contained in one of these section. Otherwise, I<verbatim>
+should be preferred.
+
+=back
+
+=item B<generated>
+
+This option specifies that the file was generated, and that po4a should not
+try to detect if the man pages was generated from another format.
+This option is mandatory to use po4a on generated man pages.
+Note that translating generated pages instead of sources ones is often more fragile, and thus a bad idea.
+
+=item B<mdoc>
+
+This option is only useful for mdoc pages.
+
+It selects a stricter support of the mdoc format by telling po4a not to
+translate the 'NAME' section.
+mdoc pages whose 'NAME' section is translated won't generate any header or
+footer.
+
+According to the groff_mdoc page, the NAME, SYNOPSIS and DESCRIPTION
+sections are mandatory.
+There are no known issues with translated SYNOPSIS or DESCRIPTION section,
+but you can also specify these sections this way:
+ -o mdoc=NAME,SYNOPSIS,DESCRIPTION
+
+This mdoc issue can also be solved with an addendum like this one:
+ PO4A-HEADER:mode=before;position=^.Dd
+ .TH DOCUMENT_TITLE 1 "Month day, year" OS "Section Name"
+
+
+=back
+
+The following options specify the behavior of a user-defined macro
+(with a .de request), or of a classical macro that is not supported by po4a.
+They take as argument a comma-separated list of macros.
+For example:
+
+ -o noarg=FO,OB,AR -o translate_joined=BA,ZQ,UX
+
+Note: if a macro is not supported by po4a and if you consider that it is a
+standard roff macro, you should submit it to the po4a development team.
+
+=over
+
+=item B<untranslated>
+
+B<untranslated> indicates that this macro (at its arguments) don't have to
+be translated.
+
+=item B<noarg>
+
+B<noarg> is like B<untranslated>, except that po4a will verify that no
+argument is added to this macro.
+
+=item B<translate_joined>
+
+B<translate_joined> indicates that po4a must propose to translate the
+arguments of the macro.
+
+=item B<translate_each>
+
+With B<translate_each>, the arguments will also be proposed for the
+translation, except that each one will be translated separately.
+
+=item B<no_wrap>
+
+This option takes as argument a list of comma-separated couples
+I<begin>:I<end>, where I<begin> and I<end> are commands that delimit
+the begin and end of a section that should not be rewrapped.
+
+Note: no test is done to ensure that an I<end> command matches its
+I<begin> command; any ending command stop the no_wrap mode.
+If you have a I<begin> (respectively I<end>) macro that has no I<end>
+(respectively I<begin>), you can specify an existing I<end> (like fi) or
+I<begin> (like nf) as a counterpart.
+These macros (and their arguments) won't be translated.
+
+=item B<inline>
+
+This option specifies a list of comma-separated macros that must
+not split the current paragraph. The string to translate will then contain
+I<foo E<lt>.bar baz quxE<gt> quux>, where I<bar> is the command that
+should be inlined, and I<baz qux> its arguments.
+
+=item B<unknown_macros>
+
+This option indicates how po4a should behave when an unknown macro is found.
+By default, po4a fails with a warning.
+It can take the following values: B<failed> (the default value),
+B<untranslated>, B<noarg>, B<translate_joined>, or B<translate_each> (see above
+for an explanation of these values).
+
+=back
+
+=head1 AUTHORING MAN PAGES COMPLIANT WITH PO4A::MAN
+
+This module is still very limited, and will always be, because it's not a
+real nroff interpreter. It would be possible to do a real nroff
+interpreter, to allow authors to use all the existing macros, or even to
+define new ones in their pages, but we didn't want to. It would be too
+difficult, and we thought it wasn't necessary. We do think that if
+manpages' authors want to see their productions translated, they may have to
+adapt to ease the work of translators.
+
+So, the man parser implemented in po4a have some known limitations we are
+not really inclined to correct, and which will constitute some pitfalls
+you'll have to avoid if you want to see translators taking care of your
+documentation.
+
+=head2 Don't program in nroff
+
+nroff is a complete programming language, with macro definition,
+conditionals and so on. Since this parser isn't a fully featured nroff
+interpreter, it will fail on pages using these facilities (There are about
+200 such pages on my box).
+
+=head2 Use the plain macro set
+
+There are still some macros which are not supported by po4a::man. This is
+only because I failed to find any documentation about them. Here is the
+list of unsupported macros used on my box. Note that this list isn't
+exhaustive since the program fails on the first encountered unsupported
+macro. If you have any information about some of these macros, I'll
+happily add support for them. Because of these macros, about 250 pages on
+my box are inaccessible to po4a::man.
+
+ .. ." .AT .b .bank
+ .BE ..br .Bu .BUGS .BY
+ .ce .dbmmanage .do .En
+ .EP .EX .Fi .hw .i
+ .Id .l .LO .mf
+ .N .na .NF .nh .nl
+ .Nm .ns .NXR .OPTIONS .PB
+ .pp .PR .PRE .PU .REq
+ .RH .rn .S< .sh .SI
+ .splitfont .Sx .T .TF .The
+ .TT .UC .ul .Vb .zZ
+
+=head2 Hiding text from po4a
+
+Sometimes, the author knows that some parts are not translatable, and
+should not be extracted by po4a. For example, an option may accept an
+I<other> argument, and I<other> may also appear as the last item of a
+list. In the first case, I<other> should be not be translatable. And in
+the second case, I<other> should be translated.
+
+In such case, the author can avoid po4a to extract some strings, using
+some special groff constructs:
+
+ .if !'po4a'hide' .B other
+
+(this will require the B<-o groff_code=verbatim> option)
+
+A new macro can also be defined to automate this:
+ .de IR_untranslated
+ . IR \\$@
+ ..
+
+ .IR_untranslated \-q ", " \-\-quiet
+
+(this will require the options B<-o groff_code=verbatim> and
+B<-o untranslated=IR_untranslated>; with this construct, the B<.if
+!'po4a'hide'> conditional is not strictly needed since po4a will not parse
+the internal of the macro definition)
+
+or using an alias:
+ .als IR_untranslated IR
+
+ .IR_untranslated \-q ", " \-\-quiet
+
+This will require the B<-o untranslated=als,IR_untranslated> option.
+
+=head2 Conclusion
+
+To summarise this section, keep simple, and don't try to be clever while
+authoring your man pages. A lot of things are possible in nroff, and not
+supported by this parser. For example, don't try to mess with \c to
+interrupt the text processing (like 40 pages on my box do). Or, be sure to
+put the macro arguments on the same line that the macro itself. I know that
+it's valid in nroff, but would complicate too much the parser to be
+handled.
+
+Of course, another possibility is to use another format, more translator
+friendly (like POD using po4a::pod, or one of the XML family like SGML),
+but thanks to po4a::man it isn't needed anymore. That being said, if the
+source format of your documentation is POD, or XML, it may be clever to
+translate the source format and not this generated one. In most cases,
+po4a::man will detect generated pages and issue a warning. It will even
+refuse to process POD generated pages, because those pages are perfectly
+handled by po4a::pod, and because their nroff counterpart defines a lot of
+new macros I didn't want to write support for. On my box, 1432 of the 4323
+pages are generated from POD and will be ignored by po4a::man.
+
+In most cases, po4a::man will detect the problem and refuse to process the
+page, issuing an adapted message. In some rare cases, the program will
+complete without warning, but the output will be wrong. Such cases are
+called "bugs" ;) If you encounter such case, be sure to report this, along
+with a fix when possible…
+
+=head1 STATUS OF THIS MODULE
+
+This module can be used for most of the existing man pages.
+
+Some tests are regularly run on Linux boxes:
+
+=over 4
+
+=item *
+
+one third of the pages are refused because they were generated from
+another format supported by po4a (e.g. POD or SGML).
+
+=item *
+
+10% of the remaining pages are rejected with an error (e.g. a
+groff macro is not supported).
+
+=item *
+
+Then, less than 1% of the pages are accepted silently by po4a, but with
+significant issues (i.e. missing words, or new words inserted)
+
+=item *
+
+The other pages are usually handled without differences more important
+than spacing differences or line rewrapped (font issues in less than 10% of
+the processed pages).
+
+=back
+
+=head1 SEE ALSO
+
+L<Locale::Po4a::Pod(3pm)>,
+L<Locale::Po4a::TransTractor(3pm)>,
+L<po4a(7)|po4a.7>
+
+=head1 AUTHORS
+
+ Denis Barbier <barbier@linuxfr.org>
+ Nicolas François <nicolas.francois@centraliens.net>
+ Martin Quinson (mquinson#debian.org)
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright © 2002-2008 SPI, Inc.
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
+
+=cut
+
+package Locale::Po4a::Man;
+use DynaLoader;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+require Exporter;
+use vars qw(@ISA @EXPORT);
+@ISA = qw(Locale::Po4a::TransTractor DynaLoader);
+@EXPORT = qw(); # new initialize);
+
+# Try to use a C extension if present.
+eval('bootstrap Locale::Po4a::Man "0.30"');
+
+use Locale::Po4a::TransTractor;
+use Locale::Po4a::Common;
+
+use File::Spec;
+use Getopt::Std;
+
+my %macro; # hash of known macro, with parsing sub. See end of this file
+my %default_macro; # The default known macros, when no options are used.
+
+# A font start by \f and is followed either by
+# [.*] - a font name within brackets (e.g. [P], [A_USER_FONT])
+# (.. - a parenthesis followed by two char (e.g. "(CW")
+# . - a single char (e.g. B, I, R, P, 1, 2, 3, 4, etc.)
+my $FONT_RE = "\\\\f(?:\\[[^\\]]*\\]|\\(..|[^\\(\\[])";
+
+# Variable used to identify non breaking spaces.
+# These non breaking spaces are used to ease the parsing, and a
+# translator can use them in their translation (and they will be translated
+# into the groff non-breaking space).
+my $nbs;
+
+# Indicate if the page uses the mdoc macros
+my $mdoc_mode = 0;
+
+my $unknown_macros = undef;
+
+#########################
+#### DEBUGGING STUFF ####
+#########################
+my %debug;
+
+# The following debug options can be set with '-o debug=...':
+# * splitargs see how macro args are separated
+# * pretrans see pre-conditioning of translation
+# * postrans see post-conditioning of translation
+# * fonts see font modifier handling
+
+######## CONFIG #########
+# This variable indicates the behavior of the module when a .de, .if or
+# .ie is encountered.
+my $groff_code;
+
+# %no_wrap_begin and %no_wrap_end are lists of macros that respectively
+# begins and ends a no_wrap paragraph.
+# Any ending macro will end the no_wrap paragraph started by any beginning
+# macro.
+my %no_wrap_begin;
+my %no_wrap_end;
+
+# List of macros that should be inlined (with E<.xx ...>)
+my %inline;
+
+# The default list of inlined macros (when no options are used)
+my %default_inline;
+
+# This variable indicates whether po4a should try to detect the generated
+# files.
+my $allow_generated;
+
+# This hash indicates section name that should not be translated in mdoc
+# mode.
+# The groff's mdoc processor requires the NAME section, otherwise headers
+# and footers of the pages are not generated.
+# The mdoc_groff man page indicates that NAME, SYNOPSIS and DESCRIPTION
+# are mandatory.
+my %mdoc;
+
+sub initialize {
+ my $self = shift;
+ my %options = @_;
+
+ $self->{options}{'debug'} = '';
+ $self->{options}{'verbose'} = '';
+ $self->{options}{'groff_code'} = '';
+ $self->{options}{'untranslated'} = '';
+ $self->{options}{'noarg'} = '';
+ $self->{options}{'translate_joined'} = '';
+ $self->{options}{'translate_each'} = '';
+ $self->{options}{'no_wrap'} = '';
+ $self->{options}{'inline'} = '';
+ $self->{options}{'generated'} = '';
+ $self->{options}{'mdoc'} = '';
+ $self->{options}{'unknown_macros'} = '';
+
+ foreach my $opt ( keys %options ) {
+ if ( defined $options{$opt} ) {
+ die wrap_mod( "po4a::man", dgettext( "po4a", "Unknown option: %s" ), $opt )
+ unless exists $self->{options}{$opt};
+ $self->{options}{$opt} = $options{$opt};
+ }
+ }
+
+ %debug = ();
+ if ( defined $options{'debug'} ) {
+ foreach ( $options{'debug'} ) {
+ $debug{$_} = 1;
+ }
+ }
+
+ $groff_code = "fail";
+ if ( defined $options{'groff_code'} ) {
+ unless ( $options{'groff_code'} =~ m/fail|verbatim|translate/ ) {
+ die wrap_mod( "po4a::man",
+ dgettext( "po4a", "Invalid 'groff_code' value. Must be one of 'fail', 'verbatim', 'translate'." ) );
+ }
+ $groff_code = $options{'groff_code'};
+ }
+
+ if (%default_macro) {
+ %macro = %default_macro;
+ } else {
+ %default_macro = %macro;
+ }
+ if ( defined $options{'untranslated'} ) {
+ foreach ( split( /,/, $options{'untranslated'} ) ) {
+ $macro{$_} = \&untranslated;
+ }
+ }
+ if ( defined $options{'noarg'} ) {
+ foreach ( split( /,/, $options{'noarg'} ) ) {
+ $macro{$_} = \&noarg;
+ }
+ }
+ if ( defined $options{'translate_joined'} ) {
+ foreach ( split( /,/, $options{'translate_joined'} ) ) {
+ $macro{$_} = \&translate_joined;
+ }
+ }
+ if ( defined $options{'translate_each'} ) {
+ foreach ( split( /,/, $options{'translate_each'} ) ) {
+ $macro{$_} = \&translate_each;
+ }
+ }
+
+ %no_wrap_begin = (
+ 'nf' => 1,
+ 'EX' => 1,
+ 'EQ' => 1
+ );
+ %no_wrap_end = (
+ 'fi' => 1,
+ 'EE' => 1,
+ 'EN' => 1
+ );
+ if ( defined $options{'no_wrap'} ) {
+ foreach ( split( /,/, $options{'no_wrap'} ) ) {
+ if ( $_ =~ m/^(.*):(.*)$/ ) {
+ $no_wrap_begin{$1} = 1;
+ $no_wrap_end{$2} = 1;
+ } else {
+ die wrap_mod( "po4a::man",
+ dgettext( "po4a", "The no_wrap parameters must be a set of comma-separated begin:end couples.\n" )
+ );
+ }
+ }
+ }
+
+ if (%default_inline) {
+ %inline = %default_inline;
+ } else {
+ %default_inline = %inline;
+ }
+ if ( defined $options{'inline'} ) {
+ foreach ( split( /,/, $options{'inline'} ) ) {
+ $inline{$_} = 1;
+ }
+ }
+
+ $allow_generated = 0;
+ if ( defined $options{'generated'} ) {
+ $allow_generated = 1;
+ }
+
+ %mdoc = ();
+ if ( defined $options{'mdoc'} ) {
+ if ( $options{'mdoc'} eq 1 ) {
+ $mdoc{"NAME"} = 1;
+ } else {
+ foreach ( split( /,/, $options{'mdoc'} ) ) {
+ $mdoc{$_} = 1;
+ }
+ }
+ }
+
+ $unknown_macros = undef;
+ if ( defined $options{'unknown_macros'} ) {
+ if ( $options{'unknown_macros'} eq "failed" ) {
+ $unknown_macros = undef;
+ } elsif ( $options{'unknown_macros'} eq "untranslated" ) {
+ $unknown_macros = \&untranslated;
+ } elsif ( $options{'unknown_macros'} eq "noarg" ) {
+ $unknown_macros = \&noarg;
+ } elsif ( $options{'unknown_macros'} eq "translate_joined" ) {
+ $unknown_macros = \&translate_joined;
+ } elsif ( $options{'unknown_macros'} eq "translate_each" ) {
+ $unknown_macros = \&translate_each;
+ } else {
+ die wrap_mod( "po4a::man",
+ dgettext( "po4a", "Invalid 'unknown_macros' value. Must be one of:\n" )
+ . "failed untranslated noarg translate_joined translate_each\n" );
+ }
+ }
+}
+
+my @comments = ();
+my @next_comments = ();
+
+# This function returns the next line of the document being parsed
+# (and its reference).
+# It overload the Transtractor shiftline to handle:
+# - font requests (.B, .I, .BR, .BI, ...)
+# because these requests can be present in a paragraph (handled
+# in the parse subroutine), or in argument (on the next line)
+# of some other request (for example .TP)
+# - font size requests (.SM,.SB) (not done yet)
+# - input escape (\ at the end of a line)
+sub shiftline {
+ my $self = shift;
+
+ # call Transtractor's shiftline
+ NEW_LINE:
+ my ( $line, $ref ) = $self->SUPER::shiftline();
+
+ if ( !defined $line ) {
+
+ # end of file
+ return ( $line, $ref );
+ }
+
+ # Do as few treatments as possible with the .de, .ie and .if sections
+ if ( $line =~ /^\.\s*(if|ie|de)/ ) {
+ chomp $line;
+ return ( $line, $ref );
+ }
+
+ # Handle some escapes
+ # * reduce the number of \ in macros
+ if ( $line =~ /^\\?[.']/ ) {
+
+ # The first backslash is consumed while the macro is read.
+ $line =~ s/\\\\/\\/g;
+ }
+
+ # * \\ is equivalent to \e, which is less error prone for the rest
+ # of the module (e.g. when searching for a font : \f, whe don't
+ # want to match \\f)
+ $line =~ s/\\\\/\\e/g;
+
+ # * \. is just a dot (this can even be used to introduce a macro)
+ $line =~ s/\\\././g;
+
+ chomp $line;
+ if ( $line =~ m/^(.*?)(?:(?<!\\)\\(["#])(.*))$/ ) {
+ my ( $l, $t, $c ) = ( $1, $2, $3 );
+ $line = $l;
+ unless ($allow_generated) {
+
+ # Check for comments indicating that the file was generated.
+ if ( $c =~ /Pod::Man/ ) {
+ warn wrap_ref_mod(
+ $ref,
+ "po4a::man",
+ dgettext(
+ "po4a",
+ "This file was generated with Pod::Man. Translate the POD file with the pod module of po4a."
+ )
+ );
+ exit 254;
+ } elsif ( $c =~ /generated by help2man/ ) {
+ warn wrap_ref_mod(
+ $ref,
+ "po4a::man",
+ dgettext(
+ "po4a",
+ "This file was generated with help2man. Translate the source file with the regular gettext."
+ )
+ );
+ } elsif ( $c =~ /with docbook-to-man/ ) {
+ warn wrap_ref_mod(
+ $ref,
+ "po4a::man",
+ dgettext(
+ "po4a",
+ "This file was generated with docbook-to-man. Translate the source file with the sgml module of po4a."
+ )
+ );
+ exit 254;
+ } elsif ( $c =~ /generated by docbook2man/ ) {
+ warn wrap_ref_mod(
+ $ref,
+ "po4a::man",
+ dgettext(
+ "po4a",
+ "This file was generated with docbook2man. Translate the source file with the sgml module of po4a."
+ )
+ );
+ exit 254;
+ } elsif ( $c =~ /created with latex2man/ ) {
+ warn wrap_ref_mod(
+ $ref,
+ "po4a::man",
+ dgettext(
+ "po4a",
+ "This file was generated with %s. "
+ . "You should translate the source file, but continuing anyway."
+ ),
+ "latex2man"
+ );
+ } elsif ( $c =~ /Generated by db2man.xsl/ ) {
+ warn wrap_ref_mod(
+ $ref,
+ "po4a::man",
+ dgettext(
+ "po4a",
+ "This file was generated with db2man.xsl. Translate the source file with the xml module of po4a."
+ )
+ );
+ exit 254;
+ } elsif ( $c =~ /generated automatically by mtex2man/ ) {
+ warn wrap_ref_mod(
+ $ref,
+ "po4a::man",
+ dgettext(
+ "po4a",
+ "This file was generated with %s. "
+ . "You should translate the source file, but continuing anyway."
+ ),
+ "mtex2man"
+ );
+ } elsif ( $c =~ /THIS FILE HAS BEEN AUTOMATICALLY GENERATED. DO NOT EDIT./
+ || $c =~ /DO NOT EDIT/i
+ || $c =~ /generated/i )
+ {
+ warn wrap_ref_mod(
+ $ref,
+ "po4a::man",
+ dgettext(
+ "po4a",
+ "This file contains the line '%s'. "
+ . "You should translate the source file, but continuing anyway."
+ ),
+ $l . "\\\"" . $c
+ );
+ }
+ }
+
+ if ( $line =~ m/^[.']*$/ ) {
+ if ( $c !~ m/^\s+$/ ) {
+
+ # This commented line may be comment for the next paragraph
+ push @next_comments, [ $line ? substr( $line, 0, 1 ) : '.', $c ];
+ }
+ if ( $line =~ m/^[.']+$/ ) {
+
+ # those lines are ignored
+ # (empty lines are a little bit different)
+ goto NEW_LINE;
+ }
+ if ( $line =~ m/^\s*$/ and $t eq "#" ) {
+
+ # Groff comments
+ goto NEW_LINE;
+ }
+ } else {
+ push @comments, [ '.', $c ];
+ }
+ } else {
+
+ # finally, we did not reach the end of the paragraph. The comments
+ # belong to the current paragraph.
+ push @comments, @next_comments;
+ @next_comments = ();
+ }
+
+ # A .I or .B request change the current font
+ # and on exit, switch the font to Roman
+ # When one of these request doesn't have its argument on its line
+ # (and when we support this usage), we must keep this font request to
+ # insert it later.
+ # It is a stack of fonts to be inserted (in case a .I is followed by
+ # a .B and then followed bysome text; note that in this case,
+ # only one \fR must be inserted at the end of the text)
+ my $insert_font = "";
+ while ( $line =~ /\\$/ || $line =~ /^(\.[BI])\s*$/ || $line =~ /^\.[BI][\t ].*?\\c$/ ) {
+ my ( $l2, $r2 ) = $self->SUPER::shiftline();
+ chomp($l2);
+ if ( $line =~ /^(\.[BI])\s*$/ ) {
+ if ( $l2 =~ /^[.'][\t ]*([BI]|BI|BR|IB|IR|RB|RI)(?:[\t ]|\s*$)/ ) {
+ my $font = $line;
+ $font =~ s/^\.([BI])\s*$/$1/;
+ $insert_font = "\\f$font$insert_font";
+ $line = $l2;
+ $ref = $r2;
+ } elsif ( $l2 =~ /^[.'][\t ]*(SH|TP|TQ|P|PP|LP)(?:[\t ]|\s*$)/ ) {
+ $line =~ s/^\.([BI])\s*$/$insert_font\\f$1/;
+ $self->SUPER::unshiftline( $l2, $r2 );
+ } elsif ( $l2 =~ /^([.'][\t ]*(?:IP)[\t ]+"?)(.*)$/ ) {
+
+ # Install the font modifier into the next line
+ # after a possible quote (")
+ my $macro = $1;
+ my $arg = $2;
+ $line =~ /^\.([BI])\s*$/;
+ $line = $macro . "$insert_font\\f$1" . $arg;
+ $ref = $r2;
+ } elsif ( $l2 =~ /^[.']/ ) {
+ warn wrap_ref_mod(
+ $ref,
+ "po4a::man",
+ dgettext(
+ "po4a",
+ "Font modifiers followed by a command may disturb "
+ . "po4a. You should either remove the font modifier "
+ . "'%s', or integrate a \\f font modifier in the "
+ . "following command ('%s'), but continuing anyway."
+ ),
+ $line, $l2
+ );
+ $line = "PO4A-INLINE:$line:PO4A-INLINE";
+ $self->SUPER::unshiftline( $l2, $r2 );
+ } else {
+
+ # convert " to the groff's double quote glyph; it will be
+ # converted back to " in pre_trans. It is needed because
+ # otherwise, these quotes will be taken as arguments
+ # delimiters.
+ $l2 =~ s/"/\\(dq/g;
+
+ # append this line to the macro, with surrounding quotes, so
+ # that the line appear as an uniq argument.
+ $line .= ' "' . $l2 . '"';
+ }
+ } else {
+ $line =~ s/\\$//;
+ $line =~ s/\\c$//;
+ $line .= $l2;
+ }
+ }
+
+ # Detect non-wrapped paragraphs
+ # This must be done before handling the .B, .RI ... font requests
+ $line =~ s/^($FONT_RE)(\s+)/$2$1/;
+
+ $line .= "\n";
+
+ # Handle font requests here
+ if ( $line =~ /^[.'][\t ]*([BI]|BI|BR|IB|IR|RB|RI)(?:(?: +|\t)(.*)|)$/ ) {
+ my $macro = $1;
+ my $arguments = $2;
+ my @args = splitargs( $ref, $arguments );
+ if ( $macro eq 'B' || $macro eq 'I' ) {
+
+ # To keep the space(s), we must introduce some \&
+ @args = map { $_ =~ s/^(\s*)$/\\&$1\\&/s; $_ } @args;
+ my $arg = join( " ", @args );
+ $arg =~ s/^ +//;
+ this_macro_needs_args( $macro, $ref, $arg );
+ $line = "$insert_font\\f$macro" . $arg . "\\fR\n";
+ $insert_font = "";
+ }
+
+ # .BI bold alternating with italic
+ # .BR bold/roman
+ # .IB italic/bold
+ # .IR italic/roman
+ # .RB roman/bold
+ # .RI roman/italic
+ if ( $macro eq 'BI'
+ || $macro eq 'BR'
+ || $macro eq 'IB'
+ || $macro eq 'IR'
+ || $macro eq 'RB'
+ || $macro eq 'RI' )
+ {
+ # num of seen args, first letter of macro name, second one
+ my ( $i, $a, $b ) = ( 0, substr( $macro, 0, 1 ), substr( $macro, 1 ) );
+ $line = join( "", map { $i++ % 2 ? "\\f$b$_" : "\\f$a$_" } @args ) . "\\fR\n";
+ if ( $i eq 0 ) {
+
+ # If a .BI is used without argument, we must insert a
+ # \fI\fR. The \fR was inserted previously.
+ $line = "\\f$b$line";
+ }
+ }
+
+ if ( length $insert_font ) {
+ $line =~ s/\n$//;
+ $line = "$insert_font$line\\fR\n";
+ }
+
+ if ( $line =~ /^(.*)\\c(\\f.)?\s*\\fR\n/ ) {
+ my $begin = $1;
+
+ my ( $l2, $r2 ) = $self->SUPER::shiftline();
+ if ( $l2 =~ /^[.']/ ) {
+ $self->SUPER::unshiftline( $l2, $r2 );
+ } else {
+ $l2 =~ s/\s*$//s;
+ $line = "$begin\\fR$l2\n";
+ }
+ }
+ }
+
+ return ( $line, $ref );
+}
+
+# Overload Transtractor's pushline.
+# This pushline first push comments (if there are comments for the
+# current line, and the line is not empty), and then push the line.
+sub pushline {
+ my ( $self, $line ) = ( shift, shift );
+ if ( $line !~ m/^\s*$/ ) {
+
+ # add comments
+ foreach my $c (@comments) {
+
+ # comments are pushed (maybe at the wrong place).
+ $self->SUPER::pushline( $self->r("$$c[0]\\\"$$c[1]\n") );
+ }
+ @comments = ();
+ }
+
+ $self->SUPER::pushline($line);
+}
+
+# The default unshiftline from Transtractor may fail because shiftline
+# is overloaded
+sub unshiftline {
+ die wrap_mod(
+ "po4a::man",
+ dgettext(
+ "po4a",
+ "The unshiftline is not supported for the man module. "
+ . "Please send a bug report with the groff page that generated "
+ . "this error."
+ )
+ );
+}
+
+###############################################
+#### FUNCTION TO TRANSLATE OR NOT THE TEXT ####
+###############################################
+sub pushmacro {
+ my $self = shift;
+ if ( scalar @_ ) {
+
+ # Do quote the arguments containing spaces, as it should.
+
+ # but do not do so if they already contain quotes and escaped spaces
+ # For example, cdrdao(1) uses:
+ # .IP CATALOG\ "ddddddddddddd" (Here, the quote have to be displayed)
+ # Adding extra quotes as in:
+ # .IP "CATALOG\ "ddddddddddddd""
+ # results in two args: 'CATALOG\ ' and 'ddddddddddddd""'
+ $self->pushline(
+ join(
+ " ",
+ map {
+ # Replace double quotes by \(dq (double quotes could be
+ # taken as an argument delimiter).
+ # Only quotes not preceded by \ are taken into account
+ # (\" introduces a comment).
+ s/(?<!\\)"/\\\(dq/g if ( defined $_ );
+
+ defined $_
+ ? (
+ length($_)
+ ? ( m/([^\\] |^ )/ ? "\"$_\"" : "$_" )
+
+ # Quote arguments that contain a space.
+ # (not needed for non breaknig spaces, i.e.
+ # spaces preceded by '\')
+ : '""' # empty argument
+ )
+ : '' # no argument
+ } @_
+ )
+ . "\n"
+ );
+ } else {
+ $self->pushline("\n");
+ }
+}
+
+sub this_macro_needs_args {
+ my ( $macroname, $ref, $args ) = @_;
+ unless ( length($args) ) {
+ die wrap_ref_mod(
+ $ref,
+ "po4a::man",
+ dgettext(
+ "po4a",
+ "macro %s called without arguments. "
+ . "Even if placing the macro arguments on the next line is authorized "
+ . "by man(7), handling this would make the po4a parser too complicate. "
+ . "Please simply put the macro args on the same line."
+ ),
+ $macroname
+ );
+ }
+}
+
+sub pre_trans {
+ my ( $self, $str, $ref, $type ) = @_;
+
+ # Preformatting, so that translators don't see
+ # strange chars
+ my $origstr = $str;
+ print STDERR "pre_trans($str)="
+ if ( $debug{'pretrans'} );
+
+ # Do as few treatments as possible with the .de, .ie and .if sections
+ if ( defined $self->{type} && $self->{type} =~ m/^(ie|if|de)$/ ) {
+ return $str;
+ }
+
+ # Note: if you want to implement \c support, the groff man page is your playground
+ if ( not defined $self->{type} ) {
+ $str =~ s/(\G|^(?:.*?)\n|^) # Last position, or begin of a line
+ ([ \t]*[^.'][^\n]*(?<!\\)(?:\\\\)*) # the new line, which
+ \\c[ \t]*\n # ends by \c and followed by a line
+ (?![ \t]*[.'])/$1$2/sgx; # not followed by a command (.')
+ }
+ die wrap_ref_mod( $ref, "po4a::man",
+ dgettext( "po4a", "Escape sequence \\c encountered. This is not completely handled yet. Faulty input: %s" ),
+ $str )
+ if ( $str =~ /\\c/ );
+
+ $str =~ s/>/E<gt>/sg;
+ $str =~ s/</E<lt>/sg;
+ $str =~ s/EE<lt>gt>/E<gt>/g; # could be done in a smarter way?
+
+ while ( $str =~ m/^(.*)PO4A-INLINE:(.*?):PO4A-INLINE(.*)$/s ) {
+ my ( $t1, $t2, $t3 ) = ( $1, $2, $3 );
+ $str = "$1E<$2>";
+ if ($mdoc_mode) {
+
+ # When a punctuation sign must be joined to an argument, mdoc allows such a construct:
+ # .Ar file1 , file2 , file3 ) .
+ # Here, we move the punctuation out of the E<...> tag.
+ # This is reverted in post_trans.
+ # FIXME: To be checked with the French punctuation
+ while ( $str =~ m/(?<!\\) +([.,;:\)\]]) *>/s ) {
+ $str =~ s/(?<!\\) +([.,;:\)\]]) *>/>$1/s;
+ }
+ }
+ if ( defined $t3 and length $t3 ) {
+ $t3 =~ s/^\n//s;
+ $str .= "\n$t3";
+ }
+ }
+
+ # simplify the fonts for the translators
+ if ( defined $self->{type} && $self->{type} =~ m/^(SH|SS)$/ ) {
+ set_regular("B");
+ }
+ $str = do_fonts( $str, $ref );
+ if ( defined $self->{type} && $self->{type} =~ m/^(SH|SS)$/ ) {
+ set_regular("R");
+ }
+
+ # After the simplification, the first char can be a \n.
+ # Simply push these newlines before the translation, but make sure the
+ # resulting string is not empty (or an additional line will be
+ # added).
+ if ( $str =~ /^(\n+)(.+)$/s ) {
+ $self->pushline($1);
+ $str = $2;
+ }
+
+ unless ($mdoc_mode) {
+
+ # Kill minus sign/hyphen difference.
+ # Aestetic of printed man pages may suffer, but:
+ # * they are translator-unfriendly
+ # * they break when using utf8 (for obscure reasons)
+ # * they forbid the searches, since keybords don't have hyphen key
+ # * they forbid copy/paste, since options need minus sign, not hyphen
+ $str =~ s|\\-|-|sg;
+
+ # Groff bestiary
+ $str =~ s/\\\*\(lq/``/sg;
+ $str =~ s/\\\*\(rq/''/sg;
+ $str =~ s/\\\(dq/"/sg;
+ }
+
+ # non-breaking spaces
+ # some non-breaking spaces may have been added during the parsing
+ $str =~ s/\Q$nbs/\\ /sg;
+ $str =~ s/\\~/\\ /sg;
+
+ print STDERR "$str\n" if ( $debug{'pretrans'} );
+ return $str;
+}
+
+sub post_trans {
+ my ( $self, $str, $ref, $type, $wrap ) = @_;
+ my $transstr = $str;
+
+ print STDERR "post_trans($str)="
+ if ( $debug{'postrans'} );
+
+ # Do as few treatments as possible with the .de, .ie and .if sections
+ if ( defined $self->{type} && $self->{type} =~ m/^(ie|if|de)$/ ) {
+ return $str;
+ }
+
+ unless ($mdoc_mode) {
+
+ # Post formatting, so that groff see the strange chars
+ $str =~ s|\\-|-|sg; # in case the translator added some of them manually
+ # change hyphens to minus signs
+ # (this shouldn't be done for \s-<number> font size modifiers)
+ # nor on .so/.mso args
+ unless ( defined $self->{type} && $self->{type} =~ m/^m?so$/ ) {
+ my $tmp = "";
+ while ( $str =~ m/^(.*?)-(.*)$/s ) {
+ my $begin = $1;
+ $str = $2;
+ my $tmp2 = $tmp . $begin;
+ if ( ( $begin =~ m/(?<!\\)(\\\\)*\\s$/s )
+ or ( $begin =~ m/(?<!\\)(\\\\)*\\\((.|E<[gl]t>)?$/s )
+ or ( $tmp2 =~ m/(?<!\\)(\\\\)*\\[ZHhCv]'([^']|(?<!\\)(\\\\)*\\')*$/ )
+ or ( $tmp2 =~ m/(?<!\\)(\\\\)*\\(\*)?\[([^\]]|(?<!\\)(\\\\)*\\\[)*$/ )
+ or ( $tmp2 =~ m/(?<!\\)(\\\\)*\\\*\(.?$/ ) )
+ {
+ # Do not change - to \- for
+ # * \s-n (reduce font size)
+ # * \(.. (e.g. '<-', '-D')
+ # * inside a \h'...'
+ # * inside a \C'...'
+ # * inside a \[...]
+ # * inside a \*(..
+ # * inside a \*[...]
+ # * inside a \v'...'
+ # * inside a \H'...'
+ # * inside a \Z'...'
+ $tmp = $tmp2 . "-";
+ } else {
+ $tmp = $tmp2 . "\\-";
+ }
+ }
+ $str = $tmp . $str;
+ }
+ }
+
+ # There must not be an end of line inside an inline macro
+ $str =~ s/(E<\.[^>]*)\n([^>]*>)/$1 $2/gs;
+
+ # No . or ' on first char, or nroff will think it's a macro
+ # * at the beginning of a paragraph, add \& (zero width space) at
+ # the beginning of the line
+ if ( not defined $self->{type} ) {
+
+ # Only do it on regular text, because
+ # his doesn't work after a TS (this macros shift
+ # lines, which may contain macros)
+ # or for the .ta arguments (e.g. .ta .5i 3i)
+ $str =~ s/^((?:
+ (?:CW|[RBI])<
+ |$FONT_RE
+ )?
+ [.']
+ )/\\&$1/mgx;
+ } elsif ( $self->{type} =~ m/^(TP|TQ)$/ ) {
+
+ # But it is also needed for some type (e.g. TP, if followed by a
+ # font macro)
+ # This regular expression is the same as above
+ $str =~ s/^((?:(?:CW|[RBI])<|$FONT_RE)?[.'])/\\&$1/mg;
+ }
+
+ # * degraded mode, doesn't work for the first line of a paragraph
+ $str =~ s/\n([.'])/ $1/mg;
+
+ # Change ascii non-breaking space to groff one
+ my $nbs_out = get_out_nbs( $self->get_out_charset );
+ $str =~ s/\Q$nbs_out/\\ /sg if defined $nbs_out;
+
+ # No nbsp (said "\ " in groff on the last pos of the line, or groff adds
+ # an extra space
+ $str =~ s/\\ \n(?=.)/\\ /sg;
+
+ # Make sure we compute internal sequences right.
+ # think about: B<AZE E<lt> EZA E<gt>>
+ while ( $str =~ m/^(.*)(CW|[RBI])<(.*)$/s ) {
+ my ( $done, $rest ) = ( $1 . "\\f$2", $3 );
+ $done =~ s/CW$/\(CW/;
+ my $lvl = 1;
+ while ( length $rest && $lvl > 0 ) {
+ my $first = substr( $rest, 0, 1 );
+ if ( $first eq '<' ) {
+ $lvl++;
+ } elsif ( $first eq '>' ) {
+ $lvl--;
+ }
+ if ( $first eq "\n" ) {
+
+ # Don't accept \n within B<> parameters as troff seems to reset font on new line (Debian's #1016753)
+ $done .= ' ' if ( $lvl > 0 );
+ } else {
+ $done .= $first if ( $lvl > 0 );
+ }
+ $rest = substr( $rest, 1 );
+ }
+ die wrap_ref_mod( $ref || $self->{ref},
+ "po4a::man", dgettext( "po4a", "Unbalanced '<' and '>' in font modifier. Faulty message: %s" ), $str )
+ if ( $lvl > 0 );
+
+ # Return to the regular font
+ $done .= "\\fP$rest";
+ $str = $done;
+ }
+
+ while ( $str =~ m/^(.*?)E<([.'][\t ]*.*?(?<!E<[gl]t))>(.*)$/s ) {
+ my ( $t1, $t2, $t3 ) = ( $1, $2, $3 );
+ $t1 =~ s/ +$//s;
+ $t2 =~ s/\n/ /gs;
+ if ($mdoc_mode) {
+
+ # restore the punctuation inside the line (see pre_trans)
+ if ( $t3 =~ s/^([.,;:\)\]]+)//s ) {
+ my $punctuation = $1;
+ $punctuation =~ s/([.,;:\)\]])/$1 /;
+ $t2 .= " $punctuation";
+ }
+ }
+ $t3 =~ s/^ +//s;
+ if ($wrap) {
+
+ # The no-wrap case should be checked
+ $t1 =~ s/\n$//s;
+ }
+ $str = $t1;
+ if ( length $t1 ) {
+ $t1 =~ s/\n$//s;
+ $str = "$t1\n";
+ }
+ $str .= $t2;
+ if ( defined $t3 and length $t3 ) {
+ $t3 =~ s/^\n//s;
+ $str .= "\n$t3";
+ }
+ }
+ my $str2 = $str;
+ $str2 =~ s/E<[gl]t>//g;
+ die wrap_ref_mod( $ref || $self->{ref},
+ "po4a::man", dgettext( "po4a", "Unknown '<' or '>' sequence. " . "Faulty message: %s" ), $str )
+ if $str2 =~ /[<>]/;
+ $str =~ s/E<gt>/>/mg;
+ $str =~ s/E<lt>/</mg;
+
+ # Don't do that, because we'll go into trouble if previous line was .TP
+ # $str =~ s/^\\f([BI])(.*?)\\f[RP]$/\.$1 $2/mg;
+
+ unless ($mdoc_mode) {
+ my $tmp = "";
+ while ( $str =~ m/^(.*?)(``|'')(.*)$/s ) {
+ $tmp .= $1;
+ my $q = $2;
+ $str = $3;
+
+ # There are probably many more exceptions, here are those I could
+ # detect in my manpages.
+ # \*(.' \*(.`
+ # \*' \*`
+ # \N'xxx'
+ if ( $tmp =~ m/(?<!\\)(?:\\\\)*\\\*\($/s ) {
+ $tmp .= $q;
+ } elsif (
+ $tmp =~ m/(?<!\\)(?:\\\\)*\\\*\(.$/s
+ or $tmp =~ m/(?<!\\)(?:\\\\)*\\\*$/s
+ or ( $tmp =~ m/(?<!\\)(?:\\\\)*\\N'[0-9]*$/s
+ and $q eq "''" )
+ )
+ {
+ $q =~ m/(.)(.)/;
+ $tmp .= $1;
+ $str = $2 . $str;
+ } else {
+ $q =~ s/``/\\\*\(lq/;
+ $q =~ s/''/\\\*\(rq/;
+ $tmp .= $q;
+ }
+ }
+ $str = $tmp . $str;
+ }
+ if ( not defined $self->{type} ) {
+ $str =~ s/(?<!\\) $//mg;
+ }
+
+ print STDERR "$str\n" if ( $debug{'postrans'} );
+ return $str;
+}
+
+sub translate {
+ my ( $self, $str, $ref, $type ) = ( shift, shift, shift, shift );
+ my (%options) = @_;
+ my $origstr = $str;
+
+ return $str unless ( defined $str ) && length($str);
+ return $str if ( $str eq "\n" );
+
+ # Do not translate the strings that only consist of fonts, spaces and
+ # \&. This is useful because we introduced \& in shiftline.
+ if ( $str =~ m/^($FONT_RE|\s|\\&)*$/s ) {
+ do_fonts( $str, $ref || $self->{ref} );
+ return $str;
+ }
+
+ # If a string is quoted, only translate the argument between the
+ # quotes.
+ if ( $options{'wrap'} or $str !~ m/\n/s ) {
+ if ( $str =~ m/^\"(.*)\"$/s and $1 !~ m/(?<!\\)\"/ ) {
+ $str = '"' . $self->translate( $1, $ref, $type, %options ) . '"';
+ $str =~ s/\n"$/"\n/s;
+ return $str;
+ }
+ }
+
+ $str = pre_trans( $self, $str, $ref || $self->{ref}, $type );
+ $options{'comment'} .= join( '\n', map { $$_[1] } @comments );
+
+ # Translate this
+ $str = $self->SUPER::translate( $str, $ref || $self->{ref}, $type || $self->{type}, %options );
+ if ( $options{'wrap'} ) {
+ my (@paragraph);
+ @paragraph = split( /\n/, $str );
+ if ( defined( $paragraph[0] ) && $paragraph[0] eq '' ) {
+ shift @paragraph;
+ }
+ $str = join( "\n", @paragraph ) . "\n";
+ }
+ $str = post_trans( $self, $str, $ref || $self->{ref}, $type, $options{'wrap'} );
+ return $str;
+}
+
+# shortcut
+sub t {
+ return $_[0]->translate( $_[1] );
+}
+
+# shortcut.
+# As a rule of thumb, I do not recode macro names, unless they may be
+# followed by other characters.
+sub r {
+ my $self = shift;
+ my $str = shift;
+
+ # non-breaking spaces
+ # some non-breaking spaces may have been added during the parsing
+ $str =~ s/\Q$nbs/\\ /sg;
+
+ return $str;
+}
+
+sub do_paragraph {
+ my ( $self, $paragraph, $wrapped_mode ) = ( shift, shift, shift );
+
+ # Following needed because of 'ft' (at least, see ft macro below)
+ unless ( $paragraph =~ m/\n$/s ) {
+ my @paragraph = split( /\n/, $paragraph );
+
+ $paragraph .= "\n"
+ unless scalar(@paragraph) == 1;
+ }
+
+ $self->pushline( $self->translate( $paragraph, $self->{ref}, "Plain text", "wrap" => ( $wrapped_mode eq 'YES' ) ) );
+}
+
+#############################
+#### MAIN PARSE FUNCTION ####
+#############################
+sub parse {
+ my $self = shift;
+ my ( $line, $ref );
+ my ($paragraph) = ""; # Buffer where we put the paragraph while building
+ my $wrapped_mode = 'YES'; # Should we wrap the paragraph? Three possible values:
+ # YES: do wrap
+ # NO: don't wrap because this paragraph contains indented lines
+ # this status disapear after the end of the paragraph
+ # MACRONO: don't wrap because we saw the nf macro. It stays so
+ # until the next fi macro.
+
+ # We want to change the non-breaking space according to the input
+ # document charset
+ $nbs = get_in_nbs( $self->{TT}{'file_in_charset'} );
+
+ LINE:
+ undef $self->{type};
+ ( $line, $ref ) = $self->shiftline();
+
+ while ( defined($line) ) {
+
+ # print STDERR "line=$line;ref=$ref";
+ chomp($line);
+ $self->{ref} = "$ref";
+
+ # print STDERR "LINE=$line<<\n";
+
+ if ( $line =~ /^[.']/ ) {
+ die wrap_mod( "po4a::man", dgettext( "po4a", "Unparsable line: %s" ), $line )
+ unless ( $line =~ /^([.']+\\*?)(\\["#])(.*)/
+ || $line =~ /^([.'])(\S*)(.*)/ );
+ my $arg1 = $1;
+ $arg1 .= $2;
+ my $macro = $2;
+ my $arguments = $3;
+
+ if ( $inline{$macro} ) {
+ $paragraph .= "PO4A-INLINE:" . $line . ":PO4A-INLINE\n";
+ goto LINE;
+ }
+
+ # Split on spaces for arguments, but not spaces within double quotes
+ my @args = ();
+ push @args, $arg1;
+ if ( $macro =~ /^(?:ta|TP|ie|if|de)$/ ) {
+
+ # The number of spaces may be critical for the 'ta' macro,
+ # and there is no need to split the arguments.
+ push @args, $arguments;
+ } else {
+ push @args, splitargs( $ref, $arguments );
+ }
+
+ if ( length($paragraph) ) {
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $wrapped_mode = $wrapped_mode eq 'NO' ? 'YES' : $wrapped_mode;
+ }
+
+ # Special case: Don't change these lines
+ # .\" => comments
+ # .\# => comments
+ # ." => comments
+ # . => empty point on the line
+ # .tr abcd...
+ # => substitution like Perl's tr/ac/bd/ on output.
+ if ( $macro eq '\\"'
+ || $macro eq ''
+ || $macro eq 'tr'
+ || $macro eq '"'
+ || $macro eq '\\#' )
+ {
+ $self->pushline( $self->r($line) . "\n" );
+ goto LINE;
+ }
+
+ # Special case:
+ # .nf => stop wrapped mode
+ # .fi => wrap again
+ if ( $no_wrap_begin{$macro} or $no_wrap_end{$macro} ) {
+ if ( $no_wrap_end{$macro} ) {
+ $wrapped_mode = 'YES';
+ } else {
+ $wrapped_mode = 'MACRONO';
+ }
+ $self->pushline( $self->r($line) . "\n" );
+ goto LINE;
+ }
+
+ # SH resets the wrapping (in addition to starting a section)
+ if ( $macro eq 'SH' ) {
+ $wrapped_mode = 'YES';
+ }
+
+ unshift @args, $self;
+
+ # Apply macro
+ $self->{type} = $macro;
+
+ if ( defined( $macro{$macro} ) ) {
+ &{ $macro{$macro} }(@args);
+ } else {
+ if ( defined $unknown_macros ) {
+ &{$unknown_macros}(@args);
+ } else {
+ $self->pushline( $self->r($line) . "\n" );
+ die wrap_ref_mod(
+ $ref,
+ "po4a::man",
+ dgettext(
+ "po4a",
+ "Unknown macro '%s'. Remove it from the document, or refer to the Locale::Po4a::Man manpage to see how po4a can handle new macros."
+ ),
+ $line
+ );
+ }
+ }
+
+ } elsif ( $line =~ /^ +[^. ]/ ) {
+
+ # (Lines containing only spaces are handled as empty lines)
+ # Not a macro, but not a wrapped paragraph either
+ $wrapped_mode = $wrapped_mode eq 'YES' ? 'NO' : $wrapped_mode;
+ $paragraph .= $line . "\n";
+ } elsif ( $line =~ /^[^.].*/ && $line !~ /^ *$/ ) {
+
+ # (Lines containing only spaces are handled latter as empty lines)
+ if ( $line =~ /^\\"/ ) {
+
+ # special case: the line is entirely a comment, keep the
+ # comment.
+ # NOTE: comment could also be found in the middle of a line.
+ # From info groff:
+ # Escape: \": Start a comment. Everything to the end of the
+ # input line is ignored.
+ $self->pushline( $self->r($line) . "\n" );
+ goto LINE;
+ } elsif ( $line =~ /^\\#/ ) {
+
+ # Special groff comment. Do not keep the new line
+ goto LINE;
+ } else {
+
+ # Not a macro
+ # * first, try to handle some "output line continuation" (\c)
+ $paragraph =~ s/\\c *(($FONT_RE)?)\n?$/$1/s;
+
+ # * append the line to the current paragraph
+ $paragraph .= $line . "\n";
+ }
+ } else { #empty line, or line containing only spaces
+ if ( length($paragraph) ) {
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ }
+ $wrapped_mode = $wrapped_mode eq 'NO' ? 'YES' : $wrapped_mode;
+ $self->pushline( $line . "\n" );
+ }
+
+ # finally, we did not reach the end of the paragraph. The comments
+ # belong to the current paragraph.
+ push @comments, @next_comments;
+ @next_comments = ();
+
+ # Reinit the loop
+ ( $line, $ref ) = $self->shiftline();
+ undef $self->{type};
+ }
+
+ if ( length($paragraph) ) {
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $wrapped_mode = $wrapped_mode eq 'NO' ? 'YES' : $wrapped_mode;
+ $paragraph = "";
+ }
+
+ # flush the last comments
+ push @comments, @next_comments;
+ @next_comments = @comments;
+ @comments = ();
+ for my $c (@next_comments) {
+ $self->pushline( $self->r("$$c[0]\\\"$$c[1]\n") );
+ }
+
+ # reinitialize the module
+ @next_comments = ();
+ set_regular("R");
+ set_font("R");
+ set_font("R");
+ $mdoc_mode = 0;
+} # end of main
+
+# Cache the results of get_in_nbs and get_out_nbs
+{
+ my $last_in_charset;
+ my $last_in_nbs;
+
+ # get_in_nbs(charset)
+ # Return the representation of a non breaking space in the input charset
+ # (given in argument).
+ # or PO4A:VERY_IMPROBABLE_STRING_USEDFOR_NON-BREAKING-SPACES if this
+ # character doesn't exist in this charset.
+ sub get_in_nbs() {
+ my $charset = shift;
+
+ return $last_in_nbs
+ if ( defined $charset
+ and defined $last_in_charset
+ and $charset eq $last_in_charset );
+
+ my $nbs = "\xA0";
+ my $length;
+ if ( defined $charset and length $charset ) {
+ eval(
+ "\$length = Encode::from_to(\$nbs, \"latin-1\",
+ \$charset,
+ 1)"
+ );
+ }
+
+ # fall back solution
+ $nbs = "PO4A:VERY_IMPROBABLE_STRING_USEDFOR_NON-BREAKING-SPACES"
+ unless defined $length;
+ $last_in_charset = $charset;
+ $last_in_nbs = $nbs;
+
+ return $last_in_nbs;
+ }
+
+ my $last_out_charset;
+ my $last_out_nbs;
+
+ # get_out_nbs(charset)
+ # Return the representation of a non breaking space in the output charset
+ # (given in argument).
+ # or undef if this character doesn't exist in this charset.
+ sub get_out_nbs() {
+ my $charset = shift;
+
+ return $last_out_nbs
+ if ( defined $charset
+ and defined $last_out_charset
+ and $charset eq $last_out_charset );
+
+ my $nbs = "\xA0";
+ my $length;
+ if ( defined $charset and length $charset ) {
+ eval(
+ "\$length = Encode::from_to(\$nbs, \"latin-1\",
+ \$charset,
+ 1)"
+ );
+ }
+
+ # fall back solution
+ undef $nbs
+ unless defined $length;
+ $last_out_charset = $charset;
+ $last_out_nbs = $nbs;
+
+ return $last_out_nbs;
+ }
+
+}
+
+# We can't push the header in the first line of the document, as in the
+# other module, because the first line may contain indications on how the
+# man page must be processed.
+sub docheader {
+ return "";
+}
+
+# The header is pushed just before the .TH macro (this macro is mandatory
+# and must be specified at the begining (there may be macro definitions
+# before).
+sub push_docheader {
+ my $self = shift;
+ $self->pushline( ".\\\"*******************************************************************\n"
+ . ".\\\"\n"
+ . ".\\\" This file was generated with po4a. Translate the source file.\n"
+ . ".\\\"\n"
+ . ".\\\"*******************************************************************\n" );
+}
+
+# Split request's arguments.
+# see:
+# info groff --index-search "Request Arguments"
+sub splitargs {
+ my ( $ref, $arguments ) = ( $_[0], $_[1] );
+ my @args = ();
+ my $buffer = "";
+ my $escaped = 0;
+ if ( !defined $arguments ) {
+ return @args;
+ }
+
+ # change non-breaking space before to ensure that split does what we want
+ # We change them back before pushing into the arguments. The one which
+ # will be translated will have the same change again (in pre_trans and
+ # post_trans), but the ones which won't get translated are not changed
+ # anymore. Let's play safe.
+ $arguments =~ s/\\ /$nbs/g;
+ $arguments =~ s/^ +//;
+ $arguments =~ s/\\&"/\\(dq/g;
+ $arguments =~ s/^ *//;
+ while ( length $arguments ) {
+ if ( $arguments =~ s/^"((?:[^"]|"")*)"(?!") *// ) {
+ my $a = $1;
+ $a =~ s/""/"/g if defined $a;
+ push @args, $a;
+ } elsif ( $arguments =~ s/^"((?:[^"]|"")*) *$// ) {
+
+ # Unterminated quote, but this seems to be handled by removing
+ # the trailing spaces and closing the quotes.
+ my $a = $1;
+ $a =~ s/""/"/g if defined $a;
+ push @args, $a;
+ } elsif ( $arguments =~ s/^([^ ]+) *// ) {
+ push @args, $1;
+ } else {
+ die wrap_ref_mod( $ref, "po4a::man", dgettext( "po4a", "Cannot parse command arguments: %s" ), $arguments );
+ }
+ }
+ if ( $debug{'splitargs'} ) {
+ print STDERR "ARGS=";
+ map { print STDERR "$_^" } @args;
+ print STDERR "\n";
+ }
+
+ return @args;
+}
+
+{
+ #static variables
+ # font stack.
+ # Keep track of the current font (because a font modifier can
+ # stay open at the end of a paragraph), and the previous font (to
+ # handle \fP)
+ my $current_font = "R";
+ my $previous_font = "R";
+
+ # $regular_font describe the "Regular" font, which is the font used
+ # when there is no font modifier.
+ # For example, .SS use a Bold font, and thus in
+ # .SS This is a \fRsubsection\fB header
+ # the \fR and \fB font modifiers have to be kept.
+ my $regular_font = "R";
+
+ # Set the regular font
+ # It takes the regular font in argument (when no argument is provided,
+ # it uses "R").
+ sub set_regular {
+ print STDERR "set_regular('@_')\n"
+ if ( $debug{'fonts'} );
+ set_font(@_);
+ $regular_font = $current_font;
+ }
+
+ sub set_font {
+ print STDERR "set_font('@_')\n"
+ if ( $debug{'fonts'} );
+ my $saved_previous = $previous_font;
+ $previous_font = $current_font;
+
+ if ( !defined $_[0] ) {
+ $current_font = "R";
+ } elsif ( $_[0] =~ /^(P|\[\]|\[P\])/ ) {
+ $current_font = $saved_previous;
+ } elsif ( length( $_[0] ) == 1 ) {
+ $current_font = $_[0];
+ } elsif ( length( $_[0] ) == 2 ) {
+ $current_font = "($_[0]";
+ } else {
+ $current_font = "[$_[0]]";
+ }
+ print STDERR "r:'$regular_font', p:'$previous_font', c:'$current_font'\n"
+ if ( $debug{'fonts'} );
+ }
+
+ sub do_fonts {
+
+ # one argument: a string
+ my ( $str, $ref ) = ( shift, shift );
+ print STDERR "do_fonts('$str', '$ref')="
+ if ( $debug{'fonts'} );
+
+ # restore the font stack
+ $str = "\\f$previous_font\\f$current_font" . $str;
+
+ # In order to be able to split on /\\f/, without problem with
+ # \\foo, groff backslash (\\) are changed to the (equivalent)
+ # form: \e (this should be done in shiftline).
+ my @array1 = split( /\\f/, $str );
+
+ $str = shift @array1; # The first element is always empty because
+ # the $current_font was put at the beginning
+ # $last_font indicates the last font that was appended to the buffer.
+ # It differ from $current_font because concecutive identical fonts
+ # are not written in the buffer.
+ my $last_font = $regular_font;
+
+ foreach my $elem (@array1) {
+
+ # Do not touch the fonts in the inline macros
+ # These inline macros may have their argument in bold or italic,
+ # we can't know.
+ if ( $str =~ m/E<\.([^>]|E<gt>|E<lt>)*$/s ) {
+
+ # We can't use \\f here, otherwise the font simplifier regexp
+ # will use the fonts of the inline macros.
+ $str .= "PO4A-FAKE-FONT" . $elem;
+ next;
+ }
+
+ # Replace \fP by the exact font (because some font modifiers will
+ # be removed or added, which will break groff's font stack)
+ $elem =~ s/^(P|\[\]|\[P\])/$previous_font/s;
+
+ # change \f1 to \fR, etc.
+ # Those fonts are defined in the DESC file, which
+ # may depend on the groff device.
+ # fonts 1 to 4 are usually mapped to R, I, B, BI
+ # TODO: use an array for the font positions. This
+ # array should be updated by .fp requests.
+ $elem =~ s/^1/R/;
+ $elem =~ s/^2/I/;
+ $elem =~ s/^3/B/;
+ $elem =~ s/^4/(BI/;
+
+ if ( $elem =~ /^([1-4]|B|I|R|\(..|\[[^]]*\]|L)(.*)$/s ) {
+
+ # Each element should now start by a recognized font modifier
+ my $new_font = $1;
+ my $arg = $2;
+
+ # Update the font stack
+ $previous_font = $current_font;
+ $current_font = $new_font;
+
+ if ( $new_font eq $last_font ) {
+
+ # continue with the same font.
+ $str .= $arg;
+ } else {
+
+ # A new font is used, update $last_font
+ $last_font = $new_font;
+ $str .= "\\f" . $elem;
+ }
+ } else {
+ die wrap_ref_mod( $ref, "po4a::man", dgettext( "po4a", "Unsupported font in: '%s'." ), "\\f" . $elem );
+ }
+ }
+
+ # Do some simplification (they don't change the font stack)
+ # Remove empty font modifiers at the end
+ $str =~ s/($FONT_RE)*$//s;
+
+ # close any font modifier
+ if ( $str =~ /.*($FONT_RE)(.*?)$/s && $1 ne "\\f$regular_font" ) {
+ $str =~ s/(\n?)$/\\f$regular_font$1/;
+ }
+
+ # remove fonts with empty argument
+ while ( $str =~ /($FONT_RE){2}/ ) {
+
+ # while $str has two consecutive font modifiers
+ # only keep the second one.
+ $str =~ s/($FONT_RE)($FONT_RE)/$2/s;
+ }
+
+ # when there are two consecutive switches to the regular font,
+ # remove the last one.
+ while (
+ $str =~ /^(.*)\\f$regular_font # anything followed by a
+ # regular font
+ ((?:\\(?!f)|[^\\])*) # the text concerned by
+ # this font (i.e. without any
+ # font modifier, i.e. it
+ # contains no '\' followed by
+ # an 'f')
+ \\f$regular_font # another regular font
+ (.*)$/sx
+ )
+ {
+ $str = "$1\\f$regular_font$2$3";
+ }
+
+ # the regular font modifier at the beginning of the string is not
+ # needed (the do_fonts subroutine ensure that every paragraph ends with
+ # the regular font.
+ if ( $str =~ /^(.*?)\\f$regular_font(.*)$/s && $1 !~ /$FONT_RE/ ) {
+ $str = "$1$2";
+ }
+
+ # Use special markup for common fonts, so that translators don't see
+ # groff's font modifiers
+ my $PO_FONTS = "B|I|R|\\(CW";
+
+ # remove the regular font from this list
+ $PO_FONTS =~ s/^$regular_font\|//;
+ $PO_FONTS =~ s/\|$regular_font\|/|/;
+ $PO_FONTS =~ s/\|$regular_font$//;
+ while (
+ $str =~ /^(.*?) # $1: anything (non greedy: as
+ # few as possible)
+ \\f($PO_FONTS) # followed by a common font
+ # modifier ($2)
+ ((?:\\[^f]|[^\\])*) # $3: the text concerned by
+ # this font (i.e. without any
+ # font modifier, i.e. it
+ # contains no '\' followed by
+ # an 'f')
+ \\f # the next font modifier
+ (.*)$/sx
+ )
+ { # $4: anything up to the end
+ my ( $begin, $font, $arg, $end ) = ( $1, $2, $3, $4 );
+ if ( $end =~ /^$regular_font(.*)$/s ) {
+
+ # no need to add a switch to $regular_font
+ $str = $begin . "$font<$arg>$1";
+ } else {
+ $str = $begin . "$font<$arg>\\f$end";
+ }
+ }
+ $str =~ s/\(CW</CW</sg;
+ $str =~ s/PO4A-FAKE-FONT/\\f/sg;
+
+ print STDERR "'$str'\n" if ( $debug{'fonts'} );
+ return $str;
+ }
+}
+
+##########################################
+#### DEFINITION OF THE MACROS WE KNOW ####
+##########################################
+# Each sub is passed self as first arg,
+# plus the args present on the roff line
+# ie, <<.TH LS "1" "October 2002" "ls (coreutils) 4.5.2" "User Commands">>
+# is passed (".TH","LS","1","October 2002","ls (coreutils) 4.5.2","User Commands")
+# Macro name is also passed, because .B (bold) will be encoded in pod format (and mangeled).
+# They should return a list, which will be join'ed(' ',..)
+# or undef when they don't want to add anything
+
+# Some well known macro handling
+
+# For macro taking only one argument, but people may forget the quotes.
+# Example: >>.SH Another Section<< which should be >>.SH "Another Section"<<
+sub translate_joined {
+ my ( $self, $macroname, $macroarg ) = ( shift, shift, join( " ", @_ ) );
+
+ #section# .S[HS] name
+
+ $self->pushmacro( $macroname, $self->t($macroarg) );
+}
+
+# For macro taking several arguments, having to be translated separately
+sub translate_each {
+ my ( $self, $first ) = ( shift, 0 );
+ $self->pushmacro( map { $first++ ? $self->t($_) : $_ } @_ );
+}
+
+# For macro which shouldn't be given any arg
+sub noarg {
+ my $self = shift;
+ warn "Macro $_[0] does not accept any argument\n"
+ if ( defined( $_[1] ) );
+ $self->pushmacro(@_);
+}
+
+# For macro whose arguments shouldn't be translated
+sub untranslated {
+ my ( $self, $first ) = ( shift, 0 );
+ $self->pushmacro( map { $first++ ? $self->r($_) : $_ } @_ );
+}
+
+###
+### man 7 man
+###
+
+$macro{'TH'} = sub {
+ my $self = shift;
+ my ( $th, $title, $section, $date, $source, $manual ) = @_;
+
+ #Preamble#.TH title section date source manual
+ # print STDERR "TH=$th;titre=$title;sec=$section;date=$date;source=$source;manual=$manual\n";
+
+ # Reset the memories
+ $self->push_docheader();
+
+ $self->pushmacro( $th, $self->t($title), $section, $self->t($date), $self->t($source), $self->t($manual) );
+};
+
+# .SS t Subheading t (like .SH, but used for a subsection inside a section).
+$macro{'SS'} = $macro{'SH'} = sub {
+ if ( !defined $_[2] ) {
+
+ # The argument is on the next line.
+ my ( $self, $macroname ) = ( shift, shift );
+ my ( $l2, $ref2 ) = $self->shiftline();
+ if ( $l2 =~ /^\./ ) {
+ $self->SUPER::unshiftline( $l2, $ref2 );
+ } else {
+ chomp($l2);
+ $self->pushmacro( $macroname, $self->t($l2) );
+ }
+ return;
+ } else {
+ return translate_joined(@_);
+ }
+};
+
+# Macro: .SM [text]
+# Set the text on the same line or the text on the next line in a
+# font that is one point size smaller than the default font.
+# FIXME: Maybe we should find a better way to represent this (inline is
+# not really nice in the PO).
+$inline{'SM'} = 1;
+
+# .SP n Skip n lines (I think)
+$macro{'SP'} = \&untranslated;
+
+#Normal Paragraphs
+# .LP Same as .PP (begin a new paragraph).
+# .P Same as .PP (begin a new paragraph).
+# .PP Begin a new paragraph and reset prevailing indent.
+#Relative Margin Indent
+# .RS i Start relative margin indent - moves the left margin i to the right
+# As a result, all following paragraph(s) will be indented until
+# the corresponding .RE.
+# .RE End relative margin indent.
+$macro{'LP'} = $macro{'P'} = $macro{'PP'} = sub {
+ noarg(@_);
+
+ # From info groff:
+ # The font size and shape are reset to the default value (10pt roman if no
+ # `-rS' option is given on the command line).
+ set_font("R");
+};
+$macro{'RE'} = \&noarg;
+$macro{'RS'} = \&untranslated;
+
+sub parse_tp_tq {
+ my $self = shift;
+ my ( $line, $l2, $ref2 );
+ $line .= $_[0] if defined( $_[0] );
+ $line .= ' ' . $_[1] if defined( $_[1] );
+ $self->pushline( $self->r($line) . "\n" );
+
+ ( $l2, $ref2 ) = $self->shiftline();
+ chomp($l2);
+ while ( $l2 =~ /^\.PD/ ) {
+ $self->pushline( $self->r($l2) . "\n" );
+ ( $l2, $ref2 ) = $self->shiftline();
+ chomp($l2);
+ }
+
+ # Deal with \c line continuation in .TP or .TQ
+ while ( $l2 =~ s/\\c$// ) {
+ my ( $l3, $ref3 ) = $self->shiftline();
+ $l2 .= $l3;
+ chomp($l2);
+ }
+ if ( $l2 =~ /^([.'][\t ]*([^\t ]*))(?:([\t ]+)(.*)$|$)/ ) {
+ if ( $inline{$2} ) {
+ my $tmp = "";
+ if ( defined $4 and length $4 ) {
+ $tmp = $3 . $self->t( $4, "wrap" => 0 );
+ }
+ $self->pushline( $1 . $tmp . "\n" );
+ } else {
+
+ # If the line after a .TP is a macro,
+ # let the parser do it's job.
+ # Note: use Transtractor unshiftline for now. This may require an
+ # implementation of the man module's own implementation.
+ # This may be a problem if, for example, the line resulted
+ # of a line continuation.
+ $self->SUPER::unshiftline( $l2, $ref2 );
+ }
+ } else {
+ $self->pushline( $self->t( $l2, "wrap" => 0 ) . "\n" );
+ }
+}
+
+#Indented Paragraph Macros
+# .TP i Begin paragraph with hanging tag. The tag is given on the next line,
+# but its results are like those of the .IP command.
+$macro{'TP'} = sub {
+ parse_tp_tq(@_);
+
+ # From info groff:
+ # Note that neither font shape nor font size of the label [i.e. argument
+ # or first line] is set to a default value; on the other hand, the rest of
+ # the text has default font settings.
+ set_font("R");
+};
+
+# Indented Paragraph Macros
+# .TQ Indicates continuation of the .TP labels that precede the indented
+# paragraph.
+$macro{'TQ'} = sub {
+ warn "Macro $_[1] does not accept any argument\n"
+ if ( defined( $_[2] ) );
+
+ parse_tp_tq(@_);
+};
+
+# Indented Paragraph Macros
+# .HP i Begin paragraph with a hanging indent (the first line of the paragraph
+# is at the left margin of normal paragraphs, and the rest of the para-
+# graph's lines are indented).
+#
+$macro{'HP'} = sub {
+ untranslated(@_);
+
+ # From info groff:
+ # Font size and face are reset to their default values.
+ set_font("R");
+};
+
+# Indented Paragraph Macros
+# .IP [designator] [nnn]
+# Sets up an indented paragraph, using designator as a tag to mark
+# its beginning. The indentation is set to nnn if that argument is
+# supplied (default unit is `n'), otherwise the default indentation
+# value is used. Font size and face of the paragraph (but not the
+# designator) are reset to its default values. To start an indented
+# paragraph with a particular indentation but without a designator,
+# use `""' (two doublequotes) as the second argument.
+
+# Note that the above is the groff_man(7) version, which of course differs radically
+# from man(7). In one case, the designator is optional and the nnn is not, and the
+# contrary in the other. This implies that when sticking to groff_man(7), we should
+# mark an uniq argument as translatable.
+
+$macro{'IP'} = sub {
+ my $self = shift;
+ if ( defined $_[2] ) {
+ $self->pushmacro( $_[0], $self->t( $_[1] ), $_[2] );
+ } elsif ( defined $_[1] ) {
+ $self->pushmacro( $_[0], $self->t( $_[1] ) );
+ } else {
+ $self->pushmacro(@_);
+ }
+
+ # From info groff:
+ # Font size and face of the paragraph (but not the designator) are reset
+ # to their default values.
+ set_font("R");
+};
+
+# Hypertext Link Macros
+# .UR u Begins a hypertext link to the URI (URL) u; it will end with
+# the corresponding UE command. When generating HTML this should
+# translate into the HTML command <A HREF="u">.
+# There is an exception: if u is the special value ":", then no
+# hypertext link of any kind will be generated until after the
+# closing UE (this permits disabling hypertext links in
+# phrases like LALR(1) when linking is not appropriate).
+# .UE Ends the corresponding UR command; when generating HTML this
+# should translate into </A>.
+# .UN u Creates a named hypertext location named u; do not include a
+# corresponding UE command.
+# When generating HTML this should translate into the HTML command
+# <A NAME="u" id="u">&nbsp;</A>
+#
+# E-Mail address Macros
+# .MT m Begins a mailto link to the adress m; it will end with
+# the corresponding ME command. When generating HTML this should
+# translate into the HTML command <A HREF="mailto:m">.
+# .ME Ends the corresponding MT command; when generating HTML this
+# should translate into </A>.
+$inline{'UR'} = $inline{'MT'} = 1;
+$inline{'UE'} = $inline{'ME'} = 1;
+$macro{'UN'} = \&translate_joined;
+
+# Macros to describe command synopses
+#
+# These macros are a convenience for authors. They also assist
+# automated translation tools and help browsers in recognizing command
+# synopses and treating them differently from running text.
+#
+# .OP key value
+# Describe an optional command argument. The arguments of this
+# macro are set surrounded by option braces in the default Roman
+# font; the first argument is printed with a bold face, while
+# the second argument is typeset as italic.
+#
+# .SY command
+# Begin synopsis. Takes a single argument, the name of a
+# command. Text following, until closed by .YS, is set with a
+# hanging indentation with the width of command plus a space.
+# This produces the traditional look of a Unix command synopsis.
+#
+# .YS This macro restores normal indentation at the end of a command
+# synopsis.
+$macro{'OP'} = \&translate_each;
+$macro{'SY'} = \&translate_joined;
+$macro{'YS'} = \&noarg;
+
+# Miscellaneous Macros
+# .DT Reset tabs to default tab values (every 0.5 inches); does not
+# cause a break.
+# .PD d Set inter-paragraph vertical distance to d (if omitted, d=0.4v);
+# does not cause a break.
+$macro{'DT'} = \&noarg;
+$macro{'PD'} = \&untranslated;
+
+# Indexing term (printed on standard error).
+# (ms macros)
+$macro{'IX'} = \&translate_each;
+
+###
+### groff macros
+###
+# .br
+$macro{'br'} = \&noarg;
+
+# .bp N Eject current page and begin new page.
+$macro{'bp'} = \&untranslated;
+
+# .ad Begin line adjustment for output lines in current adjust mode.
+# .ad c Start line adjustment in mode c (c=l,r,b,n).
+$macro{'ad'} = \&untranslated;
+
+my %ds_variables;
+
+# .ds stringvar anything
+# Set stringvar to anything.
+$macro{'ds'} = sub {
+ my ( $self, $m ) = ( shift, shift );
+ my $name = shift;
+ my $string = "@_";
+ $ds_variables{$name} = $string;
+
+ # indicate to which variable this corresponds. The translator can
+ # find references to this string in the translation "\*(name" or
+ # "\*[name]"
+ $self->{type} = "ds $name";
+ $self->pushline( $m . " " . $self->r($name) . " " . $self->translate($string) . "\n" );
+};
+
+# .de macro [end] Define or redefine macro until end is encountered (end=.. by default).
+# .de1 macro [end] Define or redefine macro until end is encountered (end=.. by default), turns off compatibility mode while executing the macro.
+# .dei macro [end] Define or redefine macro until end is encountered (end=.. by default) substituting parameters with '.ds' content
+# .dei1 macro [end] Define or redefine macro until end is encountered (end=.. by default) substituting parameters and turning compatibility off
+
+$macro{'de'} = $macro{'de1'} = $macro{'dei'} = $macro{'dei1'} = sub {
+ my $self = shift;
+ if ( $groff_code ne "fail" ) {
+ my $paragraph = "@_";
+ my $end = ".";
+ my $comment;
+ my $macroname = $_[1];
+ $macroname = $ds_variables{$macroname} if ( $_[0] eq "dei" || $_[0] eq "dei1" );
+ if ( $macroname =~ m/(.*?)\\"(.*)$/ ) {
+
+ # Remove comments after the macro name (Debian's #1017837)
+ ( $macroname, $comment ) = ( $1, $2 );
+ }
+ $macroname =~ s/^ *//;
+ $macroname =~ s/ *$//;
+ unless ( exists $macro{$macroname} || exists $inline{$macroname} ) {
+ if ( defined $comment ) {
+ $comment =~ s/^ *//;
+ $comment =~ s/ *$//;
+ warn wrap_mod(
+ "po4a::man",
+ dgettext(
+ "po4a",
+ "This page defines a new macro '%s' with '%s' (inline comment: %s), but you did not specify the expected po4a behavior "
+ . "when '%s' is used. You will get an error if this macro is actually used in your page.\n"
+ . "Add your macro to one of the '%s', '%s', '%s', '%s', '%s' or '%s' parameters to avoid issues.\n"
+ . "For example, passing '%s' to po4a will ensure that the defined macro remains hidden from translators.\n"
+ . "Please refer to the manpage of Locale::Po4a::Man for more info on these parameters.\n"
+ ),
+ $macroname,
+ $_[0],
+ $comment,
+ $macroname,
+ 'untranslated',
+ 'noarg',
+ 'translate_joined',
+ 'translate_each',
+ 'no_wrap',
+ 'inline',
+ "-o untranslated=$macroname"
+ );
+ } else {
+ warn wrap_mod(
+ "po4a::man",
+ dgettext(
+ "po4a",
+ "This page defines a new macro '%s' with '%s', but you did not specify the expected po4a behavior "
+ . "when '%s' is used. You will get an error if this macro is actually used in your page.\n"
+ . "Add your macro to one of the '%s', '%s', '%s', '%s', '%s' or '%s' parameters to avoid issues.\n"
+ . "For example, passing '%s' to po4a will ensure that the defined macro remains hidden from translators.\n"
+ . "Please refer to the manpage of Locale::Po4a::Man for more info on these parameters.\n"
+ ),
+ $macroname,
+ $_[0],
+ $macroname,
+ 'untranslated',
+ 'noarg',
+ 'translate_joined',
+ 'translate_each',
+ 'no_wrap',
+ 'inline',
+ "-o untranslated=$macroname"
+ );
+ }
+ }
+ if ( $paragraph =~ /^[.'][\t ]*d[ei1]*[\t ]+([^\t ]+)[\t ]+([^\t ]+)[\t ]$/ ) {
+ $end = $2;
+ $end = $ds_variables{$end} if ( $_[0] eq "dei" || $_[0] eq "dei1" );
+ }
+ my ( $line, $ref ) = $self->SUPER::shiftline();
+ chomp $line;
+ $paragraph .= "\n" . $line;
+ while ( defined($line) and $line ne ".$end" ) {
+ ( $line, $ref ) = $self->SUPER::shiftline();
+ if ( defined $line ) {
+ chomp $line;
+ $paragraph .= "\n" . $line;
+ }
+ }
+ $paragraph .= "\n";
+ if ( $groff_code eq "verbatim" ) {
+ $self->pushline( $self->r($paragraph) );
+ } else {
+ $self->pushline( $self->translate( $paragraph, $self->{ref}, "groff code", "wrap" => 0 ) );
+ }
+ } else {
+ die wrap_ref_mod(
+ $self->{ref},
+ "po4a::man",
+ dgettext(
+ "po4a",
+ "This page defines a new macro with '%s'. Since po4a is not a real groff parser, this is not supported. "
+ . "The option '%s' gets these macros copied verbatim in the translated file, but it's not very robust. "
+ . "'%s' shows these macros to the translators, but groff macros are not user-friendly for translators."
+ ),
+ $_[0],
+ "groff_code=verbatim",
+ "groff_code=translate"
+ );
+ }
+};
+
+# .fam Return to previous font family.
+# .fam name Set the current font family to name.
+$macro{'fam'} = \&untranslated;
+
+# .fc a b Set field delimiter to a and pad character to b.
+$macro{'fc'} = \&untranslated;
+
+# .ft font Change to font name or number font;
+$macro{'ft'} = sub {
+ if ( defined $_[2] ) {
+ set_font( $_[2] );
+ } else {
+ set_font("P");
+ }
+};
+
+# .hc c Set up additional hyphenation indicator character c.
+$macro{'hc'} = \&untranslated;
+
+# .hy Enable hyphenation (see nh)
+# .hy N Switch to hyphenation mode N.
+# .hym n Set the hyphenation margin to n (default scaling indicator m).
+# .hys n Set the hyphenation space to n.
+$macro{'hy'} = $macro{'hym'} = $macro{'hys'} = \&untranslated;
+
+# .ie cond anything If cond then anything else goto .el.
+# .if cond anything If cond then anything; otherwise do nothing.
+$macro{'ie'} = $macro{'if'} = sub {
+ my $self = shift;
+ if ( $groff_code ne "fail" ) {
+ my $m = $_[0];
+ my $paragraph = "@_";
+ my ( $line, $ref );
+ my $count = 0;
+ $count = 1 if ( $paragraph =~ m/(?<!\\)\\\{/s );
+ while (( $paragraph =~ m/(?<!\\)\\$/s )
+ or ( $count > 0 ) )
+ {
+ ( $line, $ref ) = $self->SUPER::shiftline();
+ chomp $line;
+ $paragraph .= "\n" . $line;
+ $count += 1 if ( $line =~ m/(?<!\\)\\\{/s );
+ $count -= 1 if ( $line =~ m/(?<!\\)\\\}/s );
+ }
+ if ( $m eq '.ie' ) {
+
+ # The .el line may be preceded by comments
+ ( $line, $ref ) = $self->SUPER::shiftline();
+ chomp $line;
+ while ( $line =~ m/^[.']\\"/ ) {
+ $paragraph .= "\n" . $line;
+ ( $line, $ref ) = $self->SUPER::shiftline();
+ chomp $line;
+ }
+
+ if ( $line !~ m/^[.'][ \t]*el(\s|\\\{)/ ) {
+ die wrap_ref_mod( $self->{ref}, "po4a::man",
+ dgettext( "po4a", "The .ie macro must be followed by a .el macro." ) );
+ }
+ my $paragraph2 = $line;
+ $count = 0;
+ $count = 1 if ( $line =~ m/(?<!\\)\\\{/s );
+ while (( $paragraph2 =~ m/(?<!\\)\\$/s )
+ or ( $count > 0 ) )
+ {
+ ( $line, $ref ) = $self->SUPER::shiftline();
+ chomp $line;
+ $paragraph2 .= "\n" . $line;
+ $count += 1 if ( $line =~ m/(?<!\\)\\\{/s );
+ $count -= 1 if ( $line =~ m/(?<!\\)\\\}/s );
+ }
+ $paragraph .= "\n" . $paragraph2;
+ }
+ $paragraph .= "\n";
+ if ( $groff_code eq "verbatim" ) {
+ $self->pushline( $self->r($paragraph) );
+ } else {
+ $self->pushline( $self->translate( $paragraph, $self->{ref}, "groff code", "wrap" => 0 ) );
+ }
+ } else {
+ die wrap_ref_mod(
+ $self->{ref},
+ "po4a::man",
+ dgettext(
+ "po4a",
+ "This page uses conditionals with '%s'. Since po4a is not a real groff parser, this is not supported by default. "
+ . "The option '%s' gets these macros copied verbatim in the translated file, but it's not very robust. "
+ . "'%s' shows these macros to the translators, but groff macros are not user-friendly for translators."
+ ),
+ $_[0],
+ "groff_code=verbatim",
+ "groff_code=translate"
+ );
+ }
+};
+
+# .el anything Display the parameter (part of a if-then-else with .ie)
+$macro{'el'} = \&translate_joined;
+
+# .in N Change indent according to N (default scaling indicator m).
+$macro{'in'} = \&untranslated;
+
+# .ig end Ignore text until .end.
+$macro{'ig'} = sub {
+ my $self = shift;
+ $self->pushmacro(@_);
+ my ( $name, $end ) = ( shift, shift || '' );
+ $end = '' if ( $end =~ m/^\\\"/ );
+ my ( $line, $ref ) = $self->shiftline();
+ while ( defined($line) ) {
+ $self->pushline( $self->r($line) );
+ last if ( $line =~ /^\.$end\./ );
+ ( $line, $ref ) = $self->shiftline();
+ }
+};
+
+# .it n macro Set an input line trap.
+$macro{'it'} = \&untranslated;
+
+# .lf N file Set input line number to N and filename to file.
+$macro{'lf'} = \&untranslated;
+
+# .ll N Set line length according to N
+$macro{'ll'} = \&untranslated;
+
+# .nh disable hyphenation (see hy)
+$macro{'nh'} = \&untranslated;
+
+# .na No Adjusting (see ad)
+$macro{'na'} = \&untranslated;
+
+# .ne N Need N vertical space
+$macro{'ne'} = \&untranslated;
+
+# .nr register N M
+# Define or modify register
+$macro{'nr'} = \&untranslated;
+
+# .ps N Point size; same as \s[N]
+$macro{'ps'} = \&untranslated;
+
+# .so filename Include source file.
+# .mso groff variant of .so (other search path)
+$macro{'so'} = $macro{'mso'} = sub {
+ warn wrap_mod( "po4a::man",
+ dgettext( "po4a", "This page includes another file with '%s'. Do not forget to translate this file ('%s')." ),
+ $_[1], $_[2] );
+ my $self = shift;
+ $self->pushmacro(@_);
+};
+
+# .sp Skip one line vertically.
+# .sp N Space vertical distance N
+$macro{'sp'} = \&untranslated;
+
+# .vs [space]
+# .vs +space
+# .vs -space
+# Change (increase, decrease) the vertical spacing by SPACE. The
+# default scaling indicator is `p'.
+$macro{'vs'} = \&untranslated;
+
+# .ta T N Set tabs after every position that is a multiple of N.
+# .ta n1 n2 ... nn T r1 r2 ... rn
+# Set tabs at positions n1, n2, ..., nn, [...]
+$macro{'ta'} = sub {
+
+ # In some cases, a ta request can contain a translatable argument.
+ # FIXME: detect those cases (something like 5i does not need to be
+ # translated)
+ my ( $self, $m ) = ( shift, shift );
+ my $line = "@_";
+ $line =~ s/^ +//;
+ $self->pushline( $m . " " . $self->translate( $line, $self->{ref}, 'ta' ) . "\n" );
+};
+
+# .ti +N Temporary indent next line (default scaling indicator m).
+$macro{'ti'} = \&untranslated;
+
+###
+### tbl macros
+###
+$macro{'TS'} = sub {
+ my $self = shift;
+ my ( $in_headers, $tab, $buffer ) = ( 1, "\t", "" );
+ my ( $in_textblock, $preline, $postline ) = ( 0, "", "" );
+ my ( $line, $ref ) = $self->shiftline();
+ my @options;
+
+ # Push table start
+ $self->pushmacro(@_);
+ while ( defined($line) ) {
+ if ( $line =~ /^\.TE/ ) {
+
+ # Table end
+ $self->pushline( $self->r($line) );
+ return;
+ }
+ if ($in_headers) {
+ if ( $line =~ /;$/ ) { # global options line
+ if ( $line =~ /\btab\s*\((.)\)/ ) {
+ $tab = $1;
+ }
+ } elsif ( $line =~ /\.$/ ) {
+ $in_headers = 0;
+ }
+ $self->pushline( $self->r($line) );
+ } elsif ( $in_textblock && $line =~ /^T}\s*/ ) { # end of text block
+ $in_textblock = 0;
+ $preline = $&; # save the `T}' marker to be output later
+ $line = $'; # save the remaing part of the line
+ # Drop any EOL from entry to be translated and save it for
+ # output below.
+ if ( chomp $buffer ) {
+ $postline .= "\n";
+ }
+ $self->pushline( $self->translate( $buffer, $ref, 'tbl table' ) . $postline );
+ $buffer = $postline = "";
+ next; # continue processing with the remaining part of the line
+ } elsif ( $in_textblock && $line =~ /^[.']/ ) {
+
+ # TODO: properly handle macros inside text blocks, currently we mark them
+ # for translations just like the previous version did
+ $self->pushline( $self->translate( $buffer, $ref, 'tbl table' ) );
+ $self->pushline( $self->translate( $line, $ref, 'tbl table' ) );
+ $buffer = "";
+ } elsif ( $line =~ /\\$/ || $in_textblock ) {
+
+ # Lines are continued on \ at the end of line
+ $buffer .= $line;
+ } else {
+ if ( $line =~ s/\s*T\{\s*$// ) { # start of text block
+ $in_textblock = 1;
+ $postline = $&; # save the `T{' to be outputed below
+ }
+
+ $buffer .= $line;
+
+ # Drop any EOL from entry to be translated and save it for
+ # output below.
+ if ( chomp $buffer ) {
+ $postline .= "\n";
+ }
+
+ # Arguments to translate are separated by the table's tab
+ # character (\t by default). We must be careful to preserve
+ # empty trailing fields, since in particular a text block is
+ # likely to show up as an empty trailing field here.
+ $self->pushline( $preline
+ . join( $tab, map { $self->translate( $_, $ref, 'tbl table' ) } split( /\Q$tab/, $buffer, -1 ) )
+ . $postline );
+
+ $buffer = $preline = $postline = "";
+
+ }
+ ( $line, $ref ) = $self->shiftline();
+ }
+};
+
+###
+### info groff
+###
+
+## Builtin register, of course they do not need to be translated
+
+$macro{'F'} = $macro{'H'} = $macro{'V'} = $macro{'A'} = $macro{'T'} = \&untranslated;
+
+## ms package
+##
+#
+# Displays and keeps. None of these macro accept a translated argument
+# (they allow to make blocks of text which cannot be broken by new page)
+
+$macro{'DS'} = $macro{'LD'} = $macro{'DE'} = \&untranslated;
+$macro{'ID'} = $macro{'BD'} = $macro{'CD'} = \&untranslated;
+$macro{'RD'} = $macro{'KS'} = $macro{'KE'} = \&untranslated;
+$macro{'KF'} = $macro{'B1'} = $macro{'B2'} = \&untranslated;
+$macro{'DA'} = \&translate_joined;
+
+# .pc c Change page number character
+$macro{'pc'} = \&translate_joined;
+
+# .ns Disable .sp and such
+# .rs Enable them again
+$macro{'ns'} = $macro{'rs'} = \&untranslated;
+
+# .cs font [width [em-size]]
+# Switch to and from "constant glyph space mode".
+$macro{'cs'} = \&untranslated;
+
+# .ss word_space_size [sentence_space_size]
+# Change the minimum size of a space between filled words.
+$macro{'ss'} = \&untranslated;
+
+# .ce Center one line horizontally
+# .ce N Center N lines
+# .ul N Underline N lines (but not the spaces)
+# .cu N Underline N lines (even the spaces)
+$macro{'ce'} = $macro{'ul'} = $macro{'cu'} = sub {
+ my $self = shift;
+ if ( defined $_[1] ) {
+ if ( $_[1] <= 0 ) {
+
+ # disable centering, underlining, ...
+ $self->pushmacro( $_[0] );
+ } else {
+
+ # All of these are not handled yet because the number of line may change
+ # during the translation
+ die wrap_mod(
+ "po4a::man",
+ dgettext(
+ "po4a",
+ "This page uses the '%s' request with the number of lines in argument. This is not supported yet."
+ ),
+ $_[0]
+ );
+ }
+ } else {
+ $self->pushmacro( $_[0] );
+ }
+};
+
+# .ec [c]
+# Set the escape character to C. With no argument the default
+# escape character `\' is restored. It can be also used to
+# re-enable the escape mechanism after an `eo' request.
+$macro{'ec'} = sub {
+ my $self = shift;
+ if ( defined $_[1] ) {
+ die wrap_mod(
+ "po4a::man",
+ dgettext(
+ "po4a", "This page uses the '%s' request. This request is only supported when no argument is provided."
+ ),
+ $_[0]
+ );
+ } else {
+ $self->pushmacro( $_[0] );
+ }
+};
+
+###
+### BSD compatibility macros: .AT and .UC
+### (define the version of Berkley used)
+### FIXME: the header ("3rd Berkeley Distribution" or such) declared
+### by this macro isn't translatable we may want to remove
+### this from the generated manpage, and declare our own header
+###
+$macro{'UC'} = $macro{'AT'} = \&untranslated;
+
+# Request: .hw word1 word2 ...
+# Define how WORD1, WORD2, etc. are to be hyphenated. The words
+# must be given with hyphens at the hyphenation points.
+#
+# If the English page needs to specify how a word must be hyphenated, the
+# translated page may also have this need.
+$macro{'hw'} = \&translate_each;
+
+#############################################################################
+#
+# mdoc macros
+#
+# The macros are defined in mdoc(7) and groff_mdoc(7)
+#
+# TBC: Should the font processing be disabled in the mdoc mode?
+#############################################################################
+# FIXME: Maybe we should verify that the page is an mdoc page
+# (add a flag in Dd, and always check that this flag is set in the
+# other mdoc macros)
+sub translate_mdoc {
+ my ( $self, $macroname ) = ( shift, shift );
+ my $macroarg = "";
+ foreach (@_) {
+ $macroarg .= " " if ( length $macroarg );
+ if ( $_ =~ m/((?<!\\) |\t|^$)/ ) {
+ $macroarg .= "\"$_\"";
+ } else {
+ $macroarg .= $_;
+ }
+ }
+
+ $self->pushline( "$macroname " . $self->t($macroarg) . "\n" );
+}
+
+sub translate_mdoc_no_quotes {
+ my ( $self, $macroname, $macroarg ) = ( shift, shift, join( " ", @_ ) );
+
+ $self->pushline( "$macroname " . $self->t($macroarg) . "\n" );
+}
+#
+# Title Macros
+# ============
+# .Dd Month day, year Document date.
+$macro{'Dd'} = sub {
+ my ( $self, $macroname, $macroarg ) = ( shift, shift, join( " ", @_ ) );
+
+ $mdoc_mode = 1;
+ $self->push_docheader();
+
+ # FIXME: It would be nice if we could switch from one set of macros to the
+ # other.
+ #
+ # This does not work at this time. If we erase the current set of macros,
+ # po4a fails when a configuration file uses both mdoc and groff pages.
+ #
+ # # Erase the current macro definitions
+ # %macro=();
+ # %inline=();
+ # %no_wrap_begin=();
+ # %no_wrap_end=();
+ # Use the mdoc macros
+ define_mdoc_macros();
+
+ $self->translate_mdoc_no_quotes( $macroname, $macroarg );
+};
+
+sub define_mdoc_macros {
+
+ # .Dt DOCUMENT_TITLE [section] [volume] Title, in upper case.
+ $macro{'Dt'} = \&translate_mdoc;
+
+ # .Os OPERATING_SYSTEM [version/release] Operating system (BSD).
+ $macro{'Os'} = \&translate_each;
+
+ # Keep the quotes e.g. finger.1
+ # Don't add quotes e.g. logger.1
+
+ # Page Layout Macros
+ # ==================
+ # .Sh Section Headers.
+ # (man mdoc indicates only a limited set of valid headers,
+ # but it should be OK to translate the header)
+ $macro{'Sh'} = sub {
+ my ( $self, $macroname ) = ( shift, shift );
+ my $macroarg = "";
+ foreach (@_) {
+ $macroarg .= " " if ( length $macroarg );
+ if ( $_ =~ m/((?<!\\) |\t|^$)/ ) {
+ $macroarg .= "\"$_\"";
+ } else {
+ $macroarg .= $_;
+ }
+ }
+ if ( $mdoc{$macroarg} ) {
+ $self->pushline( "$macroname " . $self->r($macroarg) . "\n" );
+ } else {
+ $self->pushline( "$macroname " . $self->t($macroarg) . "\n" );
+ }
+ };
+
+ # .Ss Subsection Headers.
+ $macro{'Ss'} = \&translate_mdoc;
+
+ # .Pp Paragraph Break. Vertical space (one line).
+ $macro{'Pp'} = \&noarg;
+
+ # .Lp Same as .Pp
+ $macro{'Lp'} = \&noarg;
+
+ # .D1 (D-one) Display-one Indent and display one text line.
+ $macro{'D1'} = \&translate_mdoc;
+
+ # .Dl (D-ell) Display-one literal.
+ # Indent and display one line of literal text
+ $macro{'Dl'} = \&translate_mdoc;
+
+ # .Bd Begin-display block.
+ # FIXME: Note: there are some options, some of the options argument
+ # may be translatable (-file <name>, -offset <string>)
+ $no_wrap_begin{'Bd'} = 1;
+
+ # .Ed End-display (matches .Bd).
+ $no_wrap_end{'Ed'} = 1;
+
+ # .Bl Begin-list. Create lists or columns.
+ # FIXME: As for .Bd, there are some options
+ $macro{'Bl'} = \&untranslated;
+
+ # .El End-list.
+ $macro{'El'} = \&noarg;
+
+ # .It List item.
+ # FIXME: Maybe we could extract other modifiers
+ # as in .It Fl l Ar num
+ $macro{'It'} = \&translate_mdoc;
+
+ # .Lk html link
+ $macro{'Lk'} = \&untranslated;
+
+ # Manual Domain Macros
+ # ====================
+ # FIXME: I think most Manual and General text domain are in the inline category
+ foreach (qw(Ad An Ar Cd Cm Dv Er Ev Fa Fd Fn Ic Li Nm Op Ot Pa St Va Vt Xr)) {
+ $inline{$_} = 1;
+ }
+
+ # FIXME: some of these macros introduce a line in bold.
+ # Using \fP in these line is not supported.
+ # do_fonts should be called for every inline line
+
+ # General Text Domain
+ # ===================
+ foreach (
+ qw(%A %B %C %D %I %J %N %O %P %Q %R %T %U %V
+ Ac Ao Ap Aq At Bc Bf Bo Bq Brc Bro Brq Bx Db Dc Do Dq Ec Ef Em Eo Eq Fx No Ns
+ Pc Pf Po Pq Qc Ql Qo Qq Re Rs Rv Sc So Sq Sm Sx Sy Tn Ux Xc Xo)
+ )
+ {
+ $inline{$_} = 1;
+ }
+
+ # FIXME: Maybe it should be joined with the preceding .Nm
+ $macro{'Nd'} = \&translate_mdoc;
+
+ # Command line flags
+ $inline{'Fl'} = 1;
+
+ # Exit status
+ $inline{'Ex'} = 1;
+
+ # Opening option bracket
+ $inline{'Oo'} = 1;
+
+ # Closing option bracket
+ $inline{'Oc'} = 1;
+
+ # Begin keep (keep words in the same line)
+ $inline{'Bk'} = 1;
+
+ # End keep
+ $inline{'Ek'} = 1;
+
+ # Library Names
+ $inline{'Lb'} = 1;
+
+ # Function Types
+ $inline{'Ft'} = 1;
+
+ # Function open (for functions with many arguments)
+ $inline{'Fo'} = 1;
+
+ # Function close
+ $inline{'Fc'} = 1;
+
+ # OpenBSD macro
+ $inline{'Ox'} = 1;
+
+ # BSD/OS Macro
+ $inline{'Bsx'} = 1;
+
+ # #include statements
+ $macro{'In'} = \&translate_mdoc;
+
+ # NetBSD Macro
+ $inline{'Nx'} = 1;
+
+ # Math symbol
+ $inline{'Ms'} = 1;
+
+ # Prints 'under development'
+ $inline{'Ud'} = 1;
+
+ # This macro is a groff macro. I don't know if ot is valid in an mdoc page.
+ # But this is used in some pages and seems to work
+ $macro{'br'} = \&noarg;
+
+} # end of define_mdoc_macros
+
+__END__
+
+# LocalWords: Charset charset po UTF gettext msgid nostrip
diff --git a/lib/Locale/Po4a/Po.pm b/lib/Locale/Po4a/Po.pm
new file mode 100644
index 0000000..d87a145
--- /dev/null
+++ b/lib/Locale/Po4a/Po.pm
@@ -0,0 +1,1618 @@
+# Locale::Po4a::Po -- manipulation of PO files
+#
+# This program is free software; you may redistribute it and/or modify it
+# under the terms of GPL v2.0 or later (see COPYING).
+
+############################################################################
+# Modules and declarations
+############################################################################
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Po - PO file manipulation module
+
+=head1 SYNOPSIS
+
+ use Locale::Po4a::Po;
+ my $pofile=Locale::Po4a::Po->new();
+
+ # Read PO file
+ $pofile->read('file.po');
+
+ # Add an entry
+ $pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour',
+ 'flags' => "wrap", 'reference'=>'file.c:46');
+
+ # Extract a translation
+ $pofile->gettext("Hello"); # returns 'bonjour'
+
+ # Write back to a file
+ $pofile->write('otherfile.po');
+
+=head1 DESCRIPTION
+
+Locale::Po4a::Po is a module that allows you to manipulate message
+catalogs. You can load and write from/to a file (which extension is often
+I<po>), you can build new entries on the fly or request for the translation
+of a string.
+
+For a more complete description of message catalogs in the PO format and
+their use, please refer to the info documentation of the gettext program (node "`PO Files"').
+
+This module is part of the po4a project, which objective is to use PO files
+(designed at origin to ease the translation of program messages) to
+translate everything, including documentation (man page, info manual),
+package description, debconf templates, and everything which may benefit
+from this.
+
+=head1 OPTIONS ACCEPTED BY THIS MODULE
+
+=over 4
+
+=item B<--porefs> I<type>
+
+Specify the reference format. Argument I<type> can be one of B<never>
+to not produce any reference, B<file> to only specify the file
+without the line number, B<counter> to replace line number by an
+increasing counter, and B<full> to include complete references (default: full).
+
+=item B<--wrap-po> B<no>|B<newlines>|I<number> (default: 76)
+
+Specify how the po file should be wrapped. This gives the choice between either
+files that are nicely wrapped but could lead to git conflicts, or files that are
+easier to handle automatically, but harder to read for humans.
+
+Historically, the gettext suite has reformatted the po files at the 77th column
+for cosmetics. This option specifies the behavior of po4a. If set to a numerical
+value, po4a will wrap the po file after this column and after newlines in the
+content. If set to B<newlines>, po4a will only split the msgid and msgstr after
+newlines in the content. If set to B<no>, po4a will not wrap the po file at all.
+The reference comments are always wrapped by the gettext tools that we use internally.
+
+Note that this option has no impact on how the msgid and msgstr are wrapped, i.e.
+on how newlines are added to the content of these strings.
+
+=item B<--msgid-bugs-address> I<email@address>
+
+Set the report address for msgid bugs. By default, the created POT files
+have no Report-Msgid-Bugs-To fields.
+
+=item B<--copyright-holder> I<string>
+
+Set the copyright holder in the POT header. The default value is
+"Free Software Foundation, Inc."
+
+=item B<--package-name> I<string>
+
+Set the package name for the POT header. The default is "PACKAGE".
+
+=item B<--package-version> I<string>
+
+Set the package version for the POT header. The default is "VERSION".
+
+=back
+
+=cut
+
+use IO::File;
+
+require Exporter;
+
+package Locale::Po4a::Po;
+use DynaLoader;
+
+use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext);
+
+use subs qw(makespace);
+use vars qw(@ISA @EXPORT_OK);
+@ISA = qw(Exporter DynaLoader);
+@EXPORT = qw(%debug);
+@EXPORT_OK = qw(&move_po_if_needed);
+
+use Locale::Po4a::TransTractor;
+
+# Try to use a C extension if present.
+eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION");
+
+use 5.16.0;
+use strict;
+use warnings;
+
+use Carp qw(croak);
+use File::Basename;
+use File::Path; # mkdir before write
+use File::Copy; # move
+use POSIX qw(strftime floor);
+use Time::Local;
+
+use Encode;
+use Config;
+
+my @known_flags = qw(
+ wrap no-wrap fuzzy
+ c-format no-c-format
+ objc-format no-objc-format
+ sh-format no-sh-format
+ python-format no-python-format
+ python-brace-format no-python-brace-format
+ lisp-format no-lisp-format
+ elisp-format no-elisp-format
+ librep-format no-librep-format
+ scheme-format no-scheme-format
+ smalltalk-format no-smalltalk-format
+ java-format no-java-format
+ csharp-format no-csharp-format
+ awk-format no-awk-format
+ object-pascal-format no-object-pascal-format
+ ycp-format no-ycp-format
+ tcl-format no-tcl-format
+ perl-format no-perl-format
+ perl-brace-format no-perl-brace-format
+ php-format no-php-format
+ gcc-internal-format no-gcc-internal-format
+ gfc-internal-format no-gfc-internal-format
+ qt-format no-qt-format
+ qt-plural-format no-qt-plural-format
+ kde-format no-kde-format
+ boost-format no-boost-format
+ lua-format no-lua-format
+ javascript-format no-javascript-format
+);
+
+# Custom flags, used for example by weblate
+push @known_flags, 'markdown-text';
+
+our %debug = (
+ 'canonize' => 0,
+ 'quote' => 0,
+ 'escape' => 0,
+ 'encoding' => 0,
+ 'filter' => 0
+);
+
+=head1 Functions concerning entire message catalogs
+
+=over 4
+
+=item new()
+
+Creates a new message catalog. If an argument is provided, it's the name of
+a PO file we should load.
+
+=cut
+
+sub new {
+ my ( $this, $options ) = ( shift, shift );
+ my $class = ref($this) || $this;
+ my $self = {};
+ bless $self, $class;
+ $self->initialize($options);
+
+ my $filename = shift;
+ $self->read($filename) if length($filename);
+ return $self;
+}
+
+# Return the numerical timezone (e.g. +0200)
+# Neither the %z nor the %s formats of strftime are portable:
+# '%s' is not supported on Solaris and '%z' indicates
+# "2006-10-25 19:36E. Europe Standard Time" on MS Windows.
+sub timezone {
+ my ($time) = @_;
+ my @l = localtime($time);
+
+ my $diff = floor( timegm(@l) / 60 + 0.5 ) - floor( $time / 60 + 0.5 );
+ my $sign = ( $diff >= 0 ? 1 : -1 );
+ $diff = abs($diff);
+
+ my $h = $sign * floor( $diff / 60 );
+ my $m = $diff % 60;
+
+ return sprintf "%+03d%02d\n", $h, $m;
+}
+
+sub initialize {
+ my ( $self, $options ) = ( shift, shift );
+ my $time = time;
+ my $date = strftime( "%Y-%m-%d %H:%M", localtime($time) ) . timezone($time);
+ chomp $date;
+
+ $self->{options}{'porefs'} = 'full';
+ $self->{options}{'msgid-bugs-address'} = undef;
+ $self->{options}{'copyright-holder'} = "Free Software Foundation, Inc.";
+ $self->{options}{'package-name'} = "PACKAGE";
+ $self->{options}{'package-version'} = "VERSION";
+ $self->{options}{'wrap-po'} = 76;
+ $self->{options}{'pot-language'} = "";
+
+ foreach my $opt ( keys %$options ) {
+
+ # print STDERR "$opt: ".(defined($options->{$opt})?$options->{$opt}:"(undef)")."\n";
+ if ( $options->{$opt} ) {
+ die wrap_mod( "po4a::po", dgettext( "po4a", "Unknown option: %s" ), $opt )
+ unless exists $self->{options}{$opt};
+ $self->{options}{$opt} = $options->{$opt};
+ }
+ }
+ $self->{options}{'wrap-po'} =~ /^(no|newlines|\d+)$/
+ || die wrap_mod(
+ "po4a::po",
+ dgettext( "po4a", "Invalid value for option 'wrap-po' ('%s' is not 'no' nor 'newlines' nor a number)" ),
+ $self->{options}{'wrap-po'}
+ );
+
+ $self->{options}{'porefs'} =~ /^(full|counter|noline|file|none|never)?$/
+ || die wrap_mod(
+ "po4a::po",
+ dgettext(
+ "po4a",
+ "Invalid value for option 'porefs' ('%s' is "
+ . "not one of 'full', 'counter', 'noline', 'file' or 'never')"
+ ),
+ $self->{options}{'porefs'}
+ );
+ $self->{options}{'porefs'} =~ s/noline/file/; # backward compat. 'file' used to be called 'noline'.
+ $self->{options}{'porefs'} =~ s/none/never/; # backward compat. 'never' used to be called 'none'.
+ if ( $self->{options}{'porefs'} =~ m/^counter/ ) {
+ $self->{counter} = {};
+ }
+
+ $self->{po} = ();
+ $self->{count} = 0; # number of msgids in the PO
+ # count_doc: number of strings in the document
+ # (duplicate strings counted multiple times)
+ $self->{count_doc} = 0;
+ $self->{gettextize_types} = (); # Type of each msgid found in the doc, in order
+ # We cannot use {$msgid}{'type'} as a type because for duplicate entries, the type is overwritten.
+ # So we have to copy the same info to this separate array, which is accessed through type_doc()
+ $self->{header_comment} =
+ " SOME DESCRIPTIVE TITLE\n"
+ . " Copyright (C) YEAR "
+ . $self->{options}{'copyright-holder'} . "\n"
+ . " This file is distributed under the same license "
+ . "as the "
+ . $self->{options}{'package-name'}
+ . " package.\n"
+ . " FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n" . "\n"
+ . ", fuzzy";
+
+ # $self->header_tag="fuzzy";
+ $self->{header} = escape_text(
+ "Project-Id-Version: "
+ . $self->{options}{'package-name'} . " "
+ . $self->{options}{'package-version'} . "\n"
+ . (
+ ( defined $self->{options}{'msgid-bugs-address'} )
+ ? "Report-Msgid-Bugs-To: " . $self->{options}{'msgid-bugs-address'} . "\n"
+ : ""
+ )
+ . "POT-Creation-Date: $date\n"
+ . "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+ . "Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n"
+ . "Language-Team: LANGUAGE <LL\@li.org>\n"
+ . "Language: "
+ . $self->{options}{'pot-language'} . "\n"
+ . "MIME-Version: 1.0\n"
+ . "Content-Type: text/plain; charset=UTF-8\n"
+ . "Content-Transfer-Encoding: 8bit\n"
+ );
+
+ $self->{footer} = [];
+
+ # To make stats about gettext hits
+ $self->stats_clear();
+}
+
+=item read($)
+
+Reads a PO file (which name is given as argument). Previously existing
+entries in self are not removed, the new ones are added to the end of the
+catalog.
+
+=cut
+
+sub read {
+ my $self = shift;
+ my $filename = shift
+ or croak wrap_mod( "po4a::po", dgettext( "po4a", "Please provide a non-null filename" ) );
+
+ my $charset = shift // 'UTF-8';
+ $charset = 'UTF-8' if $charset eq "CHARSET";
+ warn "Read $filename with encoding: $charset" if $debug{'encoding'};
+
+ my $checkvalidity = shift // 1;
+
+ my $lang = basename($filename);
+ $lang =~ s/\.po$//;
+ $self->{lang} = $lang;
+
+ if ($checkvalidity) { # We sometimes need to read a file even if it may be invalid (eg to test whether it's empty)
+ my $cmd = "msgfmt" . $Config{_exe} . " --check-format --check-domain -o /dev/null \"" . $filename . '"';
+
+ my $locale = $ENV{'LC_ALL'};
+ $ENV{'LC_ALL'} = "C";
+ my $out = qx/$cmd 2>&1/;
+ $ENV{'LC_ALL'} = $locale;
+
+ die wrap_msg( dgettext( "po4a", "Invalid po file %s:\n%s" ), $filename, $out )
+ unless ( $? == 0 );
+ }
+
+ my $fh;
+ if ( $filename eq '-' ) {
+ $fh = *STDIN;
+ } else {
+ open( $fh, "<:encoding($charset)", $filename )
+ or croak wrap_mod( "po4a::po", dgettext( "po4a", "Cannot read from %s: %s" ), $filename, $! );
+ }
+
+ ## Read paragraphs line-by-line
+ my $pofile = "";
+ while ( defined( my $textline = <$fh> ) ) {
+ $pofile .= $textline;
+ }
+ $pofile =~ s/\r\n/\n/sg; # Reading a DOS-encoded file from Linux (native files are handled in all cases)
+
+ # If we did not get the charset right, reload the file with the right one
+ if ( $pofile =~ /charset=(.*?)[\s\\]/ ) {
+ my $detected_charset = $1;
+
+ if ( $detected_charset ne $charset && uc($detected_charset) ne $charset && uc($detected_charset) ne 'CHARSET' )
+ {
+ warn "Reloading the PO file, changing the charset from '$charset' to '$detected_charset'"
+ if $debug{'encoding'};
+ $self->read( $filename, $detected_charset, $checkvalidity );
+ return;
+ }
+ }
+
+ if ( $pofile =~ m/^\N{BOM}/ ) { # UTF-8 BOM detected
+ croak "BOM detected";
+ croak wrap_msg(
+ dgettext(
+ "po4a",
+ "The file %s starts with a BOM char indicating that its encoding is UTF-8, but you specified %s instead."
+ ),
+ $filename,
+ $charset
+ ) if ( uc($charset) ne 'UTF-8' );
+ $pofile =~ s/^\N{BOM}//;
+ }
+
+ if ( $filename ne '-' ) {
+ close $fh
+ or croak wrap_mod( "po4a::po", dgettext( "po4a", "Cannot close %s after reading: %s" ), $filename, $! );
+ }
+
+ my $linenum = 0;
+
+ foreach my $msg ( split( /\n\n/, $pofile ) ) {
+ my ( $msgid, $msgstr, $comment, $previous, $automatic, $reference, $flags, $buffer );
+ my ( $msgid_plural, $msgstr_plural );
+ if ( $msg =~ m/^#~/m ) {
+ push( @{ $self->{footer} }, $msg );
+ next;
+ }
+ foreach my $line ( split( /\n/, $msg ) ) {
+ $linenum++;
+ if ( $line =~ /^#\. ?(.*)$/ ) { # Automatic comment
+ $automatic .= ( defined($automatic) ? "\n" : "" ) . $1;
+
+ } elsif ( $line =~ /^#: ?(.*)$/ ) { # reference
+ $reference .= ( defined($reference) ? "\n" : "" ) . $1;
+
+ } elsif ( $line =~ /^#, ?(.*)$/ ) { # flags
+ $flags .= ( defined($flags) ? "\n" : "" ) . $1;
+
+ } elsif ( $line =~ /^#\| ?(.*)$/ ) { # previous translation
+ $previous .= ( defined($previous) ? "\n" : "" ) . ( $1 || "" );
+
+ } elsif ( $line =~ /^#(.*)$/ ) { # Translator comments
+ $comment .= ( defined($comment) ? "\n" : "" ) . ( $1 || "" );
+
+ } elsif ( $line =~ /^msgid (".*")$/ ) { # begin of msgid
+ $buffer = $1;
+
+ } elsif ( $line =~ /^msgid_plural (".*")$/ ) {
+
+ # begin of msgid_plural, end of msgid
+
+ $msgid = $buffer;
+ $buffer = $1;
+
+ } elsif ( $line =~ /^msgstr (".*")$/ ) {
+
+ # begin of msgstr, end of msgid
+
+ $msgid = $buffer;
+ $buffer = "$1";
+
+ } elsif ( $line =~ /^msgstr\[([0-9]+)\] (".*")$/ ) {
+
+ # begin of msgstr[x], end of msgid_plural or msgstr[x-1]
+
+ # Note: po4a cannot uses plural forms
+ # (no integer to use the plural form)
+ # * drop the msgstr[x] where x >= 2
+ # * use msgstr[0] as the translation of msgid
+ # * use msgstr[1] as the translation of msgid_plural
+
+ if ( $1 eq "0" ) {
+ $msgid_plural = $buffer;
+ $buffer = "$2";
+ } elsif ( $1 eq "1" ) {
+ $msgstr = $buffer;
+ $buffer = "$2";
+ } elsif ( $1 eq "2" ) {
+ $msgstr_plural = $buffer;
+ warn wrap_ref_mod( "$filename:$linenum", "po4a::po",
+ dgettext( "po4a", "Messages with more than 2 plural forms are not supported." ) );
+ }
+ } elsif ( $line =~ /^(".*")$/ ) {
+
+ # continuation of a line
+ $buffer .= "\n$1";
+
+ } else {
+ warn wrap_ref_mod( "$filename:$linenum", "po4a::po", dgettext( "po4a", "Parse error at: -->%s<--" ),
+ $line );
+ }
+ }
+ $linenum++;
+ if ( defined $msgid_plural ) {
+ $msgstr_plural = $buffer;
+
+ $msgid = unquote_text($msgid) if ( defined($msgid) );
+ $msgstr = unquote_text($msgstr) if ( defined($msgstr) );
+
+ $self->push_raw(
+ 'msgid' => $msgid,
+ 'msgstr' => $msgstr,
+ 'reference' => $reference,
+ 'flags' => $flags,
+ 'comment' => $comment,
+ 'previous' => $previous,
+ 'automatic' => $automatic,
+ 'plural' => 0
+ );
+
+ $msgid_plural = unquote_text($msgid_plural)
+ if ( defined($msgid_plural) );
+ $msgstr_plural = unquote_text($msgstr_plural)
+ if ( defined($msgstr_plural) );
+
+ $self->push_raw(
+ 'msgid' => $msgid_plural,
+ 'msgstr' => $msgstr_plural,
+ 'reference' => $reference,
+ 'flags' => $flags,
+ 'comment' => $comment,
+ 'previous' => $previous,
+ 'automatic' => $automatic,
+ 'plural' => 1
+ );
+ } else {
+ $msgstr = $buffer;
+
+ $msgid = unquote_text($msgid) if ( defined($msgid) );
+ $msgstr = unquote_text($msgstr) if ( defined($msgstr) );
+
+ $self->push_raw(
+ 'msgid' => $msgid,
+ 'msgstr' => $msgstr,
+ 'reference' => $reference,
+ 'flags' => $flags,
+ 'comment' => $comment,
+ 'previous' => $previous,
+ 'automatic' => $automatic
+ );
+ }
+ }
+}
+
+=item write($)
+
+Writes the current catalog to the given file.
+
+=cut
+
+sub write {
+ my $self = shift;
+ my $filename = shift
+ or croak dgettext( "po4a", "Cannot write to a file without filename" ) . "\n";
+
+ my $fh;
+ if ( $filename eq '-' ) {
+ $fh = \*STDOUT;
+ } else {
+
+ # make sure the directory in which we should write the localized
+ # file exists
+ my $dir = $filename;
+ if ( $dir =~ m|/| ) {
+ $dir =~ s|/[^/]*$||;
+
+ File::Path::mkpath( $dir, 0, 0755 ) # Croaks on error
+ if ( length($dir) && !-e $dir );
+ }
+ open( $fh, '>:encoding(UTF-8)', $filename )
+ or croak wrap_mod( "po4a::po", dgettext( "po4a", "Cannot write to %s: %s" ), $filename, $! );
+ }
+
+ print $fh "" . format_comment( $self->{header_comment}, "" )
+ if length( $self->{header_comment} );
+
+ # Force the encoding of PO files in UTF-8 on disk, because msgmerge can get messed up when mixing encodings
+ # See https://savannah.gnu.org/bugs/index.php?65104
+ my $header = $self->{header};
+ $header =~ /charset=([^\s\\]*)/i;
+ my $oldcharset = $1 // '';
+ warn sprintf(
+ dgettext(
+ "po4a",
+ "msgmerge suffers some bugs when PO files are not encoded in UTF-8; Recoding %s to UTF-8 (was %s) to circumvent the issue.\n"
+ ),
+ $filename,
+ $oldcharset
+ ) if $oldcharset ne 'UTF-8';
+ $header =~ s/charset=[^\s\\]*/charset=UTF-8/i;
+
+ print $fh "msgid \"\"\n";
+ print $fh "msgstr " . quote_text( $header, $self->{options}{'wrap-po'} ) . "\n\n";
+
+ my $buf_msgstr_plural; # Used to keep the first msgstr of plural forms
+ my $first = 1;
+ foreach my $msgid ( sort { ( $self->{po}{"$a"}{'pos'} ) <=> ( $self->{po}{"$b"}{'pos'} ) } keys %{ $self->{po} } ) {
+ my $output = "";
+
+ if ($first) {
+ $first = 0;
+ } else {
+ $output .= "\n";
+ }
+
+ $output .= format_comment( $self->{po}{$msgid}{'comment'}, "" )
+ if length( $self->{po}{$msgid}{'comment'} );
+ if ( length( $self->{po}{$msgid}{'automatic'} ) ) {
+ foreach my $comment ( split( /\\n/, $self->{po}{$msgid}{'automatic'} ) ) {
+ $output .= format_comment( $comment, ". " );
+ }
+ }
+ $output .= format_comment( $self->{po}{$msgid}{'type'}, ". type: " )
+ if length( $self->{po}{$msgid}{'type'} );
+
+ if ( length( $self->{po}{$msgid}{'reference'} ) ) {
+ my $output_ref = wrap( $self->{po}{$msgid}{'reference'} );
+ $output_ref =~ s/\s+$//mg;
+ $output .= format_comment( $output_ref, ": " );
+ }
+ $output .= "#, " . join( ", ", sort split( /\s+/, $self->{po}{$msgid}{'flags'} ) ) . "\n"
+ if length( $self->{po}{$msgid}{'flags'} );
+ $output .= format_comment( $self->{po}{$msgid}{'previous'}, "| " )
+ if length( $self->{po}{$msgid}{'previous'} );
+
+ if ( exists $self->{po}{$msgid}{'plural'} ) {
+ if ( $self->{po}{$msgid}{'plural'} == 0 ) {
+ $output .= "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n";
+ $buf_msgstr_plural =
+ "msgstr[0] " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n";
+ } elsif ( $self->{po}{$msgid}{'plural'} == 1 ) {
+
+ # TODO: there may be only one plural form
+ $output = "msgid_plural " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n";
+ $output .= $buf_msgstr_plural;
+ $output .=
+ "msgstr[1] " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n";
+ } else {
+ die wrap_msg( dgettext( "po4a", "Cannot write PO files with more than two plural forms." ) );
+ }
+ } else {
+ $output .= "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n";
+ $output .= "msgstr " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n";
+ }
+
+ print $fh $output;
+ }
+ print $fh join( "\n\n", @{ $self->{footer} } ) if scalar @{ $self->{footer} };
+
+ if ( $filename ne '-' ) {
+ close $fh
+ or croak wrap_mod( dgettext( "po4a", "Cannot close %s after writing: %s\n" ), $filename, $! );
+ }
+}
+
+=item write_if_needed($$)
+
+Like write, but if the PO or POT file already exists, the object will be
+written in a temporary file which will be compared with the existing file
+to check if the update is needed (this avoids to change a POT just to
+update a line reference or the POT-Creation-Date field).
+
+=cut
+
+sub move_po_if_needed {
+ my ( $new_po, $old_po, $backup ) = ( shift, shift, shift );
+ my $diff;
+
+ if ( -e $old_po ) {
+ $diff = qx(diff -q -I'^#:' -I'^\"POT-Creation-Date:' -I'^\"PO-Revision-Date:' $old_po $new_po);
+ if ( $diff eq "" ) {
+ unlink $new_po
+ or die wrap_msg( dgettext( "po4a", "Cannot unlink %s: %s." ), $new_po, $! );
+
+ # touch the old PO
+ my ( $atime, $mtime ) = ( time, time );
+ utime $atime, $mtime, $old_po;
+ } else {
+ move $new_po, $old_po
+ or die wrap_msg( dgettext( "po4a", "Cannot move %s to %s: %s." ), $new_po, $old_po, $! );
+ }
+ } else {
+ move $new_po, $old_po
+ or die wrap_msg( dgettext( "po4a", "Cannot move %s to %s: %s." ), $new_po, $old_po, $! );
+ }
+}
+
+sub write_if_needed {
+ my $self = shift;
+ my $filename = shift
+ or croak dgettext( "po4a", "Cannot write to a file without filename" ) . "\n";
+
+ if ( -e $filename ) {
+ my ($tmp_filename);
+ my $basename = basename($filename);
+ ( undef, $tmp_filename ) = File::Temp::tempfile(
+ $basename . "XXXX",
+ DIR => File::Spec->tmpdir(),
+ OPEN => 0,
+ UNLINK => 0
+ );
+ $self->write($tmp_filename);
+ move_po_if_needed( $tmp_filename, $filename );
+ } else {
+ $self->write($filename);
+ }
+}
+
+=item filter($)
+
+This function extracts a catalog from an existing one. Only the entries having
+a reference in the given file will be placed in the resulting catalog.
+
+This function parses its argument, converts it to a Perl function definition,
+evals this definition and filters the fields for which this function returns
+true.
+
+I love Perl sometimes ;)
+
+=cut
+
+sub filter {
+ my $self = shift;
+ our $filter = shift;
+
+ my $res;
+ $res = Locale::Po4a::Po->new();
+
+ # Parse the filter
+ our $code = "sub apply { return ";
+ our $pos = 0;
+ our $length = length $filter;
+
+ # explode chars to parts. How to subscript a string in Perl?
+ our @filter = split( //, $filter );
+
+ sub gloups {
+ my $fmt = shift;
+ my $space = "";
+ for ( 1 .. $pos ) {
+ $space .= ' ';
+ }
+ die wrap_msg("$fmt\n$filter\n$space^ HERE");
+ }
+
+ sub showmethecode {
+ return unless $debug{'filter'};
+ my $fmt = shift;
+ my $space = "";
+ for ( 1 .. $pos ) {
+ $space .= ' ';
+ }
+ print STDERR "$filter\n$space^ $fmt\n"; #"$code\n";
+ }
+
+ # I dream of a lex in perl :-/
+ sub parse_expression {
+ showmethecode("Begin expression")
+ if $debug{'filter'};
+
+ gloups( "Begin of expression expected, got '%s'", $filter[$pos] )
+ unless ( $filter[$pos] eq '(' );
+ $pos++; # pass the '('
+ if ( $filter[$pos] eq '&' ) {
+
+ # AND
+ $pos++;
+ showmethecode("Begin of AND")
+ if $debug{'filter'};
+ $code .= "(";
+ while (1) {
+ gloups("Unfinished AND statement.")
+ if ( $pos == $length );
+ parse_expression();
+ if ( $filter[$pos] eq '(' ) {
+ $code .= " && ";
+ } elsif ( $filter[$pos] eq ')' ) {
+ last; # do not eat that char
+ } else {
+ gloups( "End of AND or begin of sub-expression expected, got '%s'", $filter[$pos] );
+ }
+ }
+ $code .= ")";
+ } elsif ( $filter[$pos] eq '|' ) {
+
+ # OR
+ $pos++;
+ $code .= "(";
+ while (1) {
+ gloups("Unfinished OR statement.")
+ if ( $pos == $length );
+ parse_expression();
+ if ( $filter[$pos] eq '(' ) {
+ $code .= " || ";
+ } elsif ( $filter[$pos] eq ')' ) {
+ last; # do not eat that char
+ } else {
+ gloups( "End of OR or begin of sub-expression expected, got '%s'", $filter[$pos] );
+ }
+ }
+ $code .= ")";
+ } elsif ( $filter[$pos] eq '!' ) {
+
+ # NOT
+ $pos++;
+ $code .= "(!";
+ gloups("Missing sub-expression in NOT statement.")
+ if ( $pos == $length );
+ parse_expression();
+ $code .= ")";
+ } else {
+
+ # must be an equal. Let's get field and argument
+ my ( $field, $arg, $done );
+ $field = substr( $filter, $pos );
+ gloups("EQ statement contains no '=' or invalid field name")
+ unless ( $field =~ /([a-z]*)=/i );
+ $field = lc($1);
+ $pos += ( length $field ) + 1;
+
+ # check that we've got a valid field name,
+ # and the number it referes to
+ # DO NOT CHANGE THE ORDER
+ my @names = qw(msgid msgstr reference flags comment previous automatic);
+ my $fieldpos;
+ for ( $fieldpos = 0 ; $fieldpos < scalar @names && $field ne $names[$fieldpos] ; $fieldpos++ ) { }
+ gloups( "Invalid field name: %s", $field )
+ if $fieldpos == scalar @names; # not found
+
+ # Now, get the argument value. It has to be between quotes,
+ # which can be escaped
+ # We point right on the first char of the argument
+ # (first quote already eaten)
+ my $escaped = 0;
+ my $quoted = 0;
+ if ( $filter[$pos] eq '"' ) {
+ $pos++;
+ $quoted = 1;
+ }
+ showmethecode( ( $quoted ? "Quoted" : "Unquoted" ) . " argument of field '$field'" )
+ if $debug{'filter'};
+
+ while ( !$done ) {
+ gloups("Unfinished EQ argument.")
+ if ( $pos == $length );
+
+ if ($quoted) {
+ if ( $filter[$pos] eq '\\' ) {
+ if ($escaped) {
+ $arg .= '\\';
+ $escaped = 0;
+ } else {
+ $escaped = 1;
+ }
+ } elsif ($escaped) {
+ if ( $filter[$pos] eq '"' ) {
+ $arg .= '"';
+ $escaped = 0;
+ } else {
+ gloups( "Invalid escape sequence in argument: '\\%s'", $filter[$pos] );
+ }
+ } else {
+ if ( $filter[$pos] eq '"' ) {
+ $done = 1;
+ } else {
+ $arg .= $filter[$pos];
+ }
+ }
+ } else {
+ if ( $filter[$pos] eq ')' ) {
+
+ # counter the next ++ since we don't want to eat
+ # this char
+ $pos--;
+ $done = 1;
+ } else {
+ $arg .= $filter[$pos];
+ }
+ }
+ $pos++;
+ }
+
+ # and now, add the code to check this equality
+ $code .= "(\$_[$fieldpos] =~ m{$arg})";
+
+ }
+ showmethecode("End of expression")
+ if $debug{'filter'};
+ gloups("Unfinished statement.")
+ if ( $pos == $length );
+ gloups( "End of expression expected, got '%s'", $filter[$pos] )
+ unless ( $filter[$pos] eq ')' );
+ $pos++;
+ }
+
+ # And now, launch the beast, finish the function and use eval
+ # to construct this function.
+ # Ok, the lack of lexer is a fair price for the eval ;)
+ parse_expression();
+ gloups("Garbage at the end of the expression")
+ if ( $pos != $length );
+ $code .= "; }";
+ print STDERR "CODE = $code\n"
+ if $debug{'filter'};
+ eval $code;
+ die wrap_mod( "po4a::po", dgettext( "po4a", "Evaluating the provided filter failed: %s" ), $@ )
+ if $@;
+
+ for ( my $cpt = (0) ; $cpt < $self->count_entries() ; $cpt++ ) {
+
+ my ( $msgid, $ref, $msgstr, $flags, $type, $comment, $previous, $automatic );
+
+ $msgid = $self->msgid($cpt);
+ $ref = $self->{po}{$msgid}{'reference'};
+
+ $msgstr = $self->{po}{$msgid}{'msgstr'};
+ $flags = $self->{po}{$msgid}{'flags'};
+ $type = $self->{po}{$msgid}{'type'};
+ $comment = $self->{po}{$msgid}{'comment'};
+ $previous = $self->{po}{$msgid}{'previous'};
+ $automatic = $self->{po}{$msgid}{'automatic'};
+
+ # DO NOT CHANGE THE ORDER
+ $res->push_raw(
+ 'msgid' => $msgid,
+ 'msgstr' => $msgstr,
+ 'flags' => $flags,
+ 'type' => $type,
+ 'reference' => $ref,
+ 'comment' => $comment,
+ 'previous' => $previous,
+ 'automatic' => $automatic
+ ) if ( apply( $msgid, $msgstr, $ref, $flags, $comment, $previous, $automatic ) );
+ }
+
+ # delete the apply subroutine
+ # otherwise it will be redefined.
+ undef &apply;
+ return $res;
+}
+
+=back
+
+=head1 Functions to use a message catalog for translations
+
+=over 4
+
+=item gettext($%)
+
+Request the translation of the string given as argument in the current catalog.
+The function returns the original (untranslated) string if the string was not
+found.
+
+After the string to translate, you can pass a hash of extra
+arguments. Here are the valid entries:
+
+=over
+
+=item B<wrap>
+
+boolean indicating whether we can consider that whitespaces in string are
+not important. If yes, the function canonizes the string before looking for
+a translation, and wraps the result.
+
+=item B<wrapcol>
+
+the column at which we should wrap (default: 76).
+
+=back
+
+=cut
+
+sub gettext {
+ my $self = shift;
+ my $text = shift;
+ my (%opt) = @_;
+ my $res;
+
+ return "" unless length($text); # Avoid returning the header.
+ my $validoption = "reference wrap wrapcol";
+ my %validoption;
+
+ map { $validoption{$_} = 1 } ( split( / /, $validoption ) );
+ foreach ( keys %opt ) {
+ Carp::confess "internal error: unknown arg $_.\n" . "Here are the valid options: $validoption.\n"
+ unless $validoption{$_};
+ }
+
+ $text = canonize($text)
+ if ( $opt{'wrap'} );
+
+ my $esc_text = escape_text($text);
+
+ $self->{gettextqueries}++;
+
+ if (
+ defined $self->{po}{$esc_text}
+ and defined $self->{po}{$esc_text}{'msgstr'}
+ and length $self->{po}{$esc_text}{'msgstr'}
+ and ( not defined $self->{po}{$esc_text}{'flags'}
+ or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/ )
+ )
+ {
+
+ $self->{gettexthits}++;
+ $res = unescape_text( $self->{po}{$esc_text}{'msgstr'} );
+ if ( defined $self->{po}{$esc_text}{'plural'} ) {
+ if ( $self->{po}{$esc_text}{'plural'} eq "0" ) {
+ warn wrap_mod(
+ "po4a gettextize",
+ dgettext(
+ "po4a",
+ "'%s' is the singular form of a message, " . "po4a will use the msgstr[0] translation (%s)."
+ ),
+ $esc_text,
+ $res
+ );
+ } else {
+ warn wrap_mod(
+ "po4a gettextize",
+ dgettext(
+ "po4a",
+ "'%s' is the plural form of a message, " . "po4a will use the msgstr[1] translation (%s)."
+ ),
+ $esc_text,
+ $res
+ );
+ }
+ }
+ } else {
+ $res = $text;
+ }
+
+ if ( $opt{'wrap'} ) {
+ $res = wrap( $res, $opt{'wrapcol'} || 76, 0 );
+ }
+
+ # print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n";
+ return $res;
+}
+
+=item stats_get()
+
+Returns statistics about the hit ratio of gettext since the last time that
+stats_clear() was called. Please note that it's not the same
+statistics than the one printed by msgfmt --statistic. Here, it's statistics
+about recent usage of the PO file, while msgfmt reports the status of the
+file. Example of use:
+
+ [some use of the PO file to translate stuff]
+
+ ($percent,$hit,$queries) = $pofile->stats_get();
+ print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n";
+
+=cut
+
+sub stats_get() {
+ my $self = shift;
+ my ( $h, $q ) = ( $self->{gettexthits}, $self->{gettextqueries} );
+ my $p = ( $q == 0 ? 100 : int( $h / $q * 10000 ) / 100 );
+
+ # $p =~ s/\.00//;
+ # $p =~ s/(\..)0/$1/;
+
+ return ( $p, $h, $q );
+}
+
+=item stats_clear()
+
+Clears the statistics about gettext hits.
+
+=cut
+
+sub stats_clear {
+ my $self = shift;
+ $self->{gettextqueries} = 0;
+ $self->{gettexthits} = 0;
+}
+
+=back
+
+=head1 Functions to build a message catalog
+
+=over 4
+
+=item push(%)
+
+Push a new entry at the end of the current catalog. The arguments should
+form a hash table. The valid keys are:
+
+=over 4
+
+=item B<msgid>
+
+the string in original language.
+
+=item B<msgstr>
+
+the translation.
+
+=item B<reference>
+
+an indication of where this string was found. Example: file.c:46 (meaning
+in 'file.c' at line 46). It can be a space-separated list in case of
+multiple occurrences.
+
+=item B<comment>
+
+a comment added here manually (by the translators). The format here is free.
+
+=item B<automatic>
+
+a comment which was automatically added by the string extraction
+program. See the B<--add-comments> option of the B<xgettext> program for
+more information.
+
+=item B<flags>
+
+space-separated list of all defined flags for this entry.
+
+Valid flags are: B<c-text>, B<python-text>, B<lisp-text>, B<elisp-text>, B<librep-text>,
+B<smalltalk-text>, B<java-text>, B<awk-text>, B<object-pascal-text>, B<ycp-text>,
+B<tcl-text>, B<wrap>, B<no-wrap> and B<fuzzy>.
+
+See the gettext documentation for their meaning.
+
+=item B<type>
+
+this is mostly an internal argument: it is used while gettextizing
+documents. The idea here is to parse both the original and the translation
+into a PO object, and merge them, using one's msgid as msgid and the
+other's msgid as msgstr. To make sure that things get ok, each msgid in PO
+objects are given a type, based on their structure (like "chapt", "sect1",
+"p" and so on in DocBook). If the types of strings are not the same, that
+means that both files do not share the same structure, and the process
+reports an error.
+
+This information is written as automatic comment in the PO file since this
+gives to translators some context about the strings to translate.
+
+=item B<wrap>
+
+boolean indicating whether whitespaces can be mangled in cosmetic
+reformattings. If true, the string is canonized before use.
+
+This information is written to the PO file using the B<wrap> or B<no-wrap> flag.
+
+=item B<wrapcol>
+
+ignored; the key is kept for backward computability.
+
+=back
+
+=cut
+
+sub push {
+ my $self = shift;
+ my %entry = @_;
+
+ my $validoption = "wrap wrapcol type msgid msgstr automatic previous flags reference";
+ my %validoption;
+
+ map { $validoption{$_} = 1 } ( split( / /, $validoption ) );
+ foreach ( keys %entry ) {
+ Carp::confess "internal error: unknown arg $_.\n" . "Here are the valid options: $validoption.\n"
+ unless $validoption{$_};
+ }
+
+ unless ( $entry{'wrap'} ) {
+ $entry{'flags'} .= " no-wrap";
+ }
+ if ( defined( $entry{'msgid'} ) ) {
+ $entry{'msgid'} = canonize( $entry{'msgid'} )
+ if ( $entry{'wrap'} );
+
+ $entry{'msgid'} = escape_text( $entry{'msgid'} );
+ }
+ if ( defined( $entry{'msgstr'} ) ) {
+ $entry{'msgstr'} = canonize( $entry{'msgstr'} )
+ if ( $entry{'wrap'} );
+
+ $entry{'msgstr'} = escape_text( $entry{'msgstr'} );
+ }
+
+ $self->push_raw(%entry);
+}
+
+# The same as push(), but assuming that msgid and msgstr are already escaped
+sub push_raw {
+ my $self = shift;
+ my %entry = @_;
+ my ( $msgid, $msgstr, $reference, $comment, $automatic, $previous, $flags, $type, $transref ) = (
+ $entry{'msgid'}, $entry{'msgstr'}, $entry{'reference'}, $entry{'comment'}, $entry{'automatic'},
+ $entry{'previous'}, $entry{'flags'}, $entry{'type'}, $entry{'transref'}
+ );
+ my $keep_conflict = $entry{'conflict'};
+
+ # print STDERR "Push_raw\n";
+ # print STDERR " msgid=>>>$msgid<<<\n" if $msgid;
+ # print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr;
+ # Carp::cluck " flags=$flags\n" if $flags;
+
+ return unless defined( $entry{'msgid'} );
+
+ # no msgid => header definition
+ unless ( length( $entry{'msgid'} ) ) {
+
+ # if (defined($self->{header}) && $self->{header} =~ /\S/) {
+ # warn dgettext("po4a","Redefinition of the header. ".
+ # "The old one will be discarded\n");
+ # } FIXME: do that iff the header isn't the default one.
+ $self->{header} = $msgstr;
+ $self->{header_comment} = $comment;
+ return;
+ }
+
+ if ( $self->{options}{'porefs'} =~ m/^never/ ) {
+ $reference = "";
+ } elsif ( $self->{options}{'porefs'} =~ m/^counter/ ) {
+ if ( $reference =~ m/^(.+?)(?=\S+:\d+)/g ) {
+ my $new_ref = $1;
+ 1 while $reference =~ s{ # x modifier is added to add formatting and improve readability
+ \G(\s*)(\S+):\d+ # \G is the last match in m//g (see also the (?=) syntax above)
+ # $2 is the file name
+ }{
+ $self->{counter}{$2} ||= 0, # each file has its own counter
+ ++$self->{counter}{$2}, # increment it
+ $new_ref .= "$1$2:".$self->{counter}{$2} # replace line number by this counter
+ }gex && pos($reference);
+ $reference = $new_ref;
+ }
+ } elsif ( $self->{options}{'porefs'} =~ m/^file/ ) {
+ $reference =~ s/:\d+//g;
+ }
+
+ if ( defined( $self->{po}{$msgid} ) ) {
+ warn wrap_mod( "po4a::po", dgettext( "po4a", "msgid defined twice: %s" ), $msgid )
+ if (0); # FIXME: put a verbose stuff
+ if ( defined $msgstr
+ and defined $self->{po}{$msgid}{'msgstr'}
+ and $self->{po}{$msgid}{'msgstr'} ne $msgstr )
+ {
+ my $txt = quote_text( $msgid, $self->{options}{'wrap-po'} );
+ my ( $first, $second ) = (
+ format_comment( ". ", $self->{po}{$msgid}{'reference'} )
+ . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ),
+
+ format_comment( ". ", $reference ) . quote_text($msgstr), $self->{options}{'wrap-po'}
+ );
+
+ if ($keep_conflict) {
+ if ( $self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s ) {
+ $msgstr =
+ $self->{po}{$msgid}{'msgstr'} . "\\n#-#-#-#-# $transref (type: $type) #-#-#-#-#\\n" . $msgstr;
+ } else {
+ $msgstr =
+ "#-#-#-#-# "
+ . $self->{po}{$msgid}{'transref'}
+ . " (type "
+ . $self->{po}{$msgid}{'type'}
+ . ") #-#-#-#-#\\n"
+ . $self->{po}{$msgid}{'msgstr'} . "\\n"
+ . "#-#-#-#-# $transref (type: $type) #-#-#-#-#\\n"
+ . $msgstr;
+ }
+
+ # Every msgid will have the same list of references.
+ # Only keep the last list.
+ $self->{po}{$msgid}{'reference'} = "";
+ } else {
+ warn wrap_msg(
+ dgettext(
+ "po4a",
+ "Translations don't match for:\n" . "%s\n"
+ . "-->First translation:\n" . "%s\n"
+ . " Second translation:\n" . "%s\n"
+ . " Old translation discarded."
+ ),
+ $txt, $first, $second
+ );
+ }
+ }
+ }
+ if ( defined $transref ) {
+ $self->{po}{$msgid}{'transref'} = $transref;
+ }
+ if ( length($reference) ) {
+ if ( defined $self->{po}{$msgid}{'reference'} ) {
+
+ # Only add the new reference if it's not already included in the existing string
+ # It'd be much easier if $self->{po}{$msgid}{'reference'} were an array instead of a joined string...
+ my $oldref = $self->{po}{$msgid}{'reference'};
+ $self->{po}{$msgid}{'reference'} .= " " . $reference
+ unless ( ( $oldref =~ m/ $reference / )
+ || ( $oldref =~ m/ $reference$/ )
+ || ( $oldref =~ m/^$reference$/ )
+ || ( $oldref =~ m/^$reference / ) );
+ } else {
+ $self->{po}{$msgid}{'reference'} = $reference;
+ }
+ }
+ $self->{po}{$msgid}{'msgstr'} = $msgstr;
+ $self->{po}{$msgid}{'comment'} = $comment;
+ $self->{po}{$msgid}{'automatic'} = $automatic;
+ $self->{po}{$msgid}{'previous'} = $previous;
+
+ $self->{po}{$msgid}{pos_doc} = () unless ( defined( $self->{po}{$msgid}{pos_doc} ) );
+ CORE::push( @{ $self->{po}{$msgid}{pos_doc} }, $self->{count_doc}++ );
+ CORE::push( @{ $self->{gettextize_types} }, $type );
+
+ unless ( defined( $self->{po}{$msgid}{'pos'} ) ) {
+ $self->{po}{$msgid}{'pos'} = $self->{count}++;
+ }
+ $self->{po}{$msgid}{'type'} = $type;
+ $self->{po}{$msgid}{'plural'} = $entry{'plural'}
+ if defined $entry{'plural'};
+
+ if ( defined($flags) ) {
+ $flags = " $flags ";
+ $flags =~ s/,/ /g;
+ foreach my $flag (@known_flags) {
+ if ( index( $flags, " $flag " ) != -1 ) { # if flag to be set
+ unless ( defined( $self->{po}{$msgid}{'flags'} )
+ && $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/ )
+ {
+ # flag not already set
+ if ( defined $self->{po}{$msgid}{'flags'} ) {
+ $self->{po}{$msgid}{'flags'} .= " " . $flag;
+ } else {
+ $self->{po}{$msgid}{'flags'} = $flag;
+ }
+ }
+ }
+ }
+ }
+
+ # print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n";
+
+}
+
+=back
+
+=head1 Miscellaneous functions
+
+=over 4
+
+=item count_entries()
+
+Returns the number of entries in the catalog (without the header).
+
+=cut
+
+sub count_entries($) {
+ my $self = shift;
+ return $self->{count};
+}
+
+=item count_entries_doc()
+
+Returns the number of entries in document. If a string appears multiple times
+in the document, it will be counted multiple times.
+
+=cut
+
+sub count_entries_doc($) {
+ my $self = shift;
+ return $self->{count_doc};
+}
+
+=item msgid($)
+
+Returns the msgid of the given number.
+
+=cut
+
+sub msgid($$) {
+ my $self = shift;
+ my $num = shift;
+
+ foreach my $msgid ( keys %{ $self->{po} } ) {
+ return $msgid if ( $self->{po}{$msgid}{'pos'} eq $num );
+ }
+ return undef;
+}
+
+=item msgid_doc($)
+
+Returns the msgid with the given position in the document.
+
+=cut
+
+sub msgid_doc($$) {
+ my $self = shift;
+ my $num = shift;
+
+ foreach my $msgid ( keys %{ $self->{po} } ) {
+ foreach my $pos ( @{ $self->{po}{$msgid}{'pos_doc'} } ) {
+ return $msgid if ( $pos eq $num );
+ }
+ }
+ return undef;
+}
+
+=item type_doc($)
+
+Returns the type of the msgid with the given position in the document. This is
+probably only useful to gettextization, and it's stored separately from
+{$msgid}{'type'} because the later location may be overwritten by another type
+when the $msgid is duplicated in the master document.
+
+=cut
+
+sub type_doc($$) {
+ my $self = shift;
+ my $num = shift;
+
+ return ${ $self->{gettextize_types} }[$num];
+}
+
+=item get_charset()
+
+Returns the character set specified in the PO header. If it hasn't been
+set, it will return "UTF-8".
+
+=cut
+
+sub get_charset() {
+ my $self = shift;
+
+ $self->{header} =~ /charset=(.*?)[\s\\]/;
+
+ if ( defined $1 ) {
+ return $1;
+ } else {
+ return "UTF-8";
+ }
+}
+
+#----[ helper functions ]---------------------------------------------------
+
+# transforme the string from its PO file representation to the form which
+# should be used to print it
+sub unescape_text {
+ my $text = shift;
+
+ print STDERR "\nunescape [$text]====" if $debug{'escape'};
+ $text = join( "", split( /\n/, $text ) );
+ $text =~ s/\\"/"/g;
+
+ # unescape newlines
+ # NOTE on \G:
+ # The following regular expression introduce newlines.
+ # Thus, ^ doesn't match all beginnings of lines.
+ # \G is a zero-width assertion that matches the position
+ # of the previous substitution with s///g. As every
+ # substitution ends by a newline, it always matches a
+ # position just after a newline.
+ $text =~ s/( # $1:
+ (\G|[^\\]) # beginning of the line or any char
+ # different from '\'
+ (\\\\)* # followed by any even number of '\'
+ )\\n # and followed by an escaped newline
+ /$1\n/sgx; # single string, match globally, allow comments
+ # unescape carriage returns
+ $text =~ s/( # $1:
+ (\G|[^\\]) # beginning of the line or any char
+ # different from '\'
+ (\\\\)* # followed by any even number of '\'
+ )\\r # and followed by an escaped carriage return
+ /$1\r/sgx; # single string, match globally, allow comments
+ # unescape tabulations
+ $text =~ s/( # $1:
+ (\G|[^\\])# beginning of the line or any char
+ # different from '\'
+ (\\\\)* # followed by any even number of '\'
+ )\\t # and followed by an escaped tabulation
+ /$1\t/mgx; # multilines string, match globally, allow comments
+ # and unescape the escape character
+ $text =~ s/\\\\/\\/g;
+ print STDERR ">$text<\n" if $debug{'escape'};
+
+ return $text;
+}
+
+# transform the string to its representation as it should be written in PO
+# files
+sub escape_text {
+ my $text = shift;
+
+ print STDERR "\nescape [$text]====" if $debug{'escape'};
+ $text =~ s/\\/\\\\/g;
+ $text =~ s/"/\\"/g;
+ $text =~ s/\n/\\n/g;
+ $text =~ s/\r/\\r/g;
+ $text =~ s/\t/\\t/g;
+ print STDERR ">$text<\n" if $debug{'escape'};
+
+ return $text;
+}
+
+# put quotes around the string on each lines (without escaping it)
+# It does also normalize the text (ie, make sure its representation is wrapped
+# on the 80th char, but without changing the meaning of the string)
+sub quote_text {
+ my $string = shift;
+ my $do_wrap = shift // 'no'; # either 'no' or 'newlines', or column at which we should wrap
+
+ return '""' unless length($string);
+
+ return "\"$string\"" if ( $do_wrap eq 'no' );
+
+ print STDERR "\nquote $do_wrap [$string]====" if $debug{'quote'};
+
+ # break lines on newlines, if any
+ # see unescape_text for an explanation on \G
+ $string =~ s/( # $1:
+ (\G|[^\\]) # beginning of the line or any char
+ # different from '\'
+ (\\\\)* # followed by any even number of '\'
+ \\n) # and followed by an escaped newline
+ /$1\n/sgx; # single string, match globally, allow comments
+
+ $string = wrap( $string, $do_wrap ) if ( $do_wrap ne 'newlines' );
+ my @string = split( /\n/, $string );
+ $string = join( "\"\n\"", @string );
+ $string = "\"$string\"";
+ if ( scalar @string > 1 && $string[0] ne '' ) {
+ $string = "\"\"\n" . $string;
+ }
+
+ print STDERR ">$string<\n" if $debug{'quote'};
+ return $string;
+}
+
+# undo the work of the quote_text function
+sub unquote_text {
+ my $string = shift;
+ print STDERR "\nunquote [$string]====" if $debug{'quote'};
+ $string =~ s/^""\\n//s;
+ $string =~ s/^"(.*)"$/$1/s;
+ $string =~ s/"\n"//gm;
+
+ # Note: an even number of '\' could precede \\n, but I could not build a
+ # document to test this
+ $string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm;
+ $string =~ s|!!DUMMYPOPM!!|\\n|gm;
+ print STDERR ">$string<\n" if $debug{'quote'};
+ return $string;
+}
+
+# canonize the string: write it on only one line, changing consecutive
+# whitespace to only one space.
+# Warning, it changes the string and should only be called if the string is
+# plain text
+sub canonize {
+ my $text = shift;
+ print STDERR "\ncanonize [$text]====" if $debug{'canonize'};
+ $text =~ s/^ *//s;
+ $text =~ s/^[ \t]+/ /gm;
+
+ # if ($text eq "\n"), it messed up the first string (header)
+ $text =~ s/\n/ /gm if ( $text ne "\n" );
+ $text =~ s/([.)]) +/$1 /gm;
+ $text =~ s/([^.)]) */$1 /gm;
+ $text =~ s/ *$//s;
+ print STDERR ">$text<\n" if $debug{'canonize'};
+ return $text;
+}
+
+
+# Wraps the string. We don't use Text::Wrap since it mangles whitespace at the
+# end of the split line.
+#
+# Mandatory arguments:
+# - A string to wrap. May content line breaks, in such case each line will be
+# wrapped separately.
+# Optional arguments:
+# - A column to wrap on. Default: 76.
+# - The extra length allowed for the first line. Default: -10 (which means it
+# will be wrapped 10 characters shorter).
+sub wrap {
+ my $text = shift;
+ return "0" if ( $text eq '0' );
+ my $col = shift || 76;
+ my $first_shift = shift || -10;
+ my @lines = split( /\n/, "$text" );
+ my $res = "";
+
+ while ( defined( my $line = shift @lines ) ) {
+ if ( $first_shift != 0 && length($line) > $col + $first_shift ) {
+ unshift @lines, $line;
+ $first_shift = 0;
+ next;
+ }
+ if ( length($line) > $col ) {
+ my $pos = rindex( $line, " ", $col );
+ while ( substr( $line, $pos - 1, 1 ) eq '.' && $pos != -1 ) {
+ $pos = rindex( $line, " ", $pos - 1 );
+ }
+ if ( $pos == -1 ) {
+
+ # There are no spaces in the first $col chars, pick-up the
+ # first space
+ $pos = index( $line, " " );
+ }
+ if ( $pos != -1 ) {
+ my $end = substr( $line, $pos + 1 );
+ $line = substr( $line, 0, $pos + 1 );
+ if ( $end =~ s/^( +)// ) {
+ $line .= $1;
+ }
+ unshift @lines, $end;
+ }
+ }
+ $first_shift = 0;
+ $res .= "$line\n";
+ }
+
+ # Restore the original trailing spaces
+ $res =~ s/\s+$//s;
+ if ( $text =~ m/(\s+)$/s ) {
+ $res .= $1;
+ }
+ return $res;
+}
+
+# outputs properly a '# ... ' line to be put in the PO file
+sub format_comment {
+ my $comment = shift;
+ my $char = shift;
+ my $result = "#" . $char . $comment;
+ $result =~ s/\n/\n#$char/gs;
+ $result =~ s/^#$char$/#/gm;
+ $result .= "\n";
+ return $result;
+}
+
+1;
+__END__
+
+=back
+
+=head1 AUTHORS
+
+ Denis Barbier <barbier@linuxfr.org>
+ Martin Quinson (mquinson#debian.org)
+
+=cut
diff --git a/lib/Locale/Po4a/Pod.pm b/lib/Locale/Po4a/Pod.pm
new file mode 100644
index 0000000..4b3dc04
--- /dev/null
+++ b/lib/Locale/Po4a/Pod.pm
@@ -0,0 +1,287 @@
+# Locale::Po4a::Pod -- Convert POD data to PO file, for translation.
+#
+# This program is free software; you may redistribute it and/or modify it
+# under the terms of GPL v2.0 or later (see COPYING file).
+#
+# This module converts POD to PO file, so that it becomes possible to
+# translate POD formatted documentation. See gettext documentation for
+# more info about PO files.
+
+############################################################################
+# Modules and declarations
+############################################################################
+
+use Pod::Parser;
+use Locale::Po4a::TransTractor qw(process new get_in_charset get_out_charset);
+
+package Locale::Po4a::Pod;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+require Exporter;
+
+use vars qw(@ISA);
+@ISA = qw(Locale::Po4a::TransTractor Pod::Parser);
+
+use Carp qw(croak confess);
+
+sub initialize { }
+
+sub translate {
+ my ( $self, $str, $ref, $type ) = @_;
+ my (%options) = @_;
+
+ $str = $self->pre_trans( $str, $ref, $type );
+ $str = $self->SUPER::translate( $str, $ref, $type, %options );
+ $str = $self->post_trans( $str, $ref, $type );
+
+ return $str;
+}
+
+sub pre_trans {
+ my ( $self, $str, $ref, $type ) = @_;
+
+ return $str;
+}
+
+sub post_trans {
+ my ( $self, $str, $ref, $type ) = @_;
+
+ # Change ascii non-breaking space to POD one
+ my $nbs_out = "\xA0";
+ my $enc_length = Encode::from_to( $nbs_out, "latin1", $self->get_out_charset );
+ if ( defined $enc_length ) {
+ while ( $str =~ m/(^|.*\s)(\S+?)\Q$nbs_out\E(\S+?)(\s.*$|$)/s ) {
+ my ( $begin, $m1, $m2, $end ) = ( $1, $2, $3, $4 );
+ $str = ( defined $begin ) ? $begin : "";
+
+ # Remove the non-breaking spaces in the string that will be
+ # between S<...>
+ $m2 =~ s/\Q$nbs_out\E/ /g;
+ $str .= "S<$m1 $m2>";
+ $str .= ( defined $end ) ? $end : "";
+ }
+ }
+
+ return $str;
+}
+
+sub command {
+ my ( $self, $command, $paragraph, $line_num ) = @_;
+
+ # print STDOUT "cmd: '$command' '$paragraph' at $line_num\n";
+ if ( $command eq 'back'
+ || $command eq 'cut'
+ || $command eq 'pod' )
+ {
+ $self->pushline("=$command\n\n");
+ } elsif ( $command eq 'over' ) {
+ $self->pushline( "=$command $paragraph" . ( length($paragraph) ? "" : "\n\n" ) );
+ } elsif ( $command eq 'encoding' ) {
+ my $charset = $paragraph;
+ $charset =~ s/^\s*(.*?)\s*$/$1/s;
+
+ my $master_charset = $self->get_in_charset;
+ croak wrap_mod(
+ "po4a::pod",
+ dgettext(
+ "po4a",
+ "The file %s declares %s as encoding, but you provided %s as master charset. Please change either setting."
+ ),
+ $self->{DOCPOD}{refname},
+ $charset,
+ $master_charset,
+ ) if ( length( $master_charset // '' ) > 0 && uc($charset) ne uc($master_charset) );
+
+ # The =encoding line will be added by docheader
+ } else {
+ $paragraph = $self->translate( $paragraph, $self->{DOCPOD}{refname} . ":$line_num", "=$command", "wrap" => 1 );
+ $self->pushline("=$command $paragraph\n\n");
+ }
+}
+
+sub verbatim {
+ my ( $self, $paragraph, $line_num ) = @_;
+
+ # print "verb: '$paragraph' at $line_num\n";
+
+ if ( $paragraph eq "\n" ) {
+ $self->pushline("$paragraph\n");
+ return;
+ }
+ $paragraph = $self->translate( $paragraph, $self->{DOCPOD}{refname} . ":$line_num", "verbatim" );
+ $paragraph =~ s/\n$//m;
+ $self->pushline("$paragraph\n");
+}
+
+sub textblock {
+ my ( $self, $paragraph, $line_num ) = @_;
+
+ # print "text: '$paragraph' at $line_num\n";
+
+ if ( $paragraph eq "\n" ) {
+ $self->pushline("$paragraph\n");
+ return;
+ }
+
+ # Fix a pretty damned bug.
+ # Podlators don't wrap explicitelly the text, and groff won't seem to
+ # wrap any line begining with a space. So, we have to consider as
+ # verbatim not only the paragraphs whose first line is indented, but
+ # the paragraph containing an indented line.
+ # That way, we'll declare more paragraphs as verbatim than needed, but
+ # that's harmless (only less confortable for translators).
+ if ( $paragraph =~ m/^[ \t]/m ) {
+ $self->verbatim( $paragraph, $line_num );
+ return;
+ }
+
+ $paragraph = $self->translate( $paragraph, $self->{DOCPOD}{refname} . ":$line_num", 'textblock', "wrap" => 1 );
+ $paragraph =~ s/ *\n/ /gm; # Unwrap the content, to ensure that C<> markup is not split on several lines
+ $self->pushline("$paragraph\n\n");
+}
+
+sub end_pod { }
+
+sub read {
+ my ( $self, $filename, $refname, $charset ) = @_;
+ $charset ||= "UTF-8";
+ my $fh;
+ open $fh, "<:encoding($charset)", $filename;
+ push @{ $self->{DOCPOD}{infile} }, ( $fh, $refname );
+ $self->Locale::Po4a::TransTractor::read( $filename, $refname, $charset );
+}
+
+sub parse {
+ my $self = shift;
+
+ my @list = @{ $self->{DOCPOD}{infile} };
+ while ( scalar @list ) {
+ my ( $fh, $refname ) = ( shift @list, shift @list );
+ $self->{DOCPOD}{refname} = $refname;
+ $self->parse_from_filehandle($fh);
+ close $fh;
+ }
+}
+
+sub docheader {
+ my $self = shift;
+ my $encoding = $self->get_out_charset();
+ if ( ( defined $encoding )
+ and ( length $encoding )
+ and ( $encoding ne "ascii" ) )
+ {
+ $encoding = "\n=encoding $encoding\n";
+ } else {
+ $encoding = "";
+ }
+
+ return <<EOT;
+
+ *****************************************************
+ * GENERATED FILE, DO NOT EDIT *
+ * THIS IS NO SOURCE FILE, BUT RESULT OF COMPILATION *
+ *****************************************************
+
+This file was generated by po4a(7). Do not store it (in VCS, for example),
+but store the PO file used as source file by po4a-translate.
+
+In fact, consider this as a binary, and the PO file as a regular .c file:
+If the PO get lost, keeping this translation up-to-date will be harder.
+$encoding
+EOT
+}
+1;
+
+##############################################################################
+# Module return value and documentation
+##############################################################################
+
+1;
+__END__
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Pod - convert POD data from/to PO files
+
+=head1 SYNOPSIS
+
+ use Locale::Po4a::Pod;
+ my $parser = Locale::Po4a::Pod->new();
+
+ # Read POD from STDIN and write to STDOUT.
+ $parser->parse_from_filehandle;
+
+ # Read POD from file.pod and write to file.txt.
+ $parser->parse_from_file ('file.pod', 'file.txt');
+
+=head1 DESCRIPTION
+
+Locale::Po4a::Pod is a module to help the translation of documentation in
+the POD format (the preferred language for documenting Perl) into other
+[human] languages.
+
+=head1 STATUS OF THIS MODULE
+
+I think that this module is rock stable, and there is only one known bug
+with F</usr/lib/perl5/Tk/MainWindow.pod> (and some other
+pages, see below) which contains:
+
+ C<" #n">
+
+Lack of luck, in the po4a version, this was split on the space by the
+wrapping. As result, in the original version, the man page contains:
+
+ " #n"
+
+and mine contains:
+
+ "" #n""
+
+which is logic since CE<lt>foobarE<gt> is rewritten "foobar".
+
+Complete list of pages having this problem on my box (from 564 pages; note
+that it depends on the chosen wrapping column):
+
+ /usr/lib/perl5/Tk/MainWindow.pod
+ /usr/share/perl/5.8.0/overload.pod
+ /usr/share/perl/5.8.0/pod/perlapi.pod
+ /usr/share/perl/5.8.0/pod/perldelta.pod
+ /usr/share/perl/5.8.0/pod/perlfaq5.pod
+ /usr/share/perl/5.8.0/pod/perlpod.pod
+ /usr/share/perl/5.8.0/pod/perlre.pod
+ /usr/share/perl/5.8.0/pod/perlretut.pod
+
+
+
+=head1 INTERNALS
+
+As a derived class from Pod::Parser, Locale::Po4a::Pod supports the same
+methods and interfaces. See L<Pod::Parser> for all the details; briefly,
+one creates a new parser with C<< Locale::Po4a::Pod->new() >> and then
+calls either parse_from_filehandle() or parse_from_file().
+
+=head1 SEE ALSO
+
+L<Pod::Parser>,
+L<Locale::Po4a::Man(3pm)>,
+L<Locale::Po4a::TransTractor(3pm)>,
+L<po4a(7)|po4a.7>
+
+=head1 AUTHORS
+
+ Denis Barbier <barbier@linuxfr.org>
+ Martin Quinson (mquinson#debian.org)
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright © 2002 SPI, Inc.
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
+
+=cut
diff --git a/lib/Locale/Po4a/RubyDoc.pm b/lib/Locale/Po4a/RubyDoc.pm
new file mode 100644
index 0000000..6780ca1
--- /dev/null
+++ b/lib/Locale/Po4a/RubyDoc.pm
@@ -0,0 +1,527 @@
+# Locale::Po4a::RubyDoc -- Convert Ruby Document data to PO file, for translation
+#
+# Copyright © 2016-2017 Francesco Poli <invernomuto@paranoici.org>
+#
+# This program is free software; you may redistribute it and/or modify it
+# under the terms of GPL v2.0 or later (see COPYING).
+#
+# This work is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This work is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this work; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+#
+# Parts of the code (such as many regular expressions) were adapted
+# from the source of rdtool, under the terms of the GNU General Public
+# License, version 2 or later.
+# These parts are originally:
+# Copyright © 2004 MoonWolf <moonwolf@moonwolf.com>
+# Copyright © 2011-2012 Youhei SASAKI <uwabami@gfd-dennou.org>
+#
+# The initialize code was adapted from the source of Locale::Po4a::Text,
+# under the terms of the GNU General Public License, version 2 or later.
+# This code was originally:
+# Copyright © 2005-2008 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>
+#
+############################################################################
+#
+# This module converts Ruby Document (RD) format to PO files, so that Ruby
+# Document formatted texts may be translated. See gettext documentation
+# for more details about PO files.
+#
+############################################################################
+
+package Locale::Po4a::RubyDoc;
+
+use Locale::Po4a::TransTractor qw(process new);
+use Locale::Po4a::Common;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+require Exporter;
+
+use vars qw(@ISA);
+@ISA = qw(Locale::Po4a::TransTractor);
+
+######################
+# Global variables #
+######################
+
+my $insiderubydoc = 0;
+
+#############
+# Methods #
+#############
+
+sub initialize {
+ my $self = shift;
+ my %options = @_;
+
+ $self->{options}{'debug'} = 1;
+ $self->{options}{'verbose'} = 1;
+ $self->{options}{'puredoc'} = 0;
+
+ foreach my $opt ( keys %options ) {
+ die wrap_mod( "po4a::rubydoc", dgettext( "po4a", "Unknown option: %s" ), $opt )
+ unless exists $self->{options}{$opt};
+ $self->{options}{$opt} = $options{$opt};
+ }
+
+ if ( defined $options{'puredoc'} ) {
+
+ # initially assume to be already inside the Ruby Document
+ $insiderubydoc = 1;
+ } else {
+
+ # initially assume to be outside the Ruby Document
+ $insiderubydoc = 0;
+ }
+}
+
+sub docheader {
+ return <<EOT;
+#
+# *****************************************************
+# * GENERATED FILE, DO NOT EDIT *
+# * THIS IS NO SOURCE FILE, BUT RESULT OF COMPILATION *
+# *****************************************************
+#
+# This file was generated by po4a-translate(1). Do not store it (in VCS,
+# for example), but store the PO file used as source file by po4a-translate.
+#
+# In fact, consider this as a binary, and the PO file as a regular source file:
+# If the PO gets lost, keeping this translation up-to-date will be harder.
+#
+EOT
+}
+
+sub parse {
+ my $self = shift;
+
+ # start with baseline and firstindent corresponding to no indentation
+ my $baseline = 0;
+ my $firstindent = 0;
+
+ # start in non-verbatim mode
+ my $verbmode = 0;
+
+ # we have not yet seen any Term, hence we are not yet waiting for a
+ # Description
+ my $waitfordesc = 0;
+ my $methodterm = "";
+
+ # flag to remember that we have reached the end of the document
+ my $eof = 0;
+
+ PARAGRAPH: while () {
+
+ # start accumulating a new paragraph and corresponding variables
+ my ( $para, $pref, $ptype, $pwrap, $symbol, $tail ) = ( "", "", "", 1, "", "" );
+
+ LINE: while () {
+
+ # fetch next line and its reference
+ my ( $line, $lref ) = $self->shiftline();
+
+ unless ( defined($line) ) {
+
+ # we reached the end of the document
+ $eof = 1;
+ last LINE;
+ }
+
+ if ( $line =~ /^=begin\s*(\bRD\b.*)?\s*$/ ) {
+
+ # we are entering a Ruby Document part
+ $insiderubydoc = 1;
+ $baseline = 0;
+ $verbmode = 0;
+ $waitfordesc = 0;
+ $self->pushline($line);
+ next PARAGRAPH;
+ }
+
+ if ( $line =~ /^=end/ ) {
+
+ # we are exiting a Ruby Document part
+ $insiderubydoc = 0;
+ $baseline = 0;
+ $verbmode = 0;
+ $waitfordesc = 0;
+ $tail = $line;
+ last LINE;
+ }
+
+ # do nothing while outside the Ruby Document
+ next PARAGRAPH unless ($insiderubydoc);
+
+ # we encountered a Comment: ignore it entirely
+ next LINE if ( $line =~ /^#/ );
+
+ if ( $line =~ /^(={1,4})(?!=)\s*(?=\S)(.*)/
+ or $line =~ /^(\+{1,2})(?!\+)\s*(?=\S)(.*)/ )
+ {
+ # we encountered a Headline: this is a paragraph on its own
+ if ( length($para) ) {
+
+ # we already have some paragraph to be processed:
+ # reput the current line in input and end paragraph
+ $self->unshiftline( $line, $lref );
+ last LINE;
+ } else {
+
+ # we are at the beginning of a paragraph, but a Headline
+ # is a single-line paragraph: define the variables
+ # and end paragraph
+ $symbol = "$1 ";
+ $para = $2;
+ $pref = $lref;
+ $ptype = "Headline $1";
+ $baseline = 0;
+ $verbmode = 0;
+ $waitfordesc = 0;
+ last LINE;
+ }
+ }
+
+ if ( $line =~ /^<<<\s*(\S+)/ ) {
+
+ # we encountered an Include line: end paragraph
+ $tail = $line;
+ last LINE;
+ }
+
+ # compute indentation
+ $line =~ /^(\s*)/;
+ my $indent = length($1);
+
+ if ($verbmode) {
+
+ # use verbatim mode rules
+ # -----------------------
+
+ if ( $indent >= $firstindent ) {
+
+ # indentation matches first line or is deeper:
+ # the Verbatim goes on
+ $para .= $line;
+ next LINE;
+ } else {
+
+ # indentation is shallower than first line:
+ # reput the current line in input, exit verbatim mode
+ # and end paragraph
+ $self->unshiftline( $line, $lref );
+ $verbmode = 0;
+ $waitfordesc = 0;
+ last LINE;
+ }
+ } else {
+
+ # use non-verbatim mode rules
+ # ---------------------------
+
+ if ( $line =~ /^\s*$/ ) {
+
+ # we encountered a WHITELINE: end paragraph
+ $tail = $line;
+ last LINE;
+ }
+
+ if ( $line =~ /^(\s*)\*(\s*)(.*)/ ) {
+
+ # we encountered the first line of a ItemListItem
+ if ( length($para) ) {
+
+ # we already have some paragraph to be processed:
+ # reput the current line in input and end paragraph
+ $self->unshiftline( $line, $lref );
+ last LINE;
+ } else {
+
+ # we are at the beginning of a paragraph:
+ # define the variables
+ $symbol = "$1*$2";
+ $para .= $3;
+ $pref = $lref;
+ $ptype = "ItemListItem *";
+ $baseline = length($symbol);
+ $waitfordesc = 0;
+ next LINE;
+ }
+ }
+
+ if ( $line =~ /^(\s*)(\(\d+\))(\s*)(.*)/ ) {
+
+ # we encountered the first line of an EnumListItem
+ if ( length($para) ) {
+
+ # we already have some paragraph to be processed:
+ # reput the current line in input and end paragraph
+ $self->unshiftline( $line, $lref );
+ last LINE;
+ } else {
+
+ # we are at the beginning of a paragraph:
+ # define the variables
+ $symbol = "$1$2$3";
+ $para .= $4;
+ $pref = $lref;
+ $ptype = "EnumListItem $2";
+ $baseline = length($symbol);
+ $waitfordesc = 0;
+ next LINE;
+ }
+ }
+
+ if ( $line =~ /^(\s*):(\s*)(.*)/ ) {
+
+ # we encountered the Term line of a DescListItem
+ if ( length($para) ) {
+
+ # we already have some paragraph to be processed:
+ # reput the current line in input and end paragraph
+ $self->unshiftline( $line, $lref );
+ last LINE;
+ } else {
+
+ # we are at the beginning of a paragraph, but the Term
+ # part of a DescListItem is a single-line paragraph:
+ # define the variables and end paragraph
+ $symbol = "$1:$2";
+ $para = $3;
+ $pref = $lref;
+ $ptype = "DescListItem Term :";
+ $baseline = length($symbol);
+ $waitfordesc = 1;
+ last LINE;
+ }
+ }
+
+ if ( $line =~ /^(\s*)---(?!-|\s*$)(\s*)(.*)/ ) {
+
+ # we encountered the Term line of a MethodListItem
+ if ( length($para) ) {
+
+ # we already have some paragraph to be processed:
+ # reput the current line in input and end paragraph
+ $self->unshiftline( $line, $lref );
+ last LINE;
+ } else {
+
+ # we are at the beginning of a paragraph, but the Term
+ # part of a MethodListItem is a single-line paragraph;
+ # moreover, it's not translatable: end paragraph
+ $baseline = length("$1---$2");
+ $waitfordesc = 2;
+ $tail = $line;
+ $methodterm = "--- $3";
+ last LINE;
+ }
+ }
+
+ # we apparently encountered a STRINGLINE
+ if ( length($para) ) {
+
+ # we already have some paragraph to be processed:
+ if ( $indent == $baseline ) {
+
+ # indentation matches baseline:
+ # append the STRINGLINE to the paragraph
+ $para .= $line;
+ } else {
+
+ # indentation differs from baseline:
+ # reput the current line in input and end paragraph
+ $self->unshiftline( $line, $lref );
+ last LINE;
+ }
+ } else {
+
+ # we are at the beginning of a paragraph:
+ # define the variables
+ if ($waitfordesc) {
+
+ # we were waiting for a DescListItem Description:
+ # we have just found it
+ if ( $waitfordesc == 1 ) {
+ $ptype = "DescListItem Description";
+ } else {
+ $ptype = "MethodListItem Description $methodterm";
+ }
+ $baseline = $indent;
+ $waitfordesc = 0;
+
+ # reproduce the original indentation
+ $symbol = " " x $indent;
+ } else {
+ if ( $indent > $baseline ) {
+
+ # indentation is deeper than baseline:
+ # we are entering a Verbatim
+ $verbmode = 1;
+ $ptype = "Verbatim";
+ $pwrap = 0;
+ $firstindent = $indent;
+ } else {
+
+ # indentation is not deeper than baseline:
+ # this is a TextBlock
+ $ptype = "TextBlock";
+ $baseline = $indent;
+
+ # reproduce the original indentation
+ $symbol = " " x $indent;
+ }
+ }
+ $para .= $line;
+ $pref = $lref;
+ }
+ }
+
+ }
+
+ if ( length($para) ) {
+
+ # set wrap column at 76 - identation, but never less than 26
+ my $ni = length($symbol);
+ my $wc = 76 - $ni;
+ $wc = 26 if ( $wc < 26 );
+
+ # get the translated paragraph
+ my $translated = $self->translate(
+ $para,
+ $pref,
+ $ptype,
+ 'wrap' => $pwrap,
+ 'wrapcol' => $wc
+ );
+
+ if ($pwrap) {
+
+ # reformat the translated paragraph
+ my $is = " " x $ni;
+ chomp $translated;
+ $translated =~ s/\n/\n$is/g;
+ $translated .= "\n";
+ }
+
+ # push the paragraph to the translated document
+ $self->pushline( $symbol . $translated );
+ }
+
+ if ( length($tail) ) {
+
+ # push the non translatable tail to the translated document
+ $self->pushline($tail);
+ }
+
+ # stop processing, if we have already reached the end of the document
+ return if ($eof);
+ }
+}
+
+##########################
+# Module documentation #
+##########################
+
+1;
+__END__
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::RubyDoc -- Convert Ruby Document data from/to PO files
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::RubyDoc is a module to help the translation of documentation in
+the Ruby Document (RD) format (a language used to document Ruby) into other
+[human] languages.
+
+=head1 STATUS OF THIS MODULE
+
+This module has been successfully tested on simple Ruby Document files
+covering a good part of the format syntax.
+
+A known limitation is that it fails to properly recognize the stacked
+structure of input Ruby Document: this implies that when, for instance,
+an EnumListItem consists of more than one Block, only the first Block
+is actually recognized as EnumListItem, while the subsequent ones are
+considered just as TextBlocks...
+
+=head1 OPTIONS ACCEPTED BY THIS MODULE
+
+This module supports the following option:
+
+=over
+
+=item B<puredoc>
+
+Handle files entirely made of Ruby Document formatted text (without
+any "=begin" line).
+
+By default, this module only handles Ruby Document formatted text
+between "=begin" and "=end" lines (hence ignoring, among other things,
+everything that precedes the first "=begin" line).
+
+=back
+
+=head1 SEE ALSO
+
+L<Locale::Po4a::TransTractor(3pm)>
+
+=head1 AUTHORS
+
+Francesco Poli <invernomuto@paranoici.org>
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright © 2016-2017 Francesco Poli <invernomuto@paranoici.org>
+
+This work is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This work is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this work; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+Parts of the code (such as many regular expressions) were adapted
+from the source of rdtool, under the terms of the GNU General Public
+License, version 2 or later.
+These parts are originally:
+
+ Copyright © 2004 MoonWolf <moonwolf@moonwolf.com>
+ Copyright © 2011-2012 Youhei SASAKI <uwabami@gfd-dennou.org>
+
+The initialize code was adapted from the source of Locale::Po4a::Text,
+under the terms of the GNU General Public License, version 2 or later.
+This code was originally:
+
+ Copyright © 2005-2008 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>
+
+=cut
diff --git a/lib/Locale/Po4a/Sgml.pm b/lib/Locale/Po4a/Sgml.pm
new file mode 100644
index 0000000..45eb360
--- /dev/null
+++ b/lib/Locale/Po4a/Sgml.pm
@@ -0,0 +1,1372 @@
+#!/usr/bin/perl -w
+
+# Po4a::Sgml.pm
+#
+# extract and translate translatable strings from an sgml based document.
+#
+# This code is an adapted version of sgmlspl (SGML postprocessor for the
+# SGMLS and NSGMLS parsers) which was:
+#
+# Copyright © 1995 David Megginson <dmeggins@aix1.uottawa.ca>
+#
+# The adaptation for po4a was done by Denis Barbier <barbier@linuxfr.org>,
+# Martin Quinson (mquinson#debian.org) and others.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+########################################################################
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Sgml - convert SGML documents from/to PO files
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::Sgml is a module to help the translation of documentation in
+the SGML format into other [human] languages.
+
+This module uses B<onsgmls>(1) to parse the SGML files. Make sure it is
+installed.
+Also make sure that the DTD of the SGML files are installed in the system.
+
+=head1 OPTIONS ACCEPTED BY THIS MODULE
+
+=over 4
+
+=item B<debug>
+
+Space-separated list of keywords indicating which category of extra debug
+messages should be shown. Possible values are: C<entities>, C<generic>,
+C<onsgml>, C<refs> and C<tag>.
+
+=item B<verbose>
+
+Give more information about what's going on.
+
+=item B<translate>
+
+Space-separated list of extra tags (beside the DTD provided ones) whose
+content should form an extra msgid, i.e that should be translated.
+
+=item B<section>
+
+Space-separated list of extra tags (beside the DTD provided ones)
+containing other tags, some of them being of category B<translate>.
+
+=item B<indent>
+
+Space-separated list of tags which increase the indentation level. This will
+affect the identation in the resulting document.
+
+=item B<verbatim>
+
+The layout within those tags should not be changed. The paragraph won't get
+wrapped, and no extra indentation space or new line will be added for
+cosmetic purpose.
+
+=item B<empty>
+
+Tags not needing to be closed.
+
+=item B<ignore>
+
+Tags ignored and considered as plain char data by po4a. That is to say that
+they can be part of an msgid. For example, E<lt>bE<gt> is a good candidate
+for this category since putting it in the B<translate> section would create a
+msgids with only its content, (and it's ususally not a whole sentence), which
+is bad.
+
+=item B<attributes>
+
+A space-separated list of attributes that need to be translated. You can
+specify the attributes by their name (for example, C<lang>), but you can also
+prefix it with a tag hierarchy, to specify that this attribute will only be
+translated when it is into the specified tag. For example:
+C<< <bbb><aaa>lang >> specifies that the lang attribute will only be
+translated if it is in an C<< <aaa> >> tag, which is in a C<< <bbb> >> tag.
+The tag names are actually regular expressions so you can also write things
+like C<< <aaa|bbb>lang >> to only translate C<lang> attributes that are in
+an C<< <aaa> >> or a C<< <bbb> >> tag.
+
+=item B<qualify>
+
+A space-separated list of attributes for which the translation must be
+qualified by the attribute name, i.e. the text extracted for the transalation
+will include both the attributes name and it's value. e.g. for a tag like
+C<< <aaa lang_en="foo"> >> translators will be presented with the string
+C<lang_en="foo">. Note that this also automatically adds the given attribute
+into the B<attributes> list too.
+
+
+=item B<force>
+
+Proceed even if the DTD is unknown or if B<onsgmls> finds errors in the input
+file.
+
+=item B<include-all>
+
+By default, msgids containing only one entity (like C<&version;>) are skipped
+for the translators' comfort. Activating this option prevents this
+optimisation. It can be useful if the document contains a construction like
+C<< <title>&Aacute;</title> >>, even if I doubt such things to ever happen...
+
+=item B<ignore-inclusion>
+
+Space-separated list of entities that won't be inlined.
+Use this option with caution: it may cause B<onsgmls> (used internally) to add
+tags and render the output document invalid.
+
+=back
+
+=head1 STATUS OF THIS MODULE
+
+The result is perfect. I.e., the generated documents are exactly the
+same as the originals. But there are still some problems:
+
+=over 2
+
+=item *
+
+The error output of B<onsgmls> is redirected to /dev/null by default, which is clearly
+bad. I don't know how to avoid that.
+
+The problem is that I have to "protect" the conditional inclusions (i.e. the
+C<E<lt>! [ %foo [> and C<]]E<gt>> stuff) from B<onsgmls>. Otherwise
+B<onsgmls> eats them, and I don't know how to restore them in the final
+document. To prevent that, I rewrite them to C<{PO4A-beg-foo}> and
+C<{PO4A-end}>.
+
+The problem with this is that the C<{PO4A-end}> and such I add are invalid in
+the document (not in a E<lt>pE<gt> tag or so).
+
+If you want to view the B<onsgmls> output, just add the following to your command line (or po4a configuration line):
+
+ -o debug=onsgmls
+
+=item *
+
+It does work only with the DebianDoc and DocBook DTD. Adding support for a
+new DTD should be very easy. The mechanism is the same for every DTD, you just
+have to give a list of the existing tags and some of their characteristics.
+
+I agree, this needs some more documentation, but it is still considered as
+beta, and I hate to document stuff which may/will change.
+
+=item *
+
+Warning, support for DTDs is quite experimental. I did not read any
+reference manual to find the definition of every tag. I did add tag
+definition to the module 'till it works for some documents I found on the
+net. If your document use more tags than mine, it won't work. But as I said
+above, fixing that should be quite easy.
+
+I did test DocBook against the SAG (System Administrator Guide) only, but
+this document is quite big, and should use most of the DocBook
+specificities.
+
+For DebianDoc, I tested some of the manuals from the DDP, but not all yet.
+
+=item *
+
+In case of file inclusion, string reference of messages in PO files
+(i.e. lines like C<#: en/titletoc.sgml:9460>) will be wrong.
+
+This is because I preprocess the file to protect the conditional inclusion
+(i.e. the C<E<lt>! [ %foo [> and C<]]E<gt>> stuff) and some entities (like
+C<&version;>) from B<onsgmls> because I want them verbatim to the generated
+document. For that, I make a temp copy of the input file and do all the
+changes I want to this before passing it to B<onsgmls> for parsing.
+
+So that it works, I replace the entities asking for a file inclusion by the
+content of the given file (so that I can protect what needs to be in a subfile
+also). But nothing is done so far to correct the references (i.e., filename
+and line number) afterward. I'm not sure what the best thing to do is.
+
+=back
+
+=cut
+
+package Locale::Po4a::Sgml;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+require Exporter;
+use vars qw(@ISA @EXPORT);
+@ISA = qw(Locale::Po4a::TransTractor);
+@EXPORT = qw();
+
+use Locale::Po4a::TransTractor;
+use Locale::Po4a::Common;
+
+eval qq{use SGMLS};
+if ($@) {
+ die wrap_mod(
+ "po4a::sgml",
+ dgettext(
+ "po4a",
+ "The needed module SGMLS.pm was not found and needs to be installed. It can be found on the CPAN, in package libsgmls-perl on debian, etc."
+ )
+ );
+}
+
+use File::Temp;
+
+my %debug = (
+ 'tag' => 0,
+ 'generic' => 0,
+ 'entities' => 0,
+ 'refs' => 0,
+ 'onsgmls' => 0
+);
+
+my $xmlprolog = undef; # the '<?xml ... ?>' line if existing
+
+sub initialize {
+ my $self = shift;
+ my %options = @_;
+
+ $self->{options}{'translate'} = '';
+ $self->{options}{'section'} = '';
+ $self->{options}{'indent'} = '';
+ $self->{options}{'empty'} = '';
+ $self->{options}{'verbatim'} = '';
+ $self->{options}{'ignore'} = '';
+ $self->{options}{'ignore-inclusion'} = '';
+
+ $self->{options}{'include-all'} = '';
+
+ $self->{options}{'force'} = '';
+
+ $self->{options}{'verbose'} = '';
+ $self->{options}{'debug'} = '';
+
+ foreach my $opt ( keys %options ) {
+ if ( $options{$opt} ) {
+ die wrap_mod( "po4a::sgml", dgettext( "po4a", "Unknown option: %s" ), $opt )
+ unless exists $self->{options}{$opt};
+ $self->{options}{$opt} = $options{$opt};
+ }
+ }
+ if ( $options{'debug'} ) {
+ foreach ( split /\s+/, $options{'debug'} ) {
+ die wrap_mod( "po4a::sgml", dgettext( "po4a", "Unknown debug category: %s. Known categories:\n%s" ),
+ $_, join( " ", keys %debug ) )
+ unless exists $debug{$_};
+ $debug{$_} = 1;
+ }
+ }
+}
+
+sub read {
+ my ( $self, $filename, $refname, $charset ) = @_;
+
+ push @{ $self->{DOCPOD}{infile} }, $filename;
+ $self->Locale::Po4a::TransTractor::read( $filename, $refname, $charset );
+}
+
+sub parse {
+ my $self = shift;
+ map { $self->parse_file($_) } @{ $self->{DOCPOD}{infile} };
+}
+
+#
+# Filter out some uninteresting strings for translation
+#
+sub translate {
+ my ($self) = (shift);
+ my ( $string, $ref, $type ) = ( shift, shift, shift );
+ my (%options) = @_;
+
+ # don't translate entries composed of one entity
+ if ( ( ( $string =~ /^&[^;]*;$/ ) || ( $options{'wrap'} && $string =~ /^\s*&[^;]*;\s*$/ ) )
+ && !( $self->{options}{'include-all'} ) )
+ {
+ warn wrap_mod( "po4a::sgml", dgettext( "po4a", "msgid skipped to help translators (contains only an entity)" ) )
+ unless $self->verbose() <= 0;
+ return $string . ( $options{'wrap'} ? "\n" : "" );
+ }
+
+ # don't translate entries composed of tags only
+ if ( $string =~ /^(((<[^>]*>)|\s)*)$/
+ && !( $self->{options}{'include-all'} ) )
+ {
+ warn wrap_mod( "po4a::sgml", dgettext( "po4a", "msgid skipped to help translators (contains only tags)" ) )
+ unless $self->verbose() <= 0;
+ return $string . ( $options{'wrap'} ? "\n" : "" );
+ }
+
+ # don't translate entries composed of marked section tags only
+ if ( ( $string =~ /^(?:<!\s*\[\s*[^\[]+\s*\[|\]\s*]\s*>|\s)*$/ )
+ && !( $self->{options}{'include-all'} ) )
+ {
+ warn wrap_mod(
+ "po4a::sgml",
+ dgettext(
+ "po4a",
+ "msgid skipped to "
+ . "help translators (contains only opening or closing "
+ . "tags of marked sections)"
+ ),
+ $string
+ ) unless $self->verbose() <= 0;
+ return $string . ( $options{'wrap'} ? "\n" : "" );
+ }
+
+ $string = $self->SUPER::translate( $string, $ref, $type, %options );
+
+ $string = $self->post_trans( $string, $ref, $type );
+
+ return $string;
+}
+
+sub post_trans {
+ my ( $self, $str, $ref, $type ) = @_;
+
+ # Change ascii non-breaking space to an &nbsp;
+ my $nbs_out = "\xA0";
+ my $enc_length = Encode::from_to( $nbs_out, "latin1", $self->get_out_charset );
+ $str =~ s/\Q$nbs_out/&nbsp;/g if defined $enc_length;
+
+ return $str;
+}
+
+#
+# Make sure our cruft is removed from the file
+#
+sub pushline {
+ my ( $self, $line ) = @_;
+ $line =~ s/{PO4A-amp}/&/g;
+ $self->SUPER::pushline($line);
+}
+
+sub set_tags_kind {
+ my $self = shift;
+ my (%kinds) = @_;
+
+ foreach (qw(translate empty section verbatim ignore attributes qualify)) {
+ $self->{SGML}->{k}{$_} = $self->{options}{$_} ? $self->{options}{$_} . ' ' : '';
+
+ # Remove the default behavior for the tags defined with the
+ # options.
+ foreach my $k ( keys %kinds ) {
+ foreach my $t ( split( " ", $self->{SGML}->{k}{$_} ) ) {
+ $kinds{$k} =~ s/\b$t\b//;
+ }
+ }
+ }
+
+ foreach ( keys %kinds ) {
+ die "po4a::sgml: internal error: set_tags_kind called with unrecognized arg $_"
+ if ( $_ !~ /^(translate|empty|verbatim|ignore|indent|attributes|qualify)$/ );
+
+ $self->{SGML}->{k}{$_} .= $kinds{$_};
+ }
+}
+
+#
+# Do the actual work, using the SGMLS package and settings done elsewhere.
+#
+sub parse_file {
+ my ( $self, $mastername ) = @_;
+ my ($prolog);
+
+ # Rewrite the file to:
+ # - protect optional inclusion marker (i.e. "<![ %str [" and "]]>")
+ # - protect entities from expansion (ie "&release;")
+ my $origfile = "";
+ my $i = 0;
+ while ( $i < @{ $self->{TT}{doc_in} } ) {
+ $origfile .= ${ $self->{TT}{doc_in} }[$i];
+ $i += 2;
+ }
+
+ unless ( $self->{options}{'force'} ) {
+
+ # Detect if we can find the DTD
+ my ( $tmpfh, $tmpfile ) = File::Temp::tempfile(
+ "po4a-XXXX",
+ SUFFIX => ".sgml",
+ DIR => File::Spec->tmpdir(),
+ UNLINK => 0
+ );
+ print $tmpfh $origfile;
+ close $tmpfh
+ or die wrap_mod( "po4a::sgml", dgettext( "po4a", "Cannot close tempfile: %s" ), $! );
+ if ( system("onsgmls -p $tmpfile") ) {
+ unlink($tmpfile);
+ die wrap_mod(
+ "po4a::sgml",
+ dgettext(
+ "po4a", "Error while running onsgmls -p. Please check if onsgmls and the DTD are installed."
+ )
+ );
+ }
+ unlink($tmpfile);
+ }
+
+ # Detect the XML pre-prolog
+ if ( $origfile =~ s/^(\s*<\?xml[^?]*\?>)// ) {
+ warn wrap_mod(
+ "po4a::sgml",
+ dgettext(
+ "po4a",
+ "Trying to handle a XML document as a SGML one. "
+ . "Feel lucky if it works, help us implementing a proper XML backend if it does not."
+ ),
+ $mastername
+ ) unless $self->verbose() <= 0;
+ $xmlprolog = $1;
+ }
+
+ # Get the prolog
+ {
+ $prolog = $origfile;
+ my $lvl; # number of '<' seen without matching '>'
+ my $pos = 0; # where in the document (in chars) while detecting prolog boundaries
+
+ unless ( $prolog =~ s/^(.*<!DOCTYPE).*$/$1/is ) {
+ die wrap_mod(
+ "po4a::sgml",
+ dgettext(
+ "po4a",
+ "This file is not a master SGML document (no DOCTYPE). "
+ . "It may be a file to be included by another one, in which case it should not be passed to po4a directly. Text from included files is extracted/translated when handling the master file including them."
+ )
+ );
+ }
+ $pos += length($prolog);
+ $lvl = 1;
+ while ( $lvl != 0 ) {
+
+ # Eat comments in the prolog, since there may be some '>' or '<' in them.
+ if ( $origfile =~ m/^.{$pos}(<!--.*?-->)/s ) {
+ print "Found a comment in the prolog: $1\n" if ( $debug{'generic'} );
+ $pos += length($1);
+
+ # take care of the line numbers
+ my @a = split( /\n/, $1 );
+ shift @a; # nb line - 1
+ while ( defined( shift @a ) ) {
+ $prolog .= "\n";
+ }
+ next;
+ }
+
+ # Search the closing '>'
+ my ($c) = substr( $origfile, $pos, 1 );
+ $lvl++ if ( $c eq '<' );
+ $lvl-- if ( $c eq '>' );
+ $prolog = "$prolog$c";
+ $pos++;
+ }
+ }
+
+ # Add the definition of new tags that will be used for the
+ # conditionnal inclusions
+ if ( $origfile =~ /^.*<!DOCTYPE[^[>]*\[/is ) {
+ $origfile =~
+ s/^(.*<!DOCTYPE[^[>]*\[)/$1 <!ELEMENT PO4ABEG - o empty> <!ATTLIST PO4ABEG name CDATA #REQUIRED> <!ELEMENT PO4AEND - o empty>/is;
+ }
+
+ print STDERR "PROLOG=$prolog\n------------\n" if ( $debug{'generic'} );
+
+ # Configure the tags for this dtd
+ if ( $prolog =~ /debiandoc/i ) {
+ $self->set_tags_kind(
+ "translate" => "author version abstract title" . "date copyrightsummary heading p " . "example tag title",
+ "empty" => "date ref manref url toc",
+ "verbatim" => "example",
+ "ignore" => "package prgn file tt em var "
+ . "name email footnote po4aend po4abeg "
+ . "strong ftpsite ftppath qref",
+ "indent" => "appendix " . "book "
+ . "chapt copyright "
+ . "debiandoc "
+ . "enumlist " . "item " . "list "
+ . "sect sect1 sect2 sect3 sect4 "
+ . "tag taglist titlepag toc"
+ );
+
+ } elsif ( $prolog =~ /docbook/i ) {
+ $self->set_tags_kind(
+ "translate" => "abbrev appendixinfo artheader attribution "
+ . "biblioentry biblioset "
+ . "chapterinfo collab collabname confdates confgroup conftitle " . "date "
+ . "edition editor entry example "
+ . "figure "
+ . "glosssee glossseealso glossterm "
+ . "holder "
+ . "member msgaud msglevel msgorig "
+ . "orgdiv orgname othername "
+ . "pagenums para phrase pubdate publishername primary "
+ . "refclass refdescriptor refentrytitle refmiscinfo refname refpurpose releaseinfo remark revnumber revremark "
+ . "screeninfo seg secondary see seealso segtitle simpara substeps subtitle synopfragmentref synopsis "
+ . "term tertiary title titleabbrev "
+ . "contrib epigraph",
+ "empty" => "audiodata colspec graphic imagedata textdata sbr spanspec videodata xref",
+ "indent" => "abstract answer appendix article articleinfo audioobject author authorgroup "
+ . "bibliodiv bibliography blockquote blockinfo book bookinfo bridgehead "
+ . "callout calloutlist caption caution chapter copyright "
+ . "dedication docinfo "
+ . "entry "
+ . "formalpara "
+ . "glossary glossdef glossdiv glossentry glosslist group "
+ . "imageobject important index indexterm informaltable itemizedlist "
+ . "keyword keywordset "
+ . "legalnotice listitem lot "
+ . "mediaobject msg msgentry msginfo msgexplan msgmain msgrel msgsub msgtext " . "note "
+ . "objectinfo orderedlist "
+ . "part partintro preface procedure publisher "
+ . "qandadiv qandaentry qandaset question "
+ . "reference refentry refentryinfo refmeta refnamediv refsect1 refsect1info refsect2 refsect2info refsect3 refsect3info refsection refsectioninfo refsynopsisdiv refsynopsisdivinfo revision revdescription row "
+ . "screenshot sect1 sect1info sect2 sect2info sect3 sect3info sect4 sect4info sect5 sect5info section sectioninfo seglistitem segmentedlist set setindex setinfo shortcut simplelist simplemsgentry simplesect step synopfragment "
+ . "table tbody textobject tgroup thead tip toc "
+ . "variablelist varlistentry videoobject "
+ . "warning",
+ "verbatim" => "address cmdsynopsis holder literallayout programlisting "
+ . "refentrytitle refname refpurpose screen term title",
+ "ignore" => "acronym action affiliation anchor application arg author authorinitials "
+ . "city citation citerefentry citetitle classname co command computeroutput constant corpauthor country "
+ . "database po4abeg po4aend "
+ . "email emphasis envar errorcode errorname errortext errortype exceptionname "
+ . "filename firstname firstterm footnote footnoteref foreignphrase function "
+ . "glossterm guibutton guiicon guilabel guimenu guimenuitem guisubmenu "
+ . "hardware "
+ . "indexterm informalexample inlineequation inlinegraphic inlinemediaobject interface interfacename isbn "
+ . "keycap keycode keycombo keysym "
+ . "link lineannotation literal "
+ . "manvolnum markup medialabel menuchoice methodname modespec mousebutton "
+ . "nonterminal "
+ . "olink ooclass ooexception oointerface option optional othercredit "
+ . "parameter personname phrase productname productnumber prompt property pubsnumber "
+ . "quote "
+ . "remark replaceable returnvalue revhistory "
+ . "sgmltag sidebar structfield structname subscript superscript surname symbol systemitem "
+ . "token trademark type "
+ . "ulink userinput "
+ . "varname volumenum "
+ . "wordasword " . "xref " . "year",
+ "attributes" => "<(article|book)>lang"
+ );
+
+ } else {
+ if ( $self->{options}{'force'} ) {
+ warn wrap_mod( "po4a::sgml",
+ dgettext( "po4a", "DTD of this file is unknown, but proceeding as requested." ) );
+ $self->set_tags_kind();
+ } else {
+ die wrap_mod( "po4a::sgml",
+ dgettext( "po4a", "DTD of this file is unknown. (supported: DebianDoc, DocBook). The prolog follows:" )
+ . "\n$prolog" );
+ }
+ }
+
+ # Hash of the file entities that won't be included
+ my %ignored_inclusion = ();
+ foreach ( split / /, $self->{options}{'ignore-inclusion'} ) {
+ $ignored_inclusion{$_} = 1;
+ }
+
+ # Prepare the reference indirection stuff
+ my @refs;
+ my $length = ( $origfile =~ tr/\n/\n/ );
+ print "XX Prepare reference indirection stuff\n" if $debug{'refs'};
+ for ( my $i = 1 ; $i <= $length ; $i++ ) {
+ push @refs, "$mastername:$i";
+ print "$mastername:$i\n" if $debug{'refs'};
+ }
+
+ # protect the conditional inclusions in the file
+ $origfile =~ s/<!\[\s*IGNORE\s*\[/{PO4A-beg-IGNORE}/g; # cond. incl. starts
+ $origfile =~ s/<!\[\s*CDATA\s*\[/{PO4A-beg-CDATA}/g; # cond. incl. starts
+ $origfile =~ s/<!\[\s*RCDATA\s*\[/{PO4A-beg-RCDATA}/g; # cond. incl. starts
+ $origfile =~ s/<!\[\s*([^\[\s]+)\s*\[/<po4abeg name="$1">/g; # cond. incl. starts
+ $origfile =~ s/\]\]>/<po4aend>/g; # cond. incl. end
+
+ # Remove <![ IGNORE [ sections
+ # FIXME: we don't support included PO4A-beg-
+ my $tmp1 = $origfile;
+ while ( $tmp1 =~ m/^(.*?)(\{PO4A-beg-\s*IGNORE\s*}(?:.+?)<po4aend>)(.*)$/s ) {
+ my ( $begin, $ignored, $end ) = ( $1, $2, $3 );
+ my @begin = split( /\n/, $begin );
+ my @ignored = split( /\n/, $ignored );
+ my $pre = scalar @begin;
+ my $len = ( scalar @ignored ) - 1;
+ $pre++ if ( $begin =~ /\n$/s );
+ $len++ if ( $end =~ /^\n/s );
+
+ # remove the references of the ignored lines
+ splice @refs, $pre + 1, $len - 1;
+
+ # remove the lines
+ $tmp1 = $begin . $end;
+ }
+ $origfile = $tmp1;
+
+ # The <, >, and & in a CDATA must be escaped because they do not
+ # correspond to tags or entities delimiters.
+ $tmp1 = $origfile;
+ $origfile = "";
+ while ( $tmp1 =~ m/^(.*?{PO4A-beg-\s*(?:CDATA|RCDATA)\s*})(.+?)(<po4aend>.*)$/s ) {
+ my ( $begin, $tmp ) = ( $1, $2 );
+ $tmp1 = $3;
+ $tmp =~ s/</{PO4A-lt}/gs;
+ $tmp =~ s/>/{PO4A-gt}/gs;
+ $tmp =~ s/&/{PO4A-amp}/gs;
+ $origfile .= $begin . $tmp;
+ }
+ $origfile .= $tmp1;
+
+ # Deal with the %entities; in the prolog. God damn it, this code is gross!
+ # Try hard not to change the number of lines to not fuck up the references
+ my %prologentincl;
+ my $moretodo = 1;
+ PROLOGENTITY: while ($moretodo) { # non trivial loop to deal with recursive inclusion
+ $moretodo = 0;
+
+ # Unprotect not yet defined inclusions
+ $prolog =~ s/{PO4A-percent}/%/sg;
+ print STDERR "prolog=>>>>$prolog<<<<\n"
+ if ( $debug{'entities'} );
+ while ( $prolog =~ /(.*?)<!ENTITY\s*%\s*(\S*)\s+SYSTEM\s*"([^>"]*)"\s*>(.*)$/is ) { #})"{ (Stupid editor)
+ print STDERR "Seen the definition entity of prolog inclusion '$2' (=$3)\n"
+ if ( $debug{'entities'} );
+
+ # Preload the content of the entity.
+ my $key = $2;
+ my $filename = $3;
+ my $origfilename = $filename;
+ my ( $begin, $end ) = ( $1, $4 );
+ if ( $filename !~ m%^/% && $mastername =~ m%/% ) {
+ my $dir = $mastername;
+ $dir =~ s%/[^/]*$%%;
+ $filename = "$dir/$filename";
+
+ # origfile also needs to be fixed otherwise onsgmls won't
+ # find the file.
+ $origfile =~ s/(<!ENTITY\s*%\s*\Q$key\E\s+SYSTEM\s*")\Q$origfilename\E("\s*>)/$1$filename$2/gsi;
+ }
+ if ( defined $ignored_inclusion{$key} or !-e $filename ) {
+
+ # We won't expand this entity.
+ # And we avoid onsgmls to do so.
+ $prolog = "$begin<!--{PO4A-ent-beg-$key}$filename" . "{PO4A-ent-end}-->$end";
+ } else {
+ $prolog = $begin . $end;
+ ( -e $filename && open IN, "<$filename" )
+ || die wrap_mod( "po4a::sgml", dgettext( "po4a", "Cannot open %s (content of entity %s%s;): %s" ),
+ $filename, '%', $key, $! );
+ local $/ = undef;
+ $prologentincl{$key} = <IN>;
+ close IN;
+ print STDERR "Content of \%$key; is $filename ("
+ . ( $prologentincl{$key} =~ tr/\n/\n/ )
+ . " lines long)\n"
+ if ( $debug{'entities'} );
+ print STDERR "content: " . $prologentincl{$key} . "\n"
+ if ( $debug{'entities'} );
+ $moretodo = 1;
+ next PROLOGENTITY;
+ }
+ }
+ while ( $prolog =~ /(.*?)<!ENTITY\s*%\s*(\S*)\s*"([^>"]*)"\s*>(.*)$/is ) { #})"{ (Stupid editor)
+ print STDERR "Seen the definition entity of prolog definition '$2' (=$3)\n"
+ if ( $debug{'entities'} );
+
+ # Preload the content of the entity.
+ my $key = $2;
+ $prolog = $1 . $4;
+ $prologentincl{$key} = $3;
+ print STDERR "content: " . $prologentincl{$key} . "\n"
+ if ( $debug{'entities'} );
+ $moretodo = 1;
+ next PROLOGENTITY;
+ }
+ while ( $prolog =~ /^(.*?)%([^;\s]*);(.*)$/s ) {
+ my ( $pre, $ent, $post ) = ( $1, $2, $3 );
+
+ # Yeah, right, the content of the entity can be defined in a not yet loaded entity
+ # It's easy to build a weird case where all that shit collapses poorly. But why the
+ # hell are you using those strange constructs in your document, damn it?
+ print STDERR "Seen prolog inclusion $ent\n" if ( $debug{'entities'} );
+ if ( defined( $prologentincl{$ent} ) ) {
+ $prolog = $pre . $prologentincl{$ent} . $post;
+ print STDERR "Change \%$ent; to its content in the prolog\n"
+ if $debug{'entities'};
+ $moretodo = 1;
+ } else {
+
+ # AAAARGH stupid document using %bla; and having then defined in another inclusion!
+ # Protect it for this pass, and unprotect it on next one
+ print STDERR "entity $ent not defined yet ?!\n"
+ if $debug{'entities'};
+ $prolog = "$pre" . '{PO4A-percent}' . "$ent;$post";
+ }
+ }
+ }
+ $prolog =~ s/<!--\{PO4A-ent-beg-(.*?)\}(.*?)\{PO4A-ent-end\}-->/<!ENTITY % $1 SYSTEM "$2">/g;
+
+ # Unprotect undefined inclusions, and die of them
+ $prolog =~ s/\{PO4A-percent\}/%/sg;
+ if ( $prolog =~ /%([^;\s]*);/ ) {
+ die wrap_mod( "po4a::sgml", dgettext( "po4a", "unrecognized prolog inclusion entity: %%%s;" ), $1 )
+ unless ( $ignored_inclusion{$1} );
+ }
+
+ # Protect &entities; (all but the ones asking for a file inclusion)
+ # search the file inclusion entities
+ my %entincl;
+ my $searchprolog = $prolog;
+ while ( $searchprolog =~ /(.*?)<!ENTITY\s+(\S*)\s+SYSTEM\s*"([^>"]*)"\s*>(.*)$/is ) { #})"{
+ print STDERR "Seen the entity of inclusion $2 (=$3)\n"
+ if ( $debug{'entities'} );
+ my $key = $2;
+ my $filename = $3;
+ my $origfilename = $filename;
+ $searchprolog = $4;
+ if ( $filename !~ m%^/% && $mastername =~ m%/% ) {
+ my $dir = $mastername;
+ $dir =~ s%/[^/]*$%%;
+ $filename = "$dir/$filename";
+
+ # origfile also needs to be fixed otherwise onsgmls won't find
+ # the file.
+ $origfile =~ s/(<!ENTITY\s+$key\s+SYSTEM\s*")\Q$origfilename\E("\s*>)/$1$filename$2/gsi;
+ }
+ if ( ( not defined $ignored_inclusion{$2} ) and ( -e $filename ) ) {
+ $entincl{$key}{'filename'} = $filename;
+
+ # Preload the content of the entity
+ ( -e $filename && open IN, "<$filename" )
+ || die wrap_mod( "po4a::sgml", dgettext( "po4a", "Cannot open %s (content of entity %s%s;): %s" ),
+ $filename, '&', $key, $! );
+ local $/ = undef;
+ $entincl{$key}{'content'} = <IN>;
+ close IN;
+ $entincl{$key}{'length'} = ( $entincl{$key}{'content'} =~ tr/\n/\n/ );
+ print STDERR "read $filename (content of \&$key;, $entincl{$key}{'length'} lines long)\n"
+ if ( $debug{'entities'} );
+ }
+ }
+
+ # Change the entities including files in the document
+ my $dosubstitution = 1;
+ while ($dosubstitution) {
+ $dosubstitution = 0;
+ foreach my $key ( keys %entincl ) {
+
+ # The external entity can be referenced as &key; or &key
+ # In the second case, we must differentiate &key and &key2
+ while ( $origfile =~ /^(.*?)&$key(;.*$|[^-_:.A-Za-z0-9].*$|$)/s ) {
+
+ # Since we will include a new file, we
+ # must do a new round of substitutions.
+ $dosubstitution = 1;
+ my ( $begin, $end ) = ( $1, $2 );
+ $end = "" unless ( defined $end );
+ $end =~ s/^;//s;
+
+ if ( $begin =~ m/.*<!--(.*?)$/s and $1 !~ m/-->/s ) {
+
+ # This entity is commented. Just remove it.
+ $origfile = $begin . $end;
+ next;
+ }
+
+ # add the refs
+ my $len = $entincl{$key}{'length'}; # number added by the inclusion
+ my $pre = ( $begin =~ tr/\n/\n/ ); # number of \n
+ my $post = ( $end =~ tr/\n/\n/ );
+ print "XX Add a ref. pre=$pre; len=$len; post=$post\n"
+ if $debug{'refs'};
+
+ # Keep a reference of inclusion position in main file
+ my $main = $refs[$pre];
+
+ # Remove the references for the lines after the inclusion
+ # point.
+ my @endrefs = splice @refs, $pre + 1;
+
+ # Add the references of the added lines
+ my $i;
+ for ( $i = 0 ; $i < $len ; $i++ ) {
+ $refs[ $i + $pre ] = "$main $entincl{$key}{'filename'}:" . ( $i + 1 );
+ }
+
+ if ( $begin !~ m/\n[ \t]*$/s ) {
+ if ( $entincl{$key}{'content'} =~ m/^[ \t]*\n/s ) {
+
+ # There is nothing in the first line of the
+ # included file, and something on the line before
+ # the inclusion The line reference will be more
+ # informative like this:
+ $refs[$pre] = $main;
+ }
+ }
+ if ( $end !~ s/^[ \t]*\n//s ) {
+ if ( $entincl{$key}{'content'} =~ m/\n[ \t]*$/s ) {
+
+ # There is something on the line after the
+ # inclusion, and there is an end of line at the
+ # end of the included file. We must add the line
+ # reference of the remainder on the line:
+ push @refs, $main;
+ }
+ }
+
+ # Append the references removed earlier (lines after the
+ # inclusion point).
+ push @refs, @endrefs;
+
+ # Do the substitution
+ $origfile = "$begin" . $entincl{$key}{'content'} . "$end";
+ print STDERR "substitute $key\n" if ( $debug{'entities'} );
+ }
+ }
+ }
+ $origfile =~ s/\G(.*?)&([A-Za-z_:][-_:.A-Za-z0-9]*|#[0-9]+|#x[0-9a-fA-F]+)\b/$1\{PO4A-amp\}$2/gs;
+ if ( defined($xmlprolog) && length($xmlprolog) ) {
+ $origfile =~ s/\/>/\{PO4A-close\}>/gs;
+ }
+
+ if ( $debug{'refs'} ) {
+ print "XX Resulting shifts\n";
+ for ( my $i = 0 ; $i < scalar @refs ; $i++ ) {
+ print "$mastername:" . ( $i + 1 ) . " -> $refs[$i]\n";
+ }
+ }
+
+ my ( $tmpfh, $tmpfile ) = File::Temp::tempfile(
+ "po4a-XXXX",
+ SUFFIX => ".sgml",
+ DIR => File::Spec->tmpdir(),
+ UNLINK => 0
+ );
+ print $tmpfh $origfile;
+ close $tmpfh or die wrap_mod( "po4a::sgml", dgettext( "po4a", "Cannot close tempfile: %s" ), $! );
+
+ my $cmd = "onsgmls -l -E 0 -wno-valid $tmpfile" . ( $debug{'onsgmls'} ? "" : " 2>/dev/null" ) . " |";
+ print STDERR "CMD=$cmd\n" if ( $debug{'generic'} or $debug{'onsgmls'} );
+
+ open( IN, $cmd ) || die wrap_mod( "po4a::sgml", dgettext( "po4a", "Cannot run onsgmls: %s" ), $! );
+
+ # The kind of tags
+ my ( %translate, %empty, %verbatim, %indent, %exist, %attribute, %qualify );
+ foreach ( split( / /, ( $self->{SGML}->{k}{'translate'} || '' ) ) ) {
+ $translate{ uc $_ } = 1;
+ $indent{ uc $_ } = 1;
+ $exist{ uc $_ } = 1;
+ }
+ foreach ( split( / /, ( $self->{SGML}->{k}{'empty'} || '' ) ) ) {
+ $empty{ uc $_ } = 1;
+ $exist{ uc $_ } = 1;
+ }
+ foreach ( split( / /, ( $self->{SGML}->{k}{'verbatim'} || '' ) ) ) {
+ $translate{ uc $_ } = 1;
+ $verbatim{ uc $_ } = 1;
+ $exist{ uc $_ } = 1;
+ }
+ foreach ( split( / /, ( $self->{SGML}->{k}{'indent'} || '' ) ) ) {
+ $translate{ uc $_ } = 1;
+ $indent{ uc $_ } = 1;
+ $exist{ uc $_ } = 1;
+ }
+ foreach ( split( / /, ( $self->{SGML}->{k}{'ignore'} ) || '' ) ) {
+ $exist{ uc $_ } = 1;
+ }
+ foreach ( split( / /, ( $self->{SGML}->{k}{'attributes'} || '' ) ) ) {
+ my ( $attr, $tags );
+ if (m/(^.*>)(\w+)/) {
+ $attr = uc $2;
+ $tags = $1;
+ } else {
+ $attr = uc $_;
+ $tags = ".*";
+ }
+ if ( exists $attribute{$attr} ) {
+ $attribute{$attr} .= "|$tags";
+ } else {
+ $attribute{$attr} = $tags;
+ }
+ }
+ foreach ( split( / /, ( $self->{SGML}->{k}{'qualify'} ) || '' ) ) {
+ $qualify{ uc $_ } = 1;
+ $attribute{ uc $_ } = '.*' unless exists $attribute{ uc $_ };
+ }
+
+ # What to do before parsing
+
+ # push the XML prolog if existing
+ $self->pushline( $xmlprolog . "\n" ) if ( defined($xmlprolog) && length($xmlprolog) );
+
+ # Put the prolog into the file, allowing for entity definition translation
+ # <!ENTITY myentity "definition_of_my_entity">
+ # and push("<!ENTITY myentity \"".$self->translate("definition_of_my_entity")
+ if ( $prolog =~ m/(.*?\[)(.*)(\]>)/s ) {
+ warn "Pre=~~$1~~;Post=~~$3~~\n" if ( $debug{'entities'} );
+ $self->pushline( $1 . "\n" ) if ( length($1) );
+ $prolog = $2;
+ my ($post) = $3;
+ while ( $prolog =~ m/^(.*?)<!ENTITY\s+(\S*)\s+"([^"]*)"\s*>(.*)$/is ) { #" ){
+ $self->pushline($1) if length($1);
+ $self->pushline( "<!ENTITY $2 \"" . $self->translate( $3, "", "definition of entity \&$2;" ) . "\">" );
+ warn "Seen text entity $2\n" if ( $debug{'entities'} );
+ $prolog = $4;
+ }
+ $prolog .= $post;
+ $self->pushline( $prolog . "\n" ) if ( length($prolog) );
+ } else {
+ warn "No entity declaration detected in ~~$prolog~~...\n" if ( $debug{'entities'} );
+ $self->pushline($prolog) if length($prolog);
+ }
+
+ # The parse object.
+ # Damn SGMLS. It makes me do crude things.
+ no strict "subs";
+ my $parse = new SGMLS(IN);
+ use strict;
+
+ # Some values for the parsing
+ my @open = (); # opened translation container tags
+ my $verb = 0; # can we wrap or not
+ my $verb_last_ref;
+ my $seenfootnote = 0;
+ my $indent = 0; # indent level
+ my $lastchar = ''; #
+ my $buffer = ""; # what we will soon handle
+
+ # Keep a reference to the last line indicated by onsgmls
+ my $line = 0;
+
+ # Unfortunately, onsgmls do not mention all the line changes. We have
+ # to keep track of the number of lines seen in the "record ends".
+ my $adds = 0;
+
+ # If the last line received contains only spaces, do not take it into
+ # account for the line reference of the paragraph.
+ my $empty_last_cdata = 0;
+
+ # run the appropriate handler for each event
+ EVENT: while ( my $event = $parse->next_event ) {
+
+ # get the line reference to build po entries
+ if ( $line != $parse->line ) {
+
+ # onsgmls informs us of that the line changed. Reset $adds and
+ # $empty_last_cdata
+ $adds = 0;
+ $empty_last_cdata = 0;
+ $line = $parse->line;
+ }
+ my $ref = $refs[ $parse->line - 1 + $adds - $empty_last_cdata ];
+
+ # In verbatim mode, keep the current line reference.
+ if ($verb) {
+ $ref = $refs[ $parse->line - 1 ];
+ }
+ my $type;
+
+ if ( $event->type eq 'start_element' ) {
+ die wrap_ref_mod( $ref, "po4a::sgml", dgettext( "po4a", "Unknown tag %s" ), $event->data->name )
+ unless $exist{ $event->data->name };
+
+ $lastchar = ">";
+
+ # Which tag did we see?
+ my $tag = '';
+ $tag .= '<' . lc( $event->data->name() );
+ foreach my $attr ( sort $event->data->attribute_names() ) {
+
+ my $val = ${ $event->data->attributes() }{$attr};
+ my $value = $val->value();
+
+ # if ($val->type() eq 'IMPLIED') {
+ # $tag .= ' '.lc($attr).'="'.lc($attr).'"';
+ # } els
+ if ( $val->type() eq 'CDATA'
+ || $val->type() eq 'IMPLIED' )
+ {
+ if ( defined $value && length($value) ) {
+ my $lattr = lc $attr;
+ my $uattr = uc $attr;
+ if ( exists $attribute{$uattr} ) {
+ my $context = "";
+ foreach my $o (@open) {
+ next if ( !defined $o or $o =~ m%^</% );
+ $o =~ s/ .*/>/;
+ $context .= $o;
+ }
+ $context = join( "", $context, "<", lc( $event->data->name() ), ">" );
+ if ( $context =~ /^($attribute{$uattr})$/ ) {
+ if ( $qualify{$uattr} ) {
+ my $translated =
+ $self->translate( "$lattr=$value", $ref, "attribute $context$lattr" );
+ if ( $translated =~ s/^$lattr=// ) {
+ $value = $translated;
+ } else {
+ die wrap_mod( "po4a::sgml",
+ dgettext( "po4a", "bad translation '%s' for '%s' in '%s'" ),
+ $translated, $context . $lattr, $ref );
+ }
+ } else {
+ $value = $self->translate( $value, $ref, "attribute $context$lattr" );
+ }
+ }
+ }
+ if ( $value =~ m/\"/ ) {
+ $value = "'" . $value . "'";
+ } else {
+ $value = '"' . $value . '"';
+ }
+ $tag .= " $lattr=$value";
+ }
+ } elsif ( $val->type() eq 'NOTATION' ) {
+ } else {
+ $tag .= ' ' . lc($attr) . '="' . lc($value) . '"'
+ if ( defined $value && length($value) );
+ }
+ }
+ $tag .= '>';
+
+ # debug
+ print STDERR "Seen $tag, open level=" . ( scalar @open ) . ", verb=$verb\n"
+ if ( $debug{'tag'} );
+
+ if ( $event->data->name() eq 'FOOTNOTE' ) {
+
+ # we want to put the <para> inside the <footnote> in the same msgid
+ $seenfootnote = 1;
+ }
+
+ if ($seenfootnote) {
+ $buffer .= $tag;
+ next EVENT;
+ }
+ if ( $translate{ $event->data->name() } ) {
+
+ # Build the type
+ if ( scalar @open > 0 ) {
+ $type = $open[$#open] . $tag;
+ } else {
+ $type = $tag;
+ }
+
+ # do the job
+ if ( @open > 0 ) {
+ $self->end_paragraph( $buffer, $ref, $type, $verb, $indent, @open );
+ } else {
+ $self->pushline($buffer) if $buffer;
+ }
+ $buffer = "";
+ push @open, $tag;
+ } elsif ( $indent{ $event->data->name() } ) {
+ die wrap_ref_mod( $ref, "po4a::sgml",
+ dgettext( "po4a", "Closing tag for a translation container missing before %s" ), $tag )
+ if ( scalar @open );
+ }
+
+ if ( $verbatim{ $event->data->name() } ) {
+ $verb++;
+
+ # Keep a reference to the line that openned the verbatim
+ # section. This is needed to check if its data starts on
+ # the same line.
+ $verb_last_ref = $ref;
+ }
+ if ($verb) {
+
+ # Tag in a verbatim section. Check if it appeared at
+ # the same line than the previous data. If not, it
+ # means that an end of line must be added to the
+ # buffer.
+ if ( $ref ne $verb_last_ref ) {
+
+ # FIXME: Does it work if $verb > 1
+ $buffer .= "\n";
+ $verb_last_ref = $ref;
+ }
+ }
+
+ if ( $indent{ $event->data->name() } ) {
+
+ # push the indenting space only if not in verb before that tag
+ # push trailing "\n" only if not in verbose afterward
+ $self->pushline( ( $verb > 1 ? "" : ( " " x $indent ) ) . $tag . ( $verb ? "" : "\n" ) );
+ $indent++ unless $empty{ $event->data->name() };
+ } else {
+ $tag =~ s/<po4abeg name="([^"]+)">/<![ $1 [/; #"; Stupid emacs
+ $tag =~ s/<po4aend>/]]>/;
+ $buffer .= $tag;
+ }
+ } # end of type eq 'start_element'
+
+ elsif ( $event->type eq 'end_element' ) {
+ my $tag = (
+ $empty{ $event->data->name() }
+ ? ''
+ : '</' . lc( $event->data->name() ) . '>'
+ );
+
+ if ($verb) {
+
+ # Tag in a verbatim section. Check if it appeared at
+ # the same line than the previous data. If not, it
+ # means that an end of line must be added to the
+ # buffer.
+ if ( $ref ne $verb_last_ref ) {
+
+ # FIXME: Does it work if $verb > 1
+ $buffer .= "\n";
+ $verb_last_ref = $ref;
+ }
+ }
+ print STDERR "Seen $tag, level=" . ( scalar @open ) . ", verb=$verb\n"
+ if ( $debug{'tag'} );
+
+ $lastchar = ">";
+
+ if ( $event->data->name() eq 'FOOTNOTE' ) {
+
+ # we want to put the <para> inside the <footnote> in the same msgid
+ $seenfootnote = 0;
+ }
+
+ if ($seenfootnote) {
+ $buffer .= $tag;
+ next EVENT;
+ }
+ if ( $translate{ $event->data->name() } ) {
+ $type = $open[$#open] . $tag;
+ $self->end_paragraph( $buffer, $ref, $type, $verb, $indent, @open );
+ $buffer = "";
+ pop @open;
+ if ( @open > 0 ) {
+ pop @open;
+ push @open, $tag;
+ }
+ } elsif ( $indent{ $event->data->name() } ) {
+ die wrap_ref_mod( $ref, "po4a::sgml",
+ dgettext( "po4a", "Closing tag for a translation container missing before %s" ), $tag )
+ if ( scalar @open );
+ }
+
+ unless ( $event->data->name() =~ m/^(PO4ABEG|PO4AEND)$/si ) {
+ if ( $indent{ $event->data->name() } ) {
+ $indent--;
+
+ # add indenting space only when not in verbatim
+ # add the tailing \n only if out of verbatim after that tag
+ $self->pushline( ( $verb ? "" : ( " " x $indent ) ) . $tag . ( $verb > 1 ? "" : "\n" ) );
+ } else {
+ $buffer .= $tag;
+ }
+ $verb-- if $verbatim{ $event->data->name() };
+ }
+ } # end of type eq 'end_element'
+
+ elsif ( $event->type eq 'cdata' ) {
+ my $cdata = $event->data;
+ $empty_last_cdata = ( $cdata =~ m/^\s*$/ );
+ $cdata =~ s/{PO4A-lt}/</g;
+ $cdata =~ s/{PO4A-gt}/>/g;
+ $cdata =~ s/{PO4A-amp}/&/g;
+ $cdata =~ s/{PO4A-end}/\]\]>/g;
+ $cdata =~ s/{PO4A-beg-([^\}]+)}/<!\[$1\[/g;
+ if ($verb) {
+
+ # Check if this line of data appear on the same line
+ # than the previous tag. If not, append an end of line
+ # to the buffer.
+ if ( $ref ne $verb_last_ref ) {
+ $buffer .= "\n";
+ $verb_last_ref = $ref;
+ }
+ } else {
+ $cdata =~ s/\\t/ /g;
+ $cdata =~ s/\s+/ /g;
+ $cdata =~ s/^\s//s if $lastchar eq ' ';
+ }
+ $lastchar = substr( $cdata, -1, 1 );
+ $buffer .= $cdata;
+ if ( defined($xmlprolog) && length($xmlprolog) ) {
+ $buffer =~ s/>PO4A-close\}>/\/>/sg;
+ $buffer =~ s/PO4A-close\}>//sg; # This should not be necessary
+ }
+ } # end of type eq 'cdata'
+
+ elsif ( $event->type eq 'sdata' ) {
+ my $sdata = $event->data;
+ $sdata =~ s/^\[//;
+ $sdata =~ s/\s*\]$//;
+ $lastchar = substr( $sdata, -1, 1 );
+ $buffer .= '&' . $sdata . ';';
+ } # end of type eq 'sdata'
+
+ elsif ( $event->type eq 're' ) {
+
+ # End of record, the line reference shall be incremented.
+ $adds += 1;
+ if ($verb) {
+
+ # Check if this line of data appear on the same line
+ # than the previous tag. If not, append an end of line
+ # to the buffer.
+ if ( $ref ne $verb_last_ref ) {
+ $buffer .= "\n";
+ $verb_last_ref = $ref;
+ }
+ $buffer .= "\n";
+ } elsif ( $lastchar ne ' ' ) {
+ $buffer .= " ";
+ }
+ $lastchar = ' ';
+ } #end of type eq 're'
+
+ elsif ( $event->type eq 'conforming' ) {
+
+ } elsif ( $event->type eq 'pi' ) {
+ my $pi = $event->data;
+ $buffer .= "<?$pi>";
+
+ } else {
+ die wrap_ref_mod(
+ $refs[ $parse->line ],
+ "po4a::sgml", dgettext( "po4a", "Unknown SGML event type: %s" ),
+ $event->type
+ );
+ }
+ }
+
+ # What to do after parsing
+ $self->pushline($buffer);
+ close(IN);
+ if ( $? != 0 and $self->verbose() > 0 ) {
+ warn wrap_mod(
+ "po4a::sgml",
+ dgettext(
+ "po4a",
+ "Warning: onsgmls produced some errors. "
+ . "This is usually caused by po4a, which modifies the input "
+ . "and restores it afterwards, causing the input of onsgmls "
+ . "to be invalid. This is usually safe, but you may wish "
+ . "to verify the generated document with onsgmls -wno-valid."
+ )
+ );
+ unless ( $debug{'onsgmls'} ) {
+ warn wrap_mod(
+ "po4a::sgml",
+ dgettext(
+ "po4a",
+ "To see the error message, "
+ . "rerun po4a with this additional argument:\n"
+ . " -o debug=onsgmls"
+ )
+ );
+ }
+ }
+ unlink($tmpfile) unless ( $debug{'refs'} or $debug{'onsgmls'} );
+}
+
+sub end_paragraph {
+ my ( $self, $para, $ref, $type, $verb, $indent ) = ( shift, shift, shift, shift, shift, shift );
+ my (@open) = @_;
+ die "Internal error: no paragraph to end here!!"
+ unless scalar @open;
+
+ return unless defined($para) && length($para);
+
+ if ( ( $para =~ m/^\s*$/s ) and ( not $verb ) ) {
+
+ # In non-verbatim environments, a paragraph with only spaces is
+ # like an empty paragraph
+ return;
+ }
+
+ # unprotect &entities;
+ $para =~ s/{PO4A-amp}/&/g;
+
+ # remove the name"\|\|" onsgmls added as attributes
+ $para =~ s/ name=\"\\\|\\\|\"//g;
+ $para =~ s/ moreinfo=\"none\"//g;
+
+ # Extract the leading and trailing spaces. They will be restored only
+ # in verbatim environments.
+ my ( $leading_spaces, $trailing_spaces ) = ( "", "" );
+ if ($verb) {
+
+ # In the verbatim mode, we can ignore empty lines, but not the
+ # leading spaces or tabulations. Otherwise, the PO will look
+ # weird.
+ if ( $para =~ m/^(\s*\n)(.*?)(\s*)$/s ) {
+ $leading_spaces = $1;
+ $para = $2;
+ $trailing_spaces = $3;
+ }
+ } else {
+ if ( $para =~ m/^(\s*)(.*?)(\s*)$/s ) {
+ $leading_spaces = $1;
+ $para = $2;
+ $trailing_spaces = $3;
+ }
+ }
+
+ $para = $self->translate(
+ $para, $ref, $type,
+ 'wrap' => !$verb,
+ 'wrapcol' => ( 75 - $indent )
+ );
+
+ if ($verb) {
+ $para = $leading_spaces . $para . $trailing_spaces;
+ } else {
+ $para =~ s/^\s+//s;
+ my $toadd = " " x ( $indent + 1 );
+ $para =~ s/^/$toadd/mg;
+ $para .= "\n";
+ }
+
+ $self->pushline($para);
+}
+
+1;
+
+=head1 AUTHORS
+
+This module is an adapted version of sgmlspl (SGML postprocessor for the
+ONSGMLS parser) which was:
+
+ Copyright © 1995 David Megginson <dmeggins@aix1.uottawa.ca>
+
+The adaptation for po4a was done by:
+
+ Denis Barbier <barbier@linuxfr.org>
+ Martin Quinson (mquinson#debian.org)
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright © 1995 David Megginson <dmeggins@aix1.uottawa.ca>.
+ Copyright © 2002-2005 SPI, Inc.
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
diff --git a/lib/Locale/Po4a/TeX.pm b/lib/Locale/Po4a/TeX.pm
new file mode 100644
index 0000000..b68adab
--- /dev/null
+++ b/lib/Locale/Po4a/TeX.pm
@@ -0,0 +1,1756 @@
+#!/usr/bin/perl -w
+
+# Copyright © 2004, 2005 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>
+#
+# This file is part of po4a.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with po4a; if not, write to the Free Software
+# Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+########################################################################
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::TeX - convert TeX documents and derivatives from/to PO files
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::TeX is a module to help the translation of TeX documents into
+other [human] languages. It can also be used as a base to build modules for
+TeX-based documents.
+
+Users should probably use the LaTeX module, which inherits from the TeX module
+and contains the definitions of common LaTeX commands.
+
+=head1 TRANSLATING WITH PO4A::TEX
+
+This module can be used directly to handle generic TeX documents.
+This will split your document in smaller blocks (paragraphs, verbatim
+blocks, or even smaller like titles or indexes).
+
+There are some options (described in the next section) that can customize
+this behavior. If this doesn't fit to your document format you're encouraged
+to write your own derivative module from this, to describe your format's details.
+See the section B<WRITING DERIVATIVE MODULES> below, for the process description.
+
+This module can also be customized by lines starting with "% po4a:" in the
+TeX file. This process is described in the B<INLINE CUSTOMIZATION> section.
+
+=cut
+
+package Locale::Po4a::TeX;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+require Exporter;
+use vars qw(@ISA @EXPORT);
+@ISA = qw(Locale::Po4a::TransTractor);
+@EXPORT = qw(%commands %environments
+ $RE_ESCAPE $ESCAPE $RE_VERBATIM
+ $no_wrap_environments
+ $verbatim_environments
+ %separated_command
+ %separated_environment
+ %translate_buffer_env
+ &add_comment
+ &generic_command
+ &register_generic_command
+ &register_generic_environment);
+
+use Locale::Po4a::TransTractor;
+use Locale::Po4a::Common;
+use File::Basename qw(dirname);
+use Carp qw(croak);
+
+use Encode;
+use Encode::Guess;
+
+# hash of known commands and environments, with parsing sub.
+# See end of this file
+use vars qw(%commands %environments);
+
+# hash to describe the number of parameters and which one have to be
+# translated. Used by generic commands
+our %command_parameters = ();
+our %environment_parameters = ();
+
+# hash to describe the separators of environments.
+our %env_separators = ();
+
+# The escape character used to introduce commands.
+our $RE_ESCAPE = "\\\\";
+our $ESCAPE = "\\";
+
+# match the beginning of a verbatim block
+our $RE_VERBATIM = "\\\\begin\\{(?:verbatim)\\*?\\}";
+
+# match the beginning of a comment.
+# NOTE: It must contain a group, with chars preceding the comment
+our $RE_PRE_COMMENT = "(?<!\\\\)(?:\\\\\\\\)*";
+our $RE_COMMENT = "\\\%";
+
+# Space separated list of environments that should not be re-wrapped.
+our $no_wrap_environments = "verbatim";
+our $verbatim_environments = "verbatim";
+
+# hash with the commands that have to be separated (or have to be joined).
+# 3 modes are currently used:
+# '*' The command is separated if it appear at an extremity of a
+# paragraph
+# '+' The command is separated, but its arguments are joined together
+# with the command name for the translation
+# '-' The command is not separated, unless it appear alone on a paragraph
+# (e.g. \strong)
+our %separated_command = ();
+our %separated_environment = ();
+
+=head1 OPTIONS ACCEPTED BY THIS MODULE
+
+These are this module's particular options:
+
+=over 4
+
+=item B<debug>
+
+Activate debugging for some internal mechanisms of this module.
+Use the source to see which parts can be debugged.
+
+=item B<no_wrap>
+
+Comma-separated list of environments which should not be re-wrapped.
+
+Note that there is a difference between verbatim and no_wrap environments.
+There is no command and comments analysis in verbatim blocks.
+
+If this environment was not already registered, po4a will consider that
+this environment does not take any parameters.
+
+=item B<exclude_include>
+
+Colon-separated list of files that should not be included by \input and
+\include.
+
+=item B<definitions>
+
+The name of a file containing definitions for po4a, as defined in the
+B<INLINE CUSTOMIZATION> section.
+You can use this option if it is not possible to put the definitions in
+the document being translated.
+
+=item B<verbatim>
+
+Comma-separated list of environments which should be taken as verbatim.
+
+If this environment was not already registered, po4a will consider that
+this environment does not take any parameters.
+
+=back
+
+Use these options to override the default behavior of the defined commands.
+
+=head1 INLINE CUSTOMIZATION
+
+The TeX module can be customized with lines starting by B<% po4a:>.
+These lines are interpreted as commands to the parser.
+The following commands are recognized:
+
+=over 4
+
+=item B<% po4a: command> I<command1> B<alias> I<command2>
+
+Indicates that the arguments of the I<command1> command should be
+treated as the arguments of the I<command2> command.
+
+=item B<% po4a: command> I<command1> I<parameters>
+
+This describes in detail the parameters of the I<command1>
+command.
+This information will be used to check the number of arguments and their
+types.
+
+You can precede the I<command1> command by
+
+=over 4
+
+=item an asterisk (B<*>)
+
+po4a will extract this command from paragraphs (if it is located at
+the beginning or the end of a paragraph).
+The translators will then have to translate the parameters that are marked
+as translatable.
+
+=item a plus (B<+>)
+
+As for an asterisk, the command will be extracted if it appear at an
+extremity of a block, but the parameters won't be translated separately.
+The translator will have to translate the command concatenated to all its
+parameters.
+This keeps more context, and is useful for commands with small
+words in parameter, which can have multiple meanings (and translations).
+
+Note: In this case you don't have to specify which parameters are
+translatable, but po4a must know the type and number of parameters.
+
+=item a minus (B<->)
+
+In this case, the command won't be extracted from any block.
+But if it appears alone on a block, then only the parameters marked as
+translatable will be presented to the translator.
+This is useful for font commands. These commands should generally not be
+separated from their paragraph (to keep the context), but there is no
+reason to annoy the translator with them if a whole string is enclosed in
+such a command.
+
+=back
+
+The I<parameters> argument is a set of [] (to indicate an optional
+argument) or {} (to indicate a mandatory argument).
+You can place an underscore (_) between these brackets to indicate that
+the parameter must be translated. For example:
+ % po4a: command *chapter [_]{_}
+
+This indicates that the chapter command has two parameters: an optional
+(short title) and a mandatory one, which must both be translated.
+If you want to specify that the href command has two mandatory parameters,
+that you don't want to translate the URL (first parameter), and that you
+don't want this command to be separated from its paragraph (which allow
+the translator to move the link in the sentence), you can use:
+ % po4a: command -href {}{_}
+
+In this case, the information indicating which arguments must be
+translated is only used if a paragraph is only composed of this href
+command.
+
+=item B<% po4a: environment> I<env> I<parameters>
+
+This defines the parameters accepted by the I<env> environment and specifies the ones to be translated.
+This information is later used to check the number of arguments of the
+\begin command.
+The syntax of the I<parameters> argument is the same as described for the
+others commands.
+The first parameter of the \begin command is the name of the environment.
+This parameter must not be specified in the list of parameters. Here are
+some examples:
+ % po4a: environment multicols {}
+ % po4a: environment equation
+
+As for the commands, I<env> can be preceded by a plus (+) to indicate
+that the \begin command must be translated with all its arguments.
+
+=item B<% po4a: separator> I<env> B<">I<regex>B<">
+
+Indicates that an environment should be split according to the given
+regular expression.
+
+The regular expression is delimited by quotes.
+It should not create any back-reference.
+You should use (?:) if you need a group.
+It may also need some escapes.
+
+For example, the LaTeX module uses the "(?:&|\\\\)" regular expression to
+translate separately each cell of a table (lines are separated by '\\' and
+cells by '&').
+
+The notion of environment is expanded to the type displayed in the PO file.
+This can be used to split on "\\\\" in the first mandatory argument of the
+title command. In this case, the environment is title{#1}.
+
+=item B<% po4a: verbatim environment> I<env>
+
+Indicate that I<env> is a verbatim environment.
+Comments and commands will be ignored in this environment.
+
+If this environment was not already registered, po4a will consider that
+this environment does not take any parameters.
+
+=back
+
+=cut
+
+# Directory name of the main file.
+# It is the directory where included files will be searched.
+# See read_file.
+my $my_dirname;
+
+# Array of files that should not be included by read_file.
+# See read_file.
+our @exclude_include;
+
+my %type_end = ( '{' => '}', '[' => ']', ' ' => '' );
+
+#########################
+#### DEBUGGING STUFF ####
+#########################
+my %debug = (
+ 'pretrans' => 0, # see pre-conditioning of translation
+ 'postrans' => 0, # see post-conditioning of translation
+ 'translate' => 0, # see translation
+ 'extract_commands' => 0, # see commands extraction
+ 'commands' => 0, # see command subroutines
+ 'environments' => 0, # see environment subroutines
+ 'translate_buffer' => 0 # see buffer translation
+);
+
+=head1 WRITING DERIVATE MODULES
+
+=over 4
+
+=item B<pre_trans>
+
+=cut
+
+sub pre_trans {
+ my ( $self, $str, $ref, $type ) = @_;
+
+ # Preformatting, so that translators don't see
+ # strange chars
+ my $origstr = $str;
+ print STDERR "pre_trans($str)="
+ if ( $debug{'pretrans'} );
+
+ # Accentuated characters
+ # FIXME: only do this if the encoding is UTF-8?
+ # $str =~ s/${RE_ESCAPE}`a/à/g;
+## $str =~ s/${RE_ESCAPE}c{c}/ç/g; # not in texinfo: @,{c}
+ # $str =~ s/${RE_ESCAPE}^e/ê/g;
+ # $str =~ s/${RE_ESCAPE}'e/é/g;
+ # $str =~ s/${RE_ESCAPE}`e/è/g;
+ # $str =~ s/${RE_ESCAPE}`u/ù/g;
+ # $str =~ s/${RE_ESCAPE}"i/ï/g;
+ # # Non breaking space. FIXME: should we change $\sim$ to ~
+ # $str =~ s/~/\xA0/g; # FIXME: not in texinfo: @w{ }
+
+ print STDERR "$str\n" if ( $debug{'pretrans'} );
+ return $str;
+}
+
+=item B<post_trans>
+
+=cut
+
+sub post_trans {
+ my ( $self, $str, $ref, $type ) = @_;
+ my $transstr = $str;
+
+ print STDERR "post_trans($str)="
+ if ( $debug{'postrans'} );
+
+ # Accentuated characters
+ # $str =~ s/à/${ESCAPE}`a/g;
+## $str =~ s/ç/$ESCAPEc{c}/g; # FIXME: not in texinfo
+ # $str =~ s/ê/${ESCAPE}^e/g;
+ # $str =~ s/é/${ESCAPE}'e/g;
+ # $str =~ s/è/${ESCAPE}`e/g;
+ # $str =~ s/ù/${ESCAPE}`u/g;
+ # $str =~ s/ï/${ESCAPE}"i/g;
+ # # Non breaking space. FIXME: should we change ~ to $\sim$
+ # $str =~ s/\xA0/~/g; # FIXME: not in texinfo
+
+ print STDERR "$str\n" if ( $debug{'postrans'} );
+ return $str;
+}
+
+# Comments are extracted in the parse function.
+# They are stored in the @comments array, and then displayed as a PO
+# comment with the first translated string of the paragraph.
+my @comments = ();
+
+=item B<add_comment>
+
+Add a string as a comment to be added around the next translated element.
+This is mostly useful to the texinfo module, as comments are automatically handled in TeX.
+
+=cut
+
+sub add_comment {
+ my ( $self, $comment ) = @_;
+ push @comments, $comment;
+}
+
+=item B<translate>
+
+Wrapper around Transtractor's translate, with pre- and post-processing
+filters.
+
+Comments of a paragraph are inserted as a PO comment for the first
+translated string of this paragraph.
+
+=cut
+
+sub translate {
+ my ( $self, $str, $ref, $type ) = @_;
+ my (%options) = @_;
+ my $origstr = $str;
+ print STDERR "translate($str)="
+ if ( $debug{'translate'} );
+
+ return $str unless ( defined $str ) && length($str);
+ return $str if ( $str eq "\n" );
+
+ $str = pre_trans( $self, $str, $ref || $self->{ref}, $type );
+
+ # add comments (if any and not already added to the PO)
+ if (@comments) {
+ $options{'comment'} .= join( '\n', @comments );
+
+ @comments = ();
+ }
+
+ # FIXME: translate may append a newline, keep the trailing spaces so we can
+ # recover them.
+ my $spaces = "";
+ if ( $options{'wrap'} and $str =~ m/^(.*?)(\s+)$/s ) {
+ $str = $1;
+ $spaces = $2;
+ }
+
+ # Translate this
+ $str = $self->SUPER::translate( $str, $ref || $self->{ref}, $type || $self->{type}, %options );
+
+ # FIXME: translate may append a newline, see above
+ if ( $options{'wrap'} ) {
+ chomp $str;
+ $str .= $spaces;
+ }
+
+ $str = post_trans( $self, $str, $ref || $self->{ref}, $type );
+
+ print STDERR "'$str'\n" if ( $debug{'translate'} );
+ return $str;
+}
+
+###########################
+### COMMANDS SEPARATION ###
+###########################
+
+=item B<get_leading_command>($buffer)
+
+This function returns:
+
+=over 4
+
+=item A command name
+
+If no command is found at the beginning of the given buffer, this string
+will be empty. Only commands that can be separated are considered.
+The %separated_command hash contains the list of these commands.
+
+=item A variant
+
+This indicates if a variant is used. For example, an asterisk (*) can
+be added at the end of sections command to specify that they should
+not be numbered. In this case, this field will contain "*". If there
+is no variant, the field is an empty string.
+
+=item An array of tuples (type of argument, argument)
+
+The type of argument can be either '{' (for mandatory arguments) or '['
+(for optional arguments).
+
+=item The remaining buffer
+
+The rest of the buffer after the removal of this leading command and
+its arguments. If no command is found, the original buffer is not
+touched and returned in this field.
+
+=back
+
+=cut
+
+sub get_leading_command {
+ my ( $self, $buffer ) = ( shift, shift );
+ my $command = ""; # the command name
+ my $variant = ""; # a varriant for the command (e.g. an asterisk)
+ my @args; # array of arguments
+ print STDERR "get_leading_command($buffer)="
+ if ( $debug{'extract_commands'} );
+
+ if ( $buffer =~ m/^$RE_ESCAPE([[:alnum:]]+)(\*?)(.*)$/s
+ && defined $separated_command{$1} )
+ {
+ # The buffer begin by a comand (possibly preceded by some
+ # whitespaces).
+ $command = $1;
+ $variant = $2;
+ $buffer = $3;
+
+ # read the arguments (if any)
+ while ( $buffer =~ m/^\s*$RE_PRE_COMMENT([\[\{])(.*)$/s ) {
+ my $type = $1;
+ my $arg = "";
+ my $count = 1;
+ $buffer = $2;
+
+ # stop reading the buffer when the number of ] (or }) matches the
+ # the number of [ (or {).
+ while ( $count > 0 ) {
+ if ( $buffer =~ m/^(.*?$RE_PRE_COMMENT)([\[\]\{\}])(.*)$/s ) {
+ $arg .= $1;
+ $buffer = $3;
+ if ( $2 eq $type ) {
+ $count++;
+ } elsif ( $2 eq $type_end{$type} ) {
+ $count--;
+ }
+ if ( $count > 0 ) {
+ $arg .= $2;
+ }
+ } else {
+ die wrap_ref_mod( $self->{ref}, "po4a::tex", dgettext( "po4a", "un-balanced %s in '%s'" ),
+ $type, $buffer );
+ }
+ }
+ push @args, ( $type, $arg );
+ }
+ }
+ if ( defined $command and length $command ) {
+
+ # verify the number of arguments
+ my ( $check, $reason, $remainder ) = check_arg_count( $self, $command, \@args );
+ if ( not $check ) {
+ die wrap_ref_mod(
+ $self->{ref},
+ "po4a::tex",
+ dgettext( "po4a", "Error while checking the number of " . "arguments of the '%s' command: %s" ) . "\n",
+ $command,
+ $reason
+ );
+ }
+
+ if (@$remainder) {
+
+ # FIXME: we should also keep the spaces to be idempotent
+ my ( $temp, $type, $arg );
+ while (@$remainder) {
+ $type = shift @$remainder;
+ $arg = shift @$remainder;
+ $temp .= $type . $arg . $type_end{$type};
+
+ # And remove the same number of arguments from @args
+ pop @args;
+ pop @args;
+ }
+ $buffer = $temp . $buffer;
+ }
+ }
+
+ print STDERR "($command,$variant,@args,$buffer)\n"
+ if ( $debug{'extract_commands'} );
+ return ( $command, $variant, \@args, $buffer );
+}
+
+=item B<get_trailing_command>($buffer)
+
+The same as B<get_leading_command>, but for commands at the end of a buffer.
+
+=cut
+
+sub get_trailing_command {
+ my ( $self, $buffer ) = ( shift, shift );
+ my $orig_buffer = $buffer;
+ print STDERR "get_trailing_command($buffer)="
+ if ( $debug{'extract_commands'} );
+
+ my @args;
+ my $command = "";
+ my $variant = "";
+
+ # While the buffer ends by }, consider it is a mandatory argument
+ # and extract this argument.
+ while ($buffer =~ m/^(.*$RE_PRE_COMMENT(\{).*)$RE_PRE_COMMENT\}$/s
+ or $buffer =~ m/^(.*$RE_PRE_COMMENT(\[).*)$RE_PRE_COMMENT\]$/s )
+ {
+ my $arg = "";
+ my $count = 1;
+ $buffer = $1;
+ my $type = $2;
+
+ # stop reading the buffer when the number of } (or ]) matches the
+ # the number of { (or [).
+ while ( $count > 0 ) {
+ if ( $buffer =~ m/^(.*$RE_PRE_COMMENT)([\{\}\[\]])(.*)$/s ) {
+ $arg = $3 . $arg;
+ $buffer = $1;
+ if ( $2 eq $type ) {
+ $count--;
+ } elsif ( $2 eq $type_end{$type} ) {
+ $count++;
+ }
+ if ( $count > 0 ) {
+ $arg = $2 . $arg;
+ }
+ } else {
+ die wrap_ref_mod( $self->{ref}, "po4a::tex", dgettext( "po4a", "un-balanced %s in '%s'" ),
+ $type_end{$type}, $buffer );
+ }
+ }
+ unshift @args, ( $type, $arg );
+ }
+
+ # There should now be a command, maybe followed by an asterisk.
+ if ( $buffer =~ m/^(.*$RE_PRE_COMMENT)$RE_ESCAPE([[:alnum:]]+)(\*?)\s*$/s
+ && defined $separated_command{$2} )
+ {
+ $buffer = $1;
+ $command = $2;
+ $variant = $3;
+ my ( $check, $reason, $remainder ) = check_arg_count( $self, $command, \@args );
+ if ( not $check ) {
+ die wrap_ref_mod(
+ $self->{ref},
+ "po4a::tex",
+ dgettext( "po4a", "Error while checking the number of " . "arguments of the '%s' command: %s" ) . "\n",
+ $command,
+ $reason
+ );
+ }
+ if (@$remainder) {
+
+ # There are some arguments after the command.
+ # We can't extract this comand.
+ $command = "";
+ }
+ }
+
+ # sanitize return values if no command was found.
+ if ( !length($command) ) {
+ $command = "";
+ $variant = "";
+ undef @args;
+ $buffer = $orig_buffer;
+ }
+
+ # verify the number of arguments
+
+ print STDERR "($command,$variant,@args,$buffer)\n"
+ if ( $debug{'extract_commands'} );
+ return ( $command, $variant, \@args, $buffer );
+}
+
+=item B<translate_buffer>
+
+Recursively translate a buffer by separating leading and trailing
+commands (those which should be translated separately) from the
+buffer.
+
+If a function is defined in %translate_buffer_env for the current
+environment, this function will be used to translate the buffer instead of
+translate_buffer().
+
+=cut
+
+our %translate_buffer_env = ();
+
+sub translate_buffer {
+ my ( $self, $buffer, $no_wrap, @env ) = ( shift, shift, shift, @_ );
+
+ if ( @env and defined $translate_buffer_env{ $env[-1] } ) {
+ return &{ $translate_buffer_env{ $env[-1] } }( $self, $buffer, $no_wrap, @env );
+ }
+
+ print STDERR "translate_buffer($buffer,$no_wrap,@env)="
+ if ( $debug{'translate_buffer'} );
+
+ my ( $command, $variant ) = ( "", "" );
+ my $args;
+ my $translated_buffer = "";
+ my $orig_buffer = $buffer;
+ my $t = ""; # a temporary string
+
+ if ( $buffer =~ /^\s*$/s ) {
+ print STDERR "($buffer,@env)\n"
+ if ( $debug{'translate_buffer'} );
+ return ( $buffer, @env );
+ }
+
+ # verbatim blocks.
+ # Buffers starting by \end{verbatim} are handled after.
+ if ( in_verbatim(@env) and $buffer !~ m/^\n?\Q$ESCAPE\Eend\{$env[-1]\*?\}/ ) {
+ if ( $buffer =~ m/^(.*?)(\n?\Q$ESCAPE\Eend\{$env[-1]\*?\}.*)$/s ) {
+
+ # end of a verbatim block
+ my ( $begin, $end ) = ( $1 ? $1 : "", $2 );
+ my ( $t1, $t2 ) = ( "", "" );
+ if ( defined $begin ) {
+ $t1 = $self->translate( $begin, $self->{ref}, $env[-1], "wrap" => 0 );
+ }
+ ( $t2, @env ) = translate_buffer( $self, $end, $no_wrap, @env );
+ print STDERR "($t1$t2,@env)\n"
+ if ( $debug{'translate_buffer'} );
+ return ( $t1 . $t2, @env );
+ } else {
+ $translated_buffer = $self->translate( $buffer, $self->{ref}, $env[-1], "wrap" => 0 );
+ print STDERR "($translated_buffer,@env)\n"
+ if ( $debug{'translate_buffer'} );
+ return ( $translated_buffer, @env );
+ }
+ }
+
+ # early detection of verbatim environment
+ if ( $buffer =~ /^($RE_VERBATIM\n?)(.*)$/s and length $2 ) {
+ my ( $begin, $end ) = ( $1, $2 );
+ my ( $t1, $t2 ) = ( "", "" );
+ ( $t1, @env ) = translate_buffer( $self, $begin, $no_wrap, @env );
+ ( $t2, @env ) = translate_buffer( $self, $end, $no_wrap, @env );
+
+ print STDERR "($t1$t2,@env)\n"
+ if ( $debug{'translate_buffer'} );
+ return ( $t1 . $t2, @env );
+ }
+
+ # detect \begin and \end (if they are not commented)
+ if (
+ $buffer =~ /^((?:.*?\n)? # $1 is
+ (?:[^%] # either not a %
+ | # or
+ (?<!\\)(?:\\\\)*\\%)*? # a % preceded by an odd nb of \
+ ) # $2 is a \begin{ with the end of the line
+ (${RE_ESCAPE}(?:begin|end)\{.*)$/sx
+ and length $1
+ )
+ {
+ my ( $begin, $end ) = ( $1, $2 );
+ my ( $t1, $t2 ) = ( "", "" );
+ if ( is_closed($begin) ) {
+ ( $t1, @env ) = translate_buffer( $self, $begin, $no_wrap, @env );
+ ( $t2, @env ) = translate_buffer( $self, $end, $no_wrap, @env );
+
+ print STDERR "($t1$t2,@env)\n"
+ if ( $debug{'translate_buffer'} );
+ return ( $t1 . $t2, @env );
+ }
+ }
+
+ # remove comments from the buffer.
+ # Comments are stored in an array and shown as comments in the PO.
+ while ( $buffer =~ m/($RE_PRE_COMMENT)$RE_COMMENT([^\n]*)(\n[ \t]*)(.*)$/s ) {
+ my $comment = $2;
+ my $end = "";
+ if ( $4 =~ m/^\n/s and $buffer !~ m/^$RE_COMMENT/s ) {
+
+ # a line with comments, followed by an empty line.
+ # Keep the empty line, but remove the comment.
+ # This is an empirical heuristic, but seems to work;)
+ $end = "\n";
+ }
+ if ( defined $comment and $comment !~ /^\s*$/s ) {
+ push @comments, $comment;
+ }
+ $buffer =~ s/($RE_PRE_COMMENT)$RE_COMMENT([^\n]*)(\n[ \t]*)/$1$end/s;
+ }
+
+ # translate leading commands.
+ do {
+ # keep the leading space to put them back after the translation of
+ # the command.
+ my $spaces = "";
+ if ( $buffer =~ /^(\s+)(.*?)$/s ) {
+ $spaces = $1;
+
+ # $buffer = $2; # FIXME: this also remove trailing spaces!!
+ $buffer =~ s/^\s*//s;
+ }
+ my $buffer_save = $buffer;
+ ( $command, $variant, $args, $buffer ) = get_leading_command( $self, $buffer );
+ if (
+ ( length $command )
+ and ( defined $separated_command{$command} )
+ and ( $separated_command{$command} eq '-' )
+ and ( ( not( defined($buffer) ) )
+ or ( $buffer !~ m/^\s*$/s ) )
+ )
+ {
+ # This command can be separated only if alone on a buffer.
+ # We need to remove the trailing commands first, and see if it
+ # will be alone on this buffer.
+ $buffer = $buffer_save;
+ $command = "";
+ }
+ if ( length($command) ) {
+
+ # call the command subroutine.
+ # These command subroutines will probably call translate_buffer
+ # with the content of each argument that need a translation.
+ if ( defined( $commands{$command} ) ) {
+ ( $t, @env ) = &{ $commands{$command} }( $self, $command, $variant, $args, \@env, $no_wrap );
+ $translated_buffer .= $spaces . $t;
+
+ # Handle spaces after a command.
+ $spaces = "";
+ if ( $buffer =~ /^(\s+)(.*?)$/s ) {
+ $spaces = $1;
+
+ # $buffer = $2; # FIXME: this also remove trailing spaces!!
+ $buffer =~ s/^\s*//s;
+ }
+ $translated_buffer .= $spaces;
+ } else {
+ die wrap_ref_mod( $self->{ref}, "po4a::tex", dgettext( "po4a", "Unknown command: '%s'" ), $command );
+ }
+ } else {
+ $buffer = $spaces . $buffer;
+ }
+ } while ( length($command) );
+
+ # array of trailing commands, which will be translated later.
+ my @trailing_commands = ();
+ do {
+ my $spaces = "";
+ if ( $buffer =~ /^(.*?)(\s+)$/s ) {
+ $buffer = $1;
+ $spaces = $2;
+ }
+ my $buffer_save = $buffer;
+ ( $command, $variant, $args, $buffer ) = get_trailing_command( $self, $buffer );
+ if (
+ ( length $command )
+ and ( defined $separated_command{$command} )
+ and ( $separated_command{$command} eq '-' )
+ and ( ( not defined $buffer )
+ or ( $buffer !~ m/^\s*$/s ) )
+ )
+ {
+ # We can extract this command.
+ $command = "";
+ $buffer = $buffer_save;
+ }
+ if ( length($command) ) {
+ unshift @trailing_commands, ( $command, $variant, $args, $spaces );
+ } else {
+ $buffer .= $spaces;
+ }
+ } while ( length($command) );
+
+ # Now, $buffer is just a block that can be translated.
+
+ # environment specific treatment
+ if ( @env and defined $env_separators{ $env[-1] } ) {
+ my $re_separator = $env_separators{ $env[-1] };
+ my $buf_begin = "";
+
+ # FIXME: the separator may have to be translated.
+ while ( $buffer =~ m/^(.*?)(\s*$re_separator\s*)(.*)$/s ) {
+ my ( $begin, $sep, $end ) = ( $1, $2, $3 );
+ $buf_begin .= $begin;
+ if ( is_closed($buf_begin) ) {
+ my $t = "";
+ ( $t, @env ) = translate_buffer( $self, $buf_begin, $no_wrap, @env );
+ $translated_buffer .= $t . $sep;
+ $buf_begin = "";
+ } else {
+
+ # the command is in a command argument
+ $buf_begin .= $sep;
+ }
+ $buffer = $end;
+ }
+ $buffer = $buf_begin . $buffer;
+ }
+
+ # finally, translate
+ if ( length($buffer) ) {
+ my $wrap = 1;
+ my ( $e1, $e2 );
+ NO_WRAP_LOOP: foreach $e1 (@env) {
+ foreach $e2 ( split( ' ', $no_wrap_environments ) ) {
+ if ( $e1 eq $e2 ) {
+ $wrap = 0;
+ last NO_WRAP_LOOP;
+ }
+ }
+ }
+ $wrap = 0 if ( defined $no_wrap and $no_wrap == 1 );
+
+ # Keep spaces at the end of the buffer.
+ my $spaces_end = "";
+ if ( $buffer =~ /^(.*?)(\s+)$/s ) {
+ $spaces_end = $2;
+ $buffer = $1;
+ }
+ if ( $wrap and $buffer =~ s/^(\s+)//s ) {
+ $translated_buffer .= $1;
+ }
+ $translated_buffer .=
+ $self->translate( $buffer, $self->{ref}, @env ? $env[-1] : "Plain text", "wrap" => $wrap );
+
+ # Restore spaces at the end of the buffer.
+ $translated_buffer .= $spaces_end;
+ }
+
+ # append the translation of the trailing commands
+ while (@trailing_commands) {
+ my $command = shift @trailing_commands;
+ my $variant = shift @trailing_commands;
+ my $args = shift @trailing_commands;
+ my $spaces = shift @trailing_commands;
+ if ( defined( $commands{$command} ) ) {
+ ( $t, @env ) = &{ $commands{$command} }( $self, $command, $variant, $args, \@env, $no_wrap );
+ $translated_buffer .= $t . $spaces;
+ } else {
+ die wrap_ref_mod( $self->{ref}, "po4a::tex", dgettext( "po4a", "Unknown command: '%s'" ), $command );
+ }
+ }
+
+ print STDERR "($translated_buffer,@env)\n"
+ if ( $debug{'translate_buffer'} );
+ return ( $translated_buffer, @env );
+}
+
+################################
+#### EXTERNAL CUSTOMIZATION ####
+################################
+
+=item B<read>
+
+Overloads Transtractor's read().
+
+=cut
+
+sub read ($$$$) {
+ my $self = shift;
+ my $filename = shift;
+ my $refname = shift;
+ my $charset = shift;
+
+ # keep the directory name of the main file.
+ $my_dirname = dirname($filename);
+
+ push @{ $self->{TT}{doc_in} }, read_file( $self, $filename, $refname, $charset );
+}
+
+=item B<read_file>
+
+Recursively read a file, appending included files which are not listed in the
+@exclude_include array. Included files are searched using the B<kpsewhich>
+command from the Kpathsea library.
+
+Except from the file inclusion part, it is a cut and paste from
+Transtractor's read.
+
+=cut
+
+# TODO: fix DOS end of lines
+sub read_file {
+ my $self = shift;
+ my $filename = shift
+ or croak wrap_mod( "po4a::tex", dgettext( "po4a", "Cannot read from file without having a filename" ) );
+ my $refname = shift // $filename;
+ my $charset = shift || 'UTF-8';
+ my $linenum = 0;
+ my @entries = ();
+
+ open( my $in, "<:encoding($charset)", $filename )
+ or croak wrap_mod( "po4a::tex", dgettext( "po4a", "Cannot read from %s: %s" ), $filename, $! );
+ while ( defined( my $textline = <$in> ) ) {
+ $linenum++;
+ my $ref = "$refname:$linenum";
+
+ # TODO: add support for includeonly
+ # The next regular expression matches \input or \includes that are
+ # not commented (but can be preceded by a \%.
+ while (
+ $textline =~ /^((?:[^%]|(?<!\\)(?:\\\\)*\\%)*)
+ \\(include|input)
+ \{([^\{]*)\}(.*)$/x
+ )
+ {
+ my ( $begin, $newfilename, $end ) = ( $1, $3, $4 );
+ my $tag = $2;
+ my $include = 1;
+ foreach my $f (@exclude_include) {
+ if ( $f eq $newfilename ) {
+ $include = 0;
+ $begin .= "\\$tag" . "{$newfilename}";
+ $textline = $end;
+ last;
+ }
+ }
+ if ( $include and ( $tag eq "include" ) ) {
+ $begin .= "\\clearpage";
+ }
+ if ( $begin !~ /^\s*$/ ) {
+ push @entries, ( $begin, $ref );
+ }
+ if ($include) {
+
+ # search the file
+ open( KPSEA, "kpsewhich " . $newfilename . " |" );
+ my $newfilepath = <KPSEA>;
+
+ if ( $newfilename ne "" and ( $newfilepath // '' ) eq '' ) {
+ die wrap_mod(
+ "po4a::tex",
+ dgettext(
+ "po4a",
+ "Cannot find '%s' with kpsewhich. To prevent this file to be included, add '-o exclude_include=%s' to the options, either on the command line or in your po4a.conf file."
+ ),
+ $newfilename,
+ $newfilename
+ );
+ }
+
+ push @entries, read_file( $self, $newfilepath, $newfilename, $charset );
+ if ( $tag eq "include" ) {
+ $textline = "\\clearpage" . $end;
+ } else {
+ $textline = $end;
+ }
+ }
+ }
+ if ( length($textline) ) {
+ my @entry = ( $textline, $ref );
+ push @entries, @entry;
+ }
+ }
+ close $in
+ or croak wrap_mod( "po4a::tex", dgettext( "po4a", "Cannot close %s after reading: %s" ), $filename, $! );
+
+ return @entries;
+}
+
+=back
+
+
+=over 4
+
+=item B<parse_definition_file>
+
+Subroutine for parsing a file with po4a directives (definitions for
+new commands).
+
+=cut
+
+sub parse_definition_file {
+ my ( $self, $filename, $only_try ) = @_;
+ my $filename_org = $filename;
+
+ open( KPSEA, "kpsewhich " . $filename . " |" );
+ $filename = <KPSEA>;
+
+ if ( not defined $filename ) {
+ warn wrap_mod( "po4a::tex", dgettext( "po4a", "kpsewhich cannot find %s" ), $filename_org );
+ if ( defined $only_try && $only_try ) {
+ return;
+ } else {
+ exit 1;
+ }
+ }
+
+ if ( !open( IN, "<$filename" ) ) {
+ warn wrap_mod( "po4a::tex", dgettext( "po4a", "Cannot open %s: %s" ), $filename, $! );
+ if ( defined $only_try && $only_try ) {
+ return;
+ } else {
+ exit 1;
+ }
+ }
+ while (<IN>) {
+ if (/^\s*%\s*po4a\s*:/) {
+ parse_definition_line( $self, $_ );
+ }
+ }
+}
+
+=item B<parse_definition_line>
+
+Parse a definition line of the form "% po4a: ".
+
+See the B<INLINE CUSTOMIZATION> section for more details.
+
+=cut
+
+sub parse_definition_line {
+ my ( $self, $line ) = @_;
+ $line =~ s/^\s*%\s*po4a\s*:\s*//;
+
+ if ( $line =~ /^command\s+([-*+]?)(\w+)\s+(.*)$/ ) {
+ my $command = $2;
+ $line = $3;
+ if ($1) {
+ $separated_command{$command} = $1;
+ }
+ if ( $line =~ /^alias\s+(\w+)\s*$/ ) {
+ if ( defined( $commands{$1} ) ) {
+ $commands{$command} = $commands{$1};
+ $command_parameters{$command} = $command_parameters{$1};
+ } else {
+ die wrap_mod( "po4a::tex", dgettext( "po4a", "Cannot use an alias to the unknown command '%s'" ), $2 );
+ }
+ } elsif ( $line =~ /^(-1|\d+),(-1|\d+),(-1|[ 0-9]*),(-1|[ 0-9]*?)\s*$/ ) {
+ die wrap_ref_mod(
+ $self->{ref},
+ "po4a::tex",
+ dgettext(
+ "po4a", "You are using the old definitions format (%s). Please update this definition line."
+ ),
+ $_[1]
+ );
+ } elsif ( $line =~ m/^((?:\{_?\}|\[_?\])*)\s*$/ ) {
+ register_generic_command("$command,$1");
+ }
+ } elsif ( $line =~ /^environment\s+([+]?\w+\*?)(.*)$/ ) {
+ my $env = $1;
+ $line = $2;
+ if ( $line =~ m/^\s*((?:\{_?\}|\[_?\])*)\s*$/ ) {
+ register_generic_environment("$env,$1");
+ }
+ } elsif ( $line =~ /^separator\s+(\w+(?:\[#[0-9]+\])?)\s+\"(.*)\"\s*$/ ) {
+ my $env = $1; # This is not necessarily an environment.
+ # It can also be something like 'title[#1]'.
+ $env_separators{$env} = $2;
+ } elsif ( $line =~ /^verbatim\s+environment\s+(\w+)\s*$/ ) {
+ register_verbatim_environment($1);
+ }
+}
+
+=item B<is_closed>
+
+=cut
+
+sub is_closed {
+ my $paragraph = shift;
+
+ # FIXME: [ and ] are more difficult to handle, because it is not easy to detect if it introduce an optional argument
+ my $tmp = $paragraph;
+ my $closing = 0;
+ my $opening = 0;
+
+ # FIXME: { and } should not be counted in verbatim blocks
+ # Remove comments
+ $tmp =~ s/$RE_PRE_COMMENT$RE_COMMENT.*//mg;
+ while ( $tmp =~ /^.*?$RE_PRE_COMMENT\{(.*)$/s ) {
+ $opening += 1;
+ $tmp = $1;
+ }
+ $tmp = $paragraph;
+
+ # Remove comments
+ $tmp =~ s/$RE_PRE_COMMENT$RE_COMMENT.*//mg;
+ while ( $tmp =~ /^.*?$RE_PRE_COMMENT\}(.*)$/s ) {
+ $closing += 1;
+ $tmp = $1;
+ }
+ return $opening eq $closing;
+}
+
+sub in_verbatim {
+ foreach my $e1 (@_) {
+ foreach my $e2 ( split( ' ', $verbatim_environments ) ) {
+ if ( $e1 eq $e2 ) {
+ return 1;
+ }
+ }
+ }
+
+ return 0;
+}
+
+#############################
+#### MAIN PARSE FUNCTION ####
+#############################
+
+=item B<parse>
+
+=cut
+
+sub parse {
+ my $self = shift;
+ my ( $line, $ref );
+ my $paragraph = ""; # Buffer where we put the paragraph while building
+ my @env = (); # environment stack
+ my $t = "";
+
+ LINE:
+ undef $self->{type};
+ ( $line, $ref ) = $self->shiftline();
+
+ while ( defined($line) ) {
+ chomp($line);
+ $self->{ref} = "$ref";
+
+ if ( $line =~ /^\s*%\s*po4a\s*:/ ) {
+ parse_definition_line( $self, $line );
+ goto LINE;
+ }
+
+ my $closed = is_closed($paragraph);
+
+ #FIXME: what happens if a \begin{verbatim} or \end{verbatim} is in the
+ # middle of a line. (This is only an issue if the verbatim
+ # environment contains an un-closed bracket)
+ if (
+ (
+ $closed and ( $line =~ /^\s*$/
+ or $line =~ /^\s*$RE_VERBATIM\s*$/ )
+ )
+ or ( in_verbatim(@env) and $line =~ /^\s*\Q$ESCAPE\Eend\{$env[-1]\}\s*$/ )
+ )
+ {
+ # An empty line. This indicates the end of the current
+ # paragraph.
+ $paragraph .= $line . "\n";
+ if ( length($paragraph) ) {
+ ( $t, @env ) = translate_buffer( $self, $paragraph, undef, @env );
+ $self->pushline($t);
+ $paragraph = "";
+ @comments = ();
+ }
+ } else {
+
+ # continue the same paragraph
+ $paragraph .= $line . "\n";
+ }
+
+ # Reinit the loop
+ ( $line, $ref ) = $self->shiftline();
+ undef $self->{type};
+ }
+
+ if ( length($paragraph) ) {
+ ( $t, @env ) = translate_buffer( $self, $paragraph, undef, @env );
+ $self->pushline($t);
+ $paragraph = "";
+ }
+} # end of parse
+
+=item B<docheader>
+
+=back
+
+=cut
+
+sub docheader {
+ return "% This file was generated with po4a. Translate the source file.\n" . "%\n";
+}
+
+####################################
+#### DEFINITION OF THE COMMANDS ####
+####################################
+
+=head1 INTERNAL FUNCTIONS used to write derivative parsers
+
+Command and environment functions take the following arguments (in
+addition to the $self object):
+
+=over
+
+=item A command name
+
+=item A variant
+
+=item An array of (type, argument) tuples
+
+=item The current environment
+
+=back
+
+The first 3 arguments are extracted by get_leading_command or
+get_trailing_command.
+
+Command and environment functions return the translation of the command
+with its arguments and a new environment.
+
+Environment functions are called when a \begin command is found. They are
+called with the \begin command and its arguments.
+
+The TeX module only proposes one command function and one environment
+function: generic_command and generic_environment.
+
+generic_command uses the information specified by
+register_generic_command or by adding definition to the TeX file:
+ % po4a: command I<command1> I<parameters>
+
+generic_environment uses the information specified by
+register_generic_environment or by adding definition to the TeX file:
+ % po4a: environment I<env> I<parameters>
+
+Both functions will only translate the parameters that were specified as
+translatable (with a '_').
+generic_environment will append the name of the environment to the
+environment stack and generic_command will append the name of the command
+followed by an identifier of the parameter (like {#7} or [#2]).
+
+=cut
+
+# definition of environment related commands
+
+$commands{'begin'} = sub {
+ my $self = shift;
+ my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift );
+ my $no_wrap = shift || 0;
+ print "begin($command,$variant,@$args,@$env,$no_wrap)="
+ if ( $debug{'commands'} || $debug{'environments'} );
+ my ( $t, @e ) = ( "", () );
+
+ my $envir = $args->[1];
+ if ( defined($envir) and $envir =~ /^(.*)\*$/ ) {
+ $envir = $1;
+ }
+
+ if ( defined($envir) && defined( $environments{$envir} ) ) {
+ ( $t, @e ) = &{ $environments{$envir} }( $self, $command, $variant, $args, $env, $no_wrap );
+ } else {
+ die wrap_ref_mod( $self->{ref}, "po4a::tex", dgettext( "po4a", "unknown environment: '%s'" ), $args->[1] );
+ }
+
+ print "($t, @e)\n"
+ if ( $debug{'commands'} || $debug{'environments'} );
+ return ( $t, @e );
+};
+
+# Use register_generic to set the type of arguments. The function is then
+# overwritten:
+register_generic_command("*end,{}");
+$commands{'end'} = sub {
+ my $self = shift;
+ my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift );
+ my $no_wrap = shift || 0;
+ print "end($command,$variant,@$args,@$env,$no_wrap)="
+ if ( $debug{'commands'} || $debug{'environments'} );
+
+ # verify that this environment was the last pushed environment.
+ if ( !@$env || @$env[-1] ne $args->[1] ) {
+
+ # a begin may have been hidden in the middle of a translated
+ # buffer. FIXME: Just warn for now.
+ warn wrap_ref_mod( $self->{'ref'}, "po4a::tex", dgettext( "po4a", "unmatched end of environment '%s'" ),
+ $args->[1] );
+ } else {
+ pop @$env;
+ }
+
+ my ( $t, @e ) = generic_command( $self, $command, $variant, $args, $env, $no_wrap );
+
+ print "($t, @$env)\n"
+ if ( $debug{'commands'} || $debug{'environments'} );
+ return ( $t, @$env );
+};
+$separated_command{'begin'} = '*';
+
+sub generic_command {
+ my $self = shift;
+ my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift );
+ my $no_wrap = shift || 0;
+ print "generic_command($command,$variant,@$args,@$env,$no_wrap)="
+ if ( $debug{'commands'} || $debug{'environments'} );
+
+ my ( $t, @e ) = ( "", () );
+ my $translated = "";
+
+ # the number of arguments is checked during the extraction of the
+ # arguments
+
+ if ( ( not( defined $separated_command{$command} ) )
+ or $separated_command{$command} ne '+' )
+ {
+ # Use the information from %command_parameters to only translate
+ # the needed parameters
+ $translated = "$ESCAPE$command$variant";
+
+ # handle arguments
+ my @arg_types = @{ $command_parameters{$command}{'types'} };
+ my @arg_translated = @{ $command_parameters{$command}{'translated'} };
+ my ( $type, $opt );
+ my @targs = @$args;
+ my $count = 0;
+ while (@targs) {
+ $type = shift @targs;
+ $opt = shift @targs;
+ my $have_to_be_translated = 0;
+ TEST_TYPE:
+ if ( $count >= scalar @arg_types ) {
+
+ # The number of arguments does not match,
+ # and a variable number of arguments was not specified
+ die wrap_ref_mod( $self->{ref}, "po4a::tex",
+ dgettext( "po4a", "Wrong number of arguments for " . "the '%s' command." ) . "\n", $command );
+ } elsif ( $type eq $arg_types[$count] ) {
+ $have_to_be_translated = $arg_translated[$count];
+ $count++;
+ } elsif ( $type eq '{' and $arg_types[$count] eq '[' ) {
+
+ # an optionnal argument was not provided,
+ # try with the next argument.
+ $count++;
+ goto TEST_TYPE;
+ } else {
+ my $reason =
+ dgettext( "po4a", "An optional argument " . "was provided, but a mandatory one " . "is expected." );
+ die wrap_ref_mod( $self->{ref}, "po4a::tex", dgettext( "po4a", "Command '%s': %s" ) . "\n",
+ $command, $reason );
+ }
+ if ($have_to_be_translated) {
+ ( $t, @e ) = translate_buffer( $self, $opt, $no_wrap,
+ ( @$env, $command . $type . "#" . $count . $type_end{$type} ) );
+ } else {
+ $t = $opt;
+ }
+ $translated .= $type . $t . $type_end{$type};
+ }
+ } else {
+
+ # Translate the command with all its arguments joined
+ my $tmp = "$ESCAPE$command$variant";
+ my ( $type, $opt );
+ while (@$args) {
+ $type = shift @$args;
+ $opt = shift @$args;
+ $tmp .= $type . $opt . $type_end{$type};
+ }
+ @e = @$env;
+ my $wrap = 1;
+ $wrap = 0 if ( defined $no_wrap and $no_wrap == 1 );
+ $translated = $self->translate( $tmp, $self->{ref}, @e ? $e[-1] : "Plain text", "wrap" => $wrap );
+ }
+
+ print "($translated, @$env)\n"
+ if ( $debug{'commands'} || $debug{'environments'} );
+ return ( $translated, @$env );
+}
+
+sub register_generic_command {
+ if ( $_[0] =~ m/^(.*),((\{_?\}|\[_?\]| _? )*)$/ ) {
+ my $command = $1;
+ my $arg_types = $2;
+ if ( $command =~ /^([-*+])(.*)$/ ) {
+ $command = $2;
+ $separated_command{$command} = $1;
+ }
+ my @types = ();
+ my @translated = ();
+ while ( defined $arg_types
+ and length $arg_types
+ and $arg_types =~ m/^(?:([\{\[ ])(_?)[\}\] ])(.*)$/ )
+ {
+ push @types, $1;
+ push @translated, ( $2 eq "_" ) ? 1 : 0;
+ $arg_types = $3;
+ }
+ $command_parameters{$command}{'types'} = \@types;
+ $command_parameters{$command}{'translated'} = \@translated;
+ $command_parameters{$command}{'nb_args'} = "";
+ $commands{$command} = \&generic_command;
+ } else {
+ die wrap_mod( "po4a::tex",
+ dgettext( "po4a", "register_generic_command: unsupported " . "format: '%s'." ) . "\n", $_[0] );
+ }
+}
+
+########################################
+#### DEFINITION OF THE ENVIRONMENTS ####
+########################################
+sub generic_environment {
+ my $self = shift;
+ my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift );
+ my $no_wrap = shift;
+ print "generic_environment($command,$variant,$args,$env,$no_wrap)="
+ if ( $debug{'environments'} );
+ my ( $t, @e ) = ( "", () );
+ my $translated = "";
+
+ # The first argument (the name of the environment is never translated)
+ # For the others, @types and @translated are used.
+ $translated = "$ESCAPE$command$variant";
+ my @targs = @$args;
+ my $type = shift @targs;
+ my $opt = shift @targs;
+ my $new_env = $opt;
+ $translated .= $type . $new_env . $type_end{$type};
+ if ( ( not( defined $separated_environment{$new_env} ) )
+ or $separated_environment{$new_env} ne '+' )
+ {
+ # Use the information from %command_parameters to only translate
+ # the needed parameters
+ my @arg_types = @{ $environment_parameters{$new_env}{'types'} };
+ my @arg_translated = @{ $environment_parameters{$new_env}{'translated'} };
+
+ my $count = 0;
+ while (@targs) {
+ $type = shift @targs;
+ $opt = shift @targs;
+ my $have_to_be_translated = 0;
+ TEST_TYPE:
+ if ( $count >= scalar @arg_types ) {
+ die wrap_ref_mod( $self->{ref}, "po4a::tex",
+ dgettext( "po4a", "Wrong number of arguments for " . "the '%s' command." ) . "\n", $command );
+ } elsif ( $type eq $arg_types[$count] ) {
+ $have_to_be_translated = $arg_translated[$count];
+ $count++;
+ } elsif ( $type eq '{' and $arg_types[$count] eq '[' ) {
+
+ # an optionnal argument was not provided,
+ # try with the next argument.
+ $count++;
+ goto TEST_TYPE;
+ } else {
+ my $reason =
+ dgettext( "po4a", "An optional argument " . "was provided, but a mandatory one " . "is expected." );
+ die wrap_ref_mod( $self->{ref}, "po4a::tex", dgettext( "po4a", "Command '%s': %s" ) . "\n",
+ $command, $reason );
+ }
+
+ if ($have_to_be_translated) {
+ ( $t, @e ) = translate_buffer( $self, $opt, $no_wrap,
+ ( @$env, $new_env . $type . "#" . $count . $type_end{$type} ) );
+ } else {
+ $t = $opt;
+ }
+ $translated .= $type . $t . $type_end{$type};
+
+ }
+ } else {
+
+ # Translate the \begin command with all its arguments joined
+ my ( $type, $opt );
+ my $buf = $translated;
+ while (@targs) {
+ $type = shift @targs;
+ $opt = shift @targs;
+ $buf .= $type . $opt . $type_end{$type};
+ }
+ @e = @$env;
+ my $wrap = 1;
+ $wrap = 0 if $no_wrap == 1;
+ $translated = $self->translate( $buf, $self->{ref}, @e ? $e[-1] : "Plain text", "wrap" => $wrap );
+ }
+ @e = ( @$env, $new_env );
+
+ print "($translated,@e)\n"
+ if ( $debug{'environments'} );
+ return ( $translated, @e );
+}
+
+sub check_arg_count {
+ my $self = shift;
+ my $command = shift;
+ my $args = shift;
+ my @targs = @$args;
+ my $check = 1;
+ my @remainder = ();
+ my $reason = "";
+ my ( $type, $arg );
+ my @arg_types;
+
+ if ( $command eq 'begin' ) {
+ $type = shift @targs;
+
+ # The name of the environment is mandatory
+ if ( ( not defined $type )
+ or ( $type ne '{' ) )
+ {
+ $reason = dgettext( "po4a", "The first argument of \\begin is mandatory." );
+ $check = 0;
+ }
+ my $env = shift @targs;
+ if ( not defined $environment_parameters{$env} ) {
+ die wrap_ref_mod( $self->{ref}, "po4a::tex", dgettext( "po4a", "unknown environment: '%s'" ), $env );
+ }
+ @arg_types = @{ $environment_parameters{$env}{'types'} };
+ } else {
+ @arg_types = @{ $command_parameters{$command}{'types'} };
+ }
+
+ my $count = 0;
+ while ( $check and @targs ) {
+ $type = shift @targs;
+ $arg = shift @targs;
+ TEST_TYPE:
+ if ( $count >= scalar @arg_types ) {
+
+ # Too many arguments some will remain
+ @remainder = ( $type, $arg, @targs );
+ last;
+ } elsif ( $type eq $arg_types[$count] ) {
+ $count++;
+ } elsif ( $type eq '{' and $arg_types[$count] eq '[' ) {
+
+ # an optionnal argument was not provided,
+ # try with the next argument.
+ $count++;
+ goto TEST_TYPE;
+ } else {
+ $check = 0;
+ $reason = dgettext( "po4a", "An optional argument was " . "provided, but a mandatory one is expected." );
+ }
+ }
+
+ return ( $check, $reason, \@remainder );
+}
+
+sub register_generic_environment {
+ print "register_generic_environment($_[0])\n"
+ if ( $debug{'environments'} );
+ if ( $_[0] =~ m/^(.*),((?:\{_?\}|\[_?\])*)$/ ) {
+ my $env = $1;
+ my $arg_types = $2;
+ if ( $env =~ /^([+])(.*)$/ ) {
+ $separated_environment{$2} = $1;
+ $env = $2;
+ }
+ my @types = ();
+ my @translated = ();
+ while ( defined $arg_types
+ and length $arg_types
+ and $arg_types =~ m/^(?:([\{\[])(_?)[\}\]])(.*)$/ )
+ {
+ push @types, $1;
+ push @translated, ( $2 eq "_" ) ? 1 : 0;
+ $arg_types = $3;
+ }
+ $environment_parameters{$env} = {
+ 'types' => \@types,
+ 'translated' => \@translated
+ };
+ $environments{$env} = \&generic_environment;
+ }
+}
+
+sub register_verbatim_environment {
+ my $env = shift;
+ $no_wrap_environments .= " $env";
+ $verbatim_environments .= " $env";
+ $RE_VERBATIM = "\\\\begin\\{(?:" . join( "|", split( / /, $verbatim_environments ) ) . ")\\*?\\}";
+ register_generic_environment("$env,")
+ unless ( defined $environments{$env} );
+}
+
+####################################
+### INITIALIZATION OF THE PARSER ###
+####################################
+sub initialize {
+ my $self = shift;
+ my %options = @_;
+
+ $self->{options}{'definitions'} = '';
+ $self->{options}{'exclude_include'} = '';
+ $self->{options}{'no_wrap'} = '';
+ $self->{options}{'verbatim'} = '';
+ $self->{options}{'debug'} = '';
+ $self->{options}{'verbose'} = '';
+ $self->{options}{'no-warn'} = 0; # TexInfo option to not warn about the state of the module
+
+ %debug = ();
+
+ # FIXME: %commands and %separated_command should also be restored to their
+ # default values.
+
+ foreach my $opt ( keys %options ) {
+ if ( $options{$opt} ) {
+ die wrap_mod( "po4a::tex", dgettext( "po4a", "Unknown option: %s" ), $opt )
+ unless exists $self->{options}{$opt};
+ $self->{options}{$opt} = $options{$opt};
+ }
+ }
+
+ if ( $options{'debug'} ) {
+ foreach ( $options{'debug'} ) {
+ $debug{$_} = 1;
+ }
+ }
+
+ if ( $options{'exclude_include'} ) {
+ foreach ( split( /:/, $options{'exclude_include'} ) ) {
+ push @exclude_include, $_;
+ }
+ }
+
+ if ( $options{'no_wrap'} ) {
+ foreach ( split( /,/, $options{'no_wrap'} ) ) {
+ $no_wrap_environments .= " $_";
+ register_generic_environment("$_,")
+ unless ( defined $environments{$_} );
+ }
+ }
+
+ if ( $options{'verbatim'} ) {
+ foreach ( split( /,/, $options{'verbatim'} ) ) {
+ register_verbatim_environment($_);
+ }
+ }
+
+ if ( $options{'definitions'} ) {
+ $self->parse_definition_file( $options{'definitions'} );
+ }
+}
+
+=head1 STATUS OF THIS MODULE
+
+This module needs more tests.
+
+It was tested on a book and with the Python documentation.
+
+=head1 TODO LIST
+
+=over 4
+
+=item Automatic detection of new commands
+
+The TeX module could parse the newcommand arguments and try to guess the
+number of arguments, their type and whether or not they should be
+translated.
+
+=item Translation of the environment separator
+
+When \item is used as an environment separator, the item argument is
+attached to the following string.
+
+=item Some commands should be added to the environment stack
+
+These commands should be specified by couples.
+This can be used to specify commands beginning or ending a verbatim
+environment.
+
+=item Others
+
+Various other points are tagged TODO in the source.
+
+=back
+
+=head1 KNOWN BUGS
+
+Various points are tagged FIXME in the source.
+
+=head1 SEE ALSO
+
+L<Locale::Po4a::LaTeX(3pm)|Locale::Po4a::LaTeX>,
+L<Locale::Po4a::TransTractor(3pm)|Locale::Po4a::TransTractor>,
+L<po4a(7)|po4a.7>
+
+=head1 AUTHORS
+
+ Nicolas François <nicolas.francois@centraliens.net>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright © 2004, 2005 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
+
+=cut
+
+1;
+
+__END__
+
+# LocalWords: Charset charset po UTF gettext msgid nostrip LaTeX
diff --git a/lib/Locale/Po4a/Texinfo.pm b/lib/Locale/Po4a/Texinfo.pm
new file mode 100644
index 0000000..ca8b289
--- /dev/null
+++ b/lib/Locale/Po4a/Texinfo.pm
@@ -0,0 +1,613 @@
+#!/usr/bin/perl -w
+
+# Copyright © 2004-2007 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>
+#
+# This file is part of po4a.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with po4a; if not, write to the Free Software
+# Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+########################################################################
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Texinfo - convert Texinfo documents and derivates from/to PO files
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::Texinfo is a module to help the translation of Texinfo documents into
+other [human] languages.
+
+This module contains the definitions of common Texinfo commands and environments.
+
+Only the comments starting with 'TRANSLATORS' are added to the PO files to guide the translators.
+
+=head1 STATUS OF THIS MODULE
+
+This module is still beta and not ready for production use.
+Please send patches to contribute, not bug reports as we don't know how to deal with them.
+
+=head1 OPTIONS ACCEPTED BY THIS MODULE
+
+These are this module's particular options:
+
+=over 4
+
+=item no-warn
+
+Do not warn about the current state of this module.
+
+=back
+
+=head1 SEE ALSO
+
+L<Locale::Po4a::TeX(3pm)|Locale::Po4a::TeX>,
+L<Locale::Po4a::TransTractor(3pm)|Locale::Po4a::TransTractor>,
+L<po4a(7)|po4a.7>
+
+=head1 AUTHORS
+
+ Nicolas François <nicolas.francois@centraliens.net>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright © 2004-2007 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
+
+=cut
+
+package Locale::Po4a::Texinfo;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+require Exporter;
+use vars qw($VERSION @ISA @EXPORT);
+$VERSION = $Locale::Po4a::TeX::VERSION;
+@ISA = qw(Locale::Po4a::TeX);
+@EXPORT = qw();
+
+use Locale::Po4a::Common;
+use Locale::Po4a::TeX;
+use subs qw(
+ &parse_definition_file
+ &register_generic_command &is_closed &translate_buffer
+ &register_verbatim_environment
+ &generic_command
+ &in_verbatim);
+*parse_definition_file = \&Locale::Po4a::TeX::parse_definition_file;
+*register_generic_command = \&Locale::Po4a::TeX::register_generic_command;
+*register_verbatim_environment = \&Locale::Po4a::TeX::register_verbatim_environment;
+*generic_command = \&Locale::Po4a::TeX::generic_command;
+*is_closed = \&Locale::Po4a::TeX::is_closed;
+*in_verbatim = \&Locale::Po4a::TeX::in_verbatim;
+*translate_buffer = \&Locale::Po4a::TeX::translate_buffer;
+use vars qw($RE_ESCAPE $ESCAPE
+ $RE_VERBATIM
+ $RE_COMMENT $RE_PRE_COMMENT
+ $no_wrap_environments $separated_commands
+ %commands %environments
+ %command_categories %separated
+ %env_separators %debug
+ %translate_buffer_env
+ @exclude_include @comments);
+*RE_ESCAPE = \$Locale::Po4a::TeX::RE_ESCAPE;
+*ESCAPE = \$Locale::Po4a::TeX::ESCAPE;
+*RE_VERBATIM = \$Locale::Po4a::TeX::RE_VERBATIM;
+*RE_COMMENT = \$Locale::Po4a::TeX::RE_COMMENT;
+*RE_PRE_COMMENT = \$Locale::Po4a::TeX::RE_PRE_COMMENT;
+*no_wrap_environments = \$Locale::Po4a::TeX::no_wrap_environments;
+*separated_commands = \$Locale::Po4a::TeX::separated_commands;
+*commands = \%Locale::Po4a::TeX::commands;
+*environments = \%Locale::Po4a::TeX::environments;
+*command_categories = \%Locale::Po4a::TeX::command_categories;
+*separated = \%Locale::Po4a::TeX::separated;
+*env_separators = \%Locale::Po4a::TeX::env_separators;
+*debug = \%Locale::Po4a::TeX::debug;
+*translate_buffer_env = \%Locale::Po4a::TeX::translate_buffer_env;
+*exclude_include = \@Locale::Po4a::TeX::exclude_include;
+*comments = \@Locale::Po4a::TeX::comments;
+
+$ESCAPE = "\@";
+$RE_ESCAPE = "\@";
+$RE_VERBATIM = "\@example";
+$RE_COMMENT = "\\\@(?:c|comment)\\b";
+$RE_PRE_COMMENT = "(?<!\@)(?:\@\@)*";
+
+my %break_line = ();
+
+# translate_line_command indicate if the arguments to the command handled
+# by line_command() should be translated:
+# undefined: arguments are not translated
+# 0: there should be no arguments
+# 1: arguments should be translated
+my %translate_line_command = ();
+
+foreach (
+ qw/example smallexample tex display smalldisplay verbatim format smallformat
+ flushleft flushright lisp smalllisp ignore/
+ )
+{
+ register_verbatim_environment($_);
+ $commands{$_} = \&environment_line_command;
+ $translate_line_command{$_} = 0; # There should be no arguments
+ $break_line{$_} = 1;
+}
+
+my $docheader_pushed = 0;
+
+# The header shall not be written before the Texinfo header (which include
+# the \input command that define the texinfo macros)
+sub docheader {
+ return "";
+}
+
+sub push_docheader {
+ return if $docheader_pushed;
+ my $self = shift;
+ $self->pushline(<<END);
+\@c ===========================================================================
+\@c
+\@c This file was generated with po4a. Translate the source file.
+\@c
+\@c ===========================================================================
+
+END
+ $docheader_pushed = 1;
+}
+
+sub parse {
+ my $self = shift;
+ my ( $line, $ref );
+ my $paragraph = ""; # Buffer where we put the paragraph while building
+ my @env = (); # environment stack
+ my $t = "";
+ $docheader_pushed = 0;
+
+ print STDERR "The TexInfo module of po4a is not ready for production use, and needs a new maintainer.\n"
+ . "Please contact the po4a team if you want to help: send us patches, not bug reports.\n"
+ . "(use -o no-warn to remove this message)\n"
+ unless $self->{options}{'no-warn'};
+ LINE:
+ undef $self->{type};
+ ( $line, $ref ) = $self->shiftline();
+
+ while ( defined($line) ) {
+ chomp($line);
+ $self->{ref} = "$ref";
+
+ if ( $line =~ /^\s*@\s*po4a\s*:/ ) {
+ parse_definition_line( $self, $line );
+ goto LINE;
+ }
+
+ my $closed = 1;
+ if ( !in_verbatim(@env) ) {
+ $closed = is_closed($paragraph);
+ }
+
+ # if (not $closed) {
+ # print "not closed. line: '$line'\n para: '$paragraph'\n";
+ # }
+
+ if ( $closed and $line =~ /^\s*$/ ) {
+
+ # An empty line. This indicates the end of the current
+ # paragraph.
+ $paragraph .= $line . "\n";
+ if ( length($paragraph) ) {
+ ( $t, @env ) = translate_buffer( $self, $paragraph, undef, @env );
+ $self->pushline($t);
+ $paragraph = "";
+ }
+ } elsif ( $line =~ m/^\\input / ) {
+ if ( length($paragraph) ) {
+ ( $t, @env ) = translate_buffer( $self, $paragraph, undef, @env );
+ $self->pushline($t);
+ $paragraph = "";
+ }
+ $self->pushline( $line . "\n" );
+ $self->push_docheader();
+ } elsif ( $line =~ m/^$RE_COMMENT/ ) {
+ if ( $line =~ m/^\@(?:c|comment).*?TRANSLATORS:(.*)$/ ) {
+ $self->add_comment($1);
+ }
+ $self->push_docheader();
+ $self->pushline( $line . "\n" );
+ } elsif ( $closed
+ and ( $line =~ /^@([^ ]*?)(?: +(.*))?$/ )
+ and ( defined $commands{$1} )
+ and ( $break_line{$1} ) )
+ {
+ if ( length($paragraph) ) {
+ ( $t, @env ) = translate_buffer( $self, $paragraph, undef, @env );
+ $self->pushline($t);
+ $paragraph = "";
+ }
+ my $arg = $2;
+ my @args = ();
+ if ( defined $arg and length $arg ) {
+
+ # FIXME: keep the spaces ?
+ $arg =~ s/\s*$//s;
+ @args = ( " ", $arg );
+ }
+ ( $t, @env ) = &{ $commands{$1} }( $self, $1, "", \@args, \@env, 1 );
+ $self->pushline( $t . "\n" );
+ } else {
+
+ # continue the same paragraph
+ $paragraph .= $line . "\n";
+ }
+
+ # Reinit the loop
+ ( $line, $ref ) = $self->shiftline();
+ undef $self->{type};
+ }
+
+ if ( length($paragraph) ) {
+ ( $t, @env ) = translate_buffer( $self, $paragraph, undef, @env );
+ $self->pushline($t);
+ $paragraph = "";
+ }
+} # end of parse
+
+sub line_command {
+ my $self = shift;
+ my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift );
+ my $no_wrap = shift;
+ print "line_command($command,$variant,@$args,@$env,$no_wrap)="
+ if ( $debug{'commands'} );
+
+ my $translated = $ESCAPE . $command;
+ my $line = $args->[1];
+ if ( defined $line and length $line ) {
+ if ( defined $translate_line_command{$command}
+ and $translate_line_command{$command} )
+ {
+ # $no_wrap could be forced to 1, but it should already be set
+ my ( $t, $e ) = $self->translate_buffer( $line, $no_wrap, @$env, $command );
+ $translated .= " " . $t;
+ } else {
+ $translated .= " " . $line;
+ }
+ }
+ print "($translated,@$env)\n"
+ if ( $debug{'commands'} );
+ return ( $translated, @$env );
+}
+
+sub defindex_line_command {
+ my $self = shift;
+ my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift );
+ my $no_wrap = shift;
+ print "line_command($command,$variant,@$args,@$env,$no_wrap)="
+ if ( $debug{'commands'} );
+ my $idx = $$args[1] . "index";
+ $commands{$idx} = \&line_command;
+ $break_line{$idx} = 1;
+ $translate_line_command{$idx} = 1;
+
+ return line_command( $self, $command, $variant, $args, $env, $no_wrap );
+}
+
+sub translate_buffer_menu {
+ my ( $self, $buffer, $no_wrap, @env ) = ( shift, shift, shift, @_ );
+ print STDERR "translate_buffer_menu($buffer,$no_wrap,@env)="
+ if ( $debug{'translate_buffer'} );
+
+ my $translated_buffer = "";
+ my $spaces = "";
+ if ( $buffer =~ m/(\s*)$/s ) {
+ $spaces = $1;
+ }
+
+ while ( $buffer =~ m/^(.*?)((?:\n|^)\* )(.*)$/s ) {
+ my $sep = $2;
+ $buffer = $3;
+ my ( $t, @e ) = $self->translate_buffer_menuentry( $1, $no_wrap, @env, "menuentry" );
+ $translated_buffer .= $t . $sep;
+ }
+ my ( $t, @e ) = $self->translate_buffer_menuentry( $buffer, $no_wrap, @env, "menuentry" );
+ $translated_buffer .= $t;
+
+ $translated_buffer .= $spaces;
+
+ print STDERR "($translated_buffer,@env)\n"
+ if ( $debug{'translate_buffer'} );
+ return ( $translated_buffer, @env );
+}
+$translate_buffer_env{"menu"} = \&translate_buffer_menu;
+$translate_buffer_env{"detailmenu"} = \&translate_buffer_menu;
+$translate_buffer_env{"direntry"} = \&translate_buffer_menu;
+
+my $menu_width = 78;
+my $menu_sep_width = 30;
+
+sub translate_buffer_menuentry {
+ my ( $self, $buffer, $no_wrap, @env ) = ( shift, shift, shift, @_ );
+ print STDERR "translate_buffer_menuentry($buffer,$no_wrap,@env)="
+ if ( $debug{'translate_buffer'} );
+
+ my $translated_buffer = "";
+
+ if ( $buffer =~ m/^(.*?)(::)(?:\s+(.*))?$/s
+ or $buffer =~ m/^(.*?: .*?)(\.)\s+(.*)$/s )
+ {
+ my ( $name, $sep, $description ) = ( $1, $2, $3 );
+ my ( $t, @e ) = $self->translate_buffer( $name, $no_wrap, @env );
+ $translated_buffer = $t . $sep . " ";
+ my $l = length($translated_buffer) + 2;
+ if ( $l < $menu_sep_width - 1 ) {
+ $translated_buffer .= ' ' x ( $menu_sep_width - 1 - $l );
+ $l = $menu_sep_width - 1;
+ }
+ if ($description) {
+ ( $t, @e ) = $self->translate_buffer( $description, $no_wrap, @env );
+ }
+
+ # Replace newlines with space for proper wrapping
+ # See https://github.com/mquinson/po4a/issues/122
+ $t =~ s/\n/ /sg;
+
+ # Remove trailing spaces
+ $t =~ s/\s*$//;
+ $t = Locale::Po4a::Po::wrap( $t, $menu_width - $l - 2 );
+ my $spaces = ' ' x ( $l + 2 );
+ $t =~ s/\n/\n$spaces/sg;
+ $translated_buffer .= $t;
+ } else {
+
+ # FIXME: no-wrap if a line start by a space
+ my ( $t, @e ) = $self->translate_buffer( $buffer, $no_wrap, @env );
+ $translated_buffer = $t;
+ }
+
+ print STDERR "($translated_buffer,@env)\n"
+ if ( $debug{'translate_buffer'} );
+ return ( $translated_buffer, @env );
+}
+
+sub translate_buffer_ignore {
+ my ( $self, $buffer, $no_wrap, @env ) = ( shift, shift, shift, @_ );
+ print STDERR "translate_buffer_ignore($buffer,$no_wrap,@env);\n"
+ if ( $debug{'translate_buffer'} );
+ return ( $buffer, @env );
+}
+$translate_buffer_env{"ignore"} = \&translate_buffer_ignore;
+
+foreach (
+ qw(appendix section cindex findex kindex opindex pindex tindex vindex subsection
+ dircategory subtitle include
+ exdent center unnumberedsec
+ heading unnumbered unnumberedsubsec
+ unnumberedsubsubsec appendixsec appendixsubsec
+ appendixsubsubsec majorheading chapheading subheading
+ subsubheading shorttitlepage
+ subsubsection top item itemx chapter settitle
+ title author)
+ )
+{
+ $commands{$_} = \&line_command;
+ $break_line{$_} = 1;
+ $translate_line_command{$_} = 1;
+}
+foreach (
+ qw(c comment clear set setfilename setchapternewpage vskip synindex
+ syncodeindex need fonttextsize printindex headings finalout sp
+ definfoenclose)
+ )
+{
+ $commands{$_} = \&line_command;
+ $break_line{$_} = 1;
+}
+foreach (qw(defcodeindex defindex)) {
+ $commands{$_} = \&defindex_line_command;
+ $break_line{$_} = 1;
+}
+
+# definfoenclose: command definition => translate?
+foreach (
+ qw(insertcopying page bye summarycontents shortcontents contents
+ noindent)
+ )
+{
+ $commands{$_} = \&line_command;
+ $break_line{$_} = 1;
+ $translate_line_command{$_} = 0;
+}
+
+foreach (
+ qw(defcv deffn
+ defivar defmac defmethod defop
+ defopt defspec deftp deftypecv
+ deftypefn deftypefun
+ deftypeivar deftypemethod
+ deftypeop deftypevar deftypevr
+ defun defvar defvr)
+ )
+{
+ $commands{$_} = \&environment_line_command;
+ $translate_line_command{$_} = 1;
+ $break_line{$_} = 1;
+}
+foreach (
+ qw(defcvx deffnx defivarx defmacx defmethodx defopx defoptx
+ defspecx deftpx deftypecvx deftypefnx deftypefunx deftypeivarx
+ deftypemethodx deftypeopx deftypevarx deftypevrx defunx
+ defvarx defvrx)
+ )
+{
+ $commands{$_} = \&line_command;
+ $translate_line_command{$_} = 1;
+ $break_line{$_} = 1;
+}
+
+foreach (
+ qw(titlefont w i r b sansserif sc slanted strong t cite email
+ footnote indicateurl emph ref xref pxref inforef kbd key
+ acronym),
+
+ # The following commands could cause problems since their arguments
+ # have a semantic and a translator could decide not to translate code but
+ # still translate theses short words if they appear in another context.
+ qw(file command dfn dmn option math code samp var)
+ )
+{
+ register_generic_command("-$_,{_}");
+}
+
+register_generic_command("*anchor,{_}");
+register_generic_command("*refill,");
+
+$translate_line_command{'node'} = 1;
+$no_wrap_environments .= " node";
+$break_line{'node'} = 1;
+
+# @node Comments, Minimum, Conventions, Overview
+$commands{'node'} = sub {
+ my $self = shift;
+ my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift );
+ my $no_wrap = shift;
+ print "node($command,$variant,@$args,@$env,$no_wrap)="
+ if ( $debug{'commands'} );
+
+ my $translated = $ESCAPE . $command;
+ my $line = $args->[1];
+ if ( defined $line and length $line ) {
+ my @pointers = split( /, */, $line );
+ my @t;
+ foreach (@pointers) {
+ push @t, $self->translate( $_, $self->{ref}, $command, "wrap" => 0 );
+ }
+ $translated .= " " . join( ", ", @t );
+ }
+
+ print "($translated,@$env)\n"
+ if ( $debug{'commands'} );
+ return ( $translated, @$env );
+};
+
+sub environment_command {
+ my $self = shift;
+ my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift );
+ my $no_wrap = shift;
+ print "environment_command($command,$variant,@$args,@$env,$no_wrap)="
+ if ( $debug{'commands'} );
+ my ( $t, @e ) = ( "", () );
+
+ ( $t, @e ) = generic_command( $self, $command, $variant, $args, $env, $no_wrap );
+ @e = ( @$env, $command );
+
+ print "($t,@e)\n"
+ if ( $debug{'commands'} );
+ return ( $t, @e );
+}
+
+sub environment_line_command {
+ my $self = shift;
+ my ( $command, $variant, $args, $env ) = ( shift, shift, shift, shift );
+ my $no_wrap = shift;
+ print "environment_command_line($command,$variant,@$args,@$env,$no_wrap)="
+ if ( $debug{'commands'} );
+ my ( $t, @e ) = ( "", () );
+
+ ( $t, @e ) = line_command( $self, $command, $variant, $args, $env, $no_wrap );
+ @e = ( @$env, $command );
+
+ print "($t,@e)\n"
+ if ( $debug{'commands'} );
+ return ( $t, @e );
+}
+
+## push the environment in the environment stack, and do not translate
+## the command
+#sub push_environment {
+# my $self = shift;
+# my ($command,$variant,$args,$env) = (shift,shift,shift,shift);
+# print "push_environment($command,$variant,@$args,@$env)="
+# if ($debug{'environments'});
+#
+# my ($t,@e) = generic_command($self,$command,$variant,$args,$env);
+#
+# print "($t,@e)\n"
+# if ($debug{'environments'});
+# return ($t,@e);
+#}
+#
+foreach (
+ qw(detailmenu menu titlepage group copying
+ documentdescription cartouche
+ direntry
+ ifdocbook ifhtml ifinfo ifplaintext iftex ifxml
+ ifnotdocbook ifnothtml ifnotinfo ifnotplaintext ifnottex ifnotxml)
+ )
+{
+ $commands{$_} = \&environment_line_command;
+ $translate_line_command{$_} = 0;
+ $break_line{$_} = 1;
+}
+foreach (qw(enumerate multitable ifclear ifset)) {
+ $commands{$_} = \&environment_line_command;
+ $break_line{$_} = 1;
+}
+foreach (
+ qw(quotation smallquotation
+ indentedblock smallindentedblock
+ raggedright)
+ )
+{
+ $commands{$_} = \&environment_line_command;
+ $translate_line_command{$_} = 1;
+ $break_line{$_} = 1;
+}
+
+$env_separators{'format'} = "(?:(?:^|\n)\\\*|END-INFO-DIR-ENTRY|START-INFO-DIR-ENTRY)";
+$env_separators{'multitable'} = "(?:\@item|\@tab)";
+
+my $end_command = $commands{'end'};
+register_generic_command("*end, ");
+$commands{'end'} = $end_command;
+$break_line{'end'} = 1;
+
+register_generic_command("*macro, ");
+$commands{'macro'} = \&environment_command;
+$break_line{'macro'} = 1;
+register_generic_command("*itemize, ");
+$commands{'itemize'} = \&environment_command;
+$break_line{'itemize'} = 1;
+register_generic_command("*table, ");
+$commands{'table'} = \&environment_command;
+$break_line{'table'} = 1;
+
+# TODO: is_closed, use a regexp: \ does not escape the closing brace.
+# TBC on LaTeX.
+# In Texinfo, it appears with the "code" command. Maybe this command should
+# be used as verbatim. (Expressions.texi)
+
+# TODO: @include @ignore
+
+# TBC: node Indices
+
+1;
diff --git a/lib/Locale/Po4a/Text.pm b/lib/Locale/Po4a/Text.pm
new file mode 100644
index 0000000..b6633c2
--- /dev/null
+++ b/lib/Locale/Po4a/Text.pm
@@ -0,0 +1,1072 @@
+#!/usr/bin/perl -w
+
+# Po4a::Text.pm
+#
+# extract and translate translatable strings from a text documents
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+########################################################################
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Text - convert text documents from/to PO files
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::Text is a module to help the translation of text documents into
+other [human] languages.
+
+Paragraphs are split on empty lines (or lines containing only spaces or
+tabulations).
+
+If a paragraph contains a line starting by a space (or tabulation), this
+paragraph won't be rewrapped.
+
+=cut
+
+package Locale::Po4a::Text;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+require Exporter;
+use vars qw(@ISA @EXPORT);
+@ISA = qw(Locale::Po4a::TransTractor);
+@EXPORT = qw();
+
+use Locale::Po4a::TransTractor;
+use Locale::Po4a::Common;
+use YAML::Tiny;
+use Syntax::Keyword::Try;
+
+=head1 OPTIONS ACCEPTED BY THIS MODULE
+
+These are this module's particular options:
+
+=over
+
+=item B<keyvalue>
+
+Treat paragraphs that look like a colon-separated key-value pair as verbatim
+(with the C<no-wrap> flag in the PO file). A key-value pair string is a string
+like C<key: value>, containing one or more non-colon and non-space characters
+followed by a colon followed by at least one non-space character before the
+end of the line.
+
+=cut
+
+my $keyvalue = 0;
+
+=item B<nobullets>
+
+Deactivate the detection of bullets.
+
+By default, when a bullet is detected, the bullet paragraph is not considered
+as a verbatim paragraph (with the C<no-wrap> flag in the PO file). Instead, the
+corresponding paragraph is rewrapped in the translation.
+
+=cut
+
+my $bullets = 1;
+
+=item B<tabs=>I<mode>
+
+Specify how tabulations shall be handled. The I<mode> can be any of:
+
+=over
+
+=item B<split>
+
+Lines with tabulations introduce breaks in the current paragraph.
+
+=item B<verbatim>
+
+Paragraph containing tabulations will not be re-wrapped.
+
+=back
+
+By default, tabulations are considered as spaces.
+
+=cut
+
+my $tabs = "";
+
+=item B<breaks=>I<regex>
+
+A regular expression matching lines which introduce breaks.
+The regular expression will be anchored so that the whole line must match.
+
+=cut
+
+my $breaks;
+
+=item B<debianchangelog>
+
+Handle the header and footer of
+released versions, which only contain non translatable information.
+
+=cut
+
+my $debianchangelog = 0;
+
+=item B<fortunes>
+
+Handle the fortunes format, which separate fortunes with a line which
+consists in '%' or '%%', and use '%%' as the beginning of a comment.
+
+=cut
+
+my $fortunes = 0;
+
+=item B<markdown>
+
+Handle some special markup in Markdown-formatted texts.
+
+=cut
+
+my $markdown = 0;
+
+=item B<yfm_keys> (markdown-only)
+
+Comma-separated list of keys to process for translation in the YAML Front Matter
+section. All other keys are skipped. Keys are matched with a case-sensitive
+match. If B<yfm_paths> and B<yfm_keys> are used together, values are included if
+they are matched by at least one of the options. Array values are always translated,
+unless the B<yfm_skip_array> option is provided.
+
+=cut
+
+my %yfm_keys = ();
+
+=item B<yfm_lenient> (markdown only)
+
+Allow the YAML Front Matter parser to fail on malformated headers. This is
+particularly helpful when your file starts with a horizontal ruler instead
+of a YAML Front Matter, but you insist on using three dashes only for your
+ruler.
+
+=cut
+
+my $yfm_lenient = 0;
+
+=item B<yfm_paths> (markdown only)
+
+=item B<yfm_paths>
+
+Comma-separated list of hash paths to process for extraction in the YAML
+Front Matter section, all other paths are skipped. Paths are matched with a
+case-sensitive match. If B<yfm_paths> and B<yfm_keys> are used together,
+values are included if they are matched by at least one of the options.
+Arrays values are always returned unless the B<yfm_skip_array> option is
+provided.
+
+=cut
+
+my %yfm_paths = ();
+
+=item B<yfm_skip_array> (markdown-only)
+
+Do not translate array values in the YAML Front Matter section.
+
+=cut
+
+my $yfm_skip_array = 0;
+
+=item B<control>[B<=>I<field_list>]
+
+Handle Debian's control files.
+A comma-separated list of fields to be translated can be provided.
+
+=cut
+
+my %control = ();
+
+=item B<neverwrap>
+
+Prevent po4a from wrapping any lines. This means that every content is handled verbatim, even simple paragraphs.
+
+=cut
+
+my $defaultwrap = 1;
+
+my $parse_func = \&parse_fallback;
+
+my @comments = ();
+
+=back
+
+=cut
+
+sub initialize {
+ my $self = shift;
+ my %options = @_;
+
+ $self->{options}{'control'} = "";
+ $self->{options}{'breaks'} = 1;
+ $self->{options}{'debianchangelog'} = 1;
+ $self->{options}{'debug'} = 1;
+ $self->{options}{'fortunes'} = 1;
+ $self->{options}{'markdown'} = 1;
+ $self->{options}{'yfm_keys'} = '';
+ $self->{options}{'yfm_lenient'} = 0;
+ $self->{options}{'yfm_paths'} = '';
+ $self->{options}{'yfm_skip_array'} = 0;
+ $self->{options}{'nobullets'} = 0;
+ $self->{options}{'keyvalue'} = 1;
+ $self->{options}{'tabs'} = 1;
+ $self->{options}{'verbose'} = 1;
+ $self->{options}{'neverwrap'} = 1;
+
+ foreach my $opt ( keys %options ) {
+ die wrap_mod( "po4a::text", dgettext( "po4a", "Unknown option: %s" ), $opt )
+ unless exists $self->{options}{$opt};
+ $self->{options}{$opt} = $options{$opt};
+ }
+
+ $keyvalue = 1 if ( defined $options{'keyvalue'} );
+ $bullets = 0 if ( defined $options{'nobullets'} );
+ $tabs = $options{'tabs'} if ( defined $options{'tabs'} );
+ $breaks = $options{'breaks'} if ( defined $options{'breaks'} );
+ $defaultwrap = 0 if ( defined $options{'neverwrap'} );
+
+ $parse_func = \&parse_debianchangelog if ( defined $options{'debianchangelog'} );
+ $parse_func = \&parse_fortunes if ( defined $options{'fortunes'} );
+
+ if ( defined $options{'markdown'} ) {
+ $parse_func = \&parse_markdown;
+ $markdown = 1;
+ map {
+ $_ =~ s/^\s+|\s+$//g; # Trim the keys before using them
+ $yfm_keys{$_} = 1
+ } ( split( ',', $self->{options}{'yfm_keys'} ) );
+ map {
+ $_ =~ s/^\s+|\s+$//g; # Trim the keys before using them
+ $yfm_paths{$_} = 1
+ } ( split( ',', $self->{options}{'yfm_paths'} ) );
+
+ # map { print STDERR "key $_\n"; } (keys %yfm_keys);
+ $yfm_skip_array = $self->{options}{'yfm_skip_array'};
+ $yfm_lenient = $self->{options}{'yfm_lenient'};
+ } else {
+ foreach my $opt (qw(yfm_keys yfm_lenient yfm_skip_array)) {
+ die wrap_mod( "po4a::text", dgettext( "po4a", "Option %s is only valid when parsing markdown files." ),
+ $opt )
+ if exists $options{$opt};
+ }
+ }
+
+ if ( defined $options{'control'} ) {
+ $parse_func = \&parse_control;
+ if ( $options{'control'} eq "1" ) {
+ $control{''} = 1;
+ } else {
+ foreach my $tag ( split( ',', $options{'control'} ) ) {
+ $control{$tag} = 1;
+ }
+ }
+ }
+}
+
+sub parse_fallback {
+ my ( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ) = @_;
+ if (
+ ( $line =~ /^\s*$/ )
+ or ( defined $breaks
+ and $line =~ m/^$breaks$/ )
+ )
+ {
+ # Break paragraphs on lines containing only spaces
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $wrapped_mode = $defaultwrap unless defined( $self->{verbatim} );
+ $self->pushline( $line . "\n" );
+ undef $self->{controlkey};
+ } elsif ( $line =~ /^-- $/ ) {
+
+ # Break paragraphs on email signature hint
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $wrapped_mode = $defaultwrap;
+ $self->pushline( $line . "\n" );
+ } elsif ( $line =~ /^=+$/
+ or $line =~ /^_+$/
+ or $line =~ /^-+$/ )
+ {
+ $wrapped_mode = 0;
+ $paragraph .= $line . "\n";
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $wrapped_mode = $defaultwrap;
+ } elsif ( $tabs eq "split" and $line =~ m/\t/ and $paragraph !~ m/\t/s ) {
+ $wrapped_mode = 0;
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "$line\n";
+ $wrapped_mode = 0;
+ } elsif ( $tabs eq "split" and $line !~ m/\t/ and $paragraph =~ m/\t/s ) {
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "$line\n";
+ $wrapped_mode = $defaultwrap;
+ } else {
+ if ( $line =~ /^\s/ ) {
+
+ # A line starting by a space indicates a non-wrap
+ # paragraph
+ $wrapped_mode = 0;
+ }
+ if (
+ $markdown
+ and (
+ $line =~ /\S $/ # explicit newline
+ or $line =~ /"""$/
+ )
+ )
+ { # """ textblock inside macro begin
+ # Markdown markup needing separation _after_ this line
+ $end_of_paragraph = 1;
+ } else {
+ undef $self->{bullet};
+ undef $self->{indent};
+ }
+
+ # TODO: comments
+ $paragraph .= $line . "\n";
+ }
+ return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
+}
+
+sub parse_debianchangelog {
+ my ( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ) = @_;
+ if (
+ $expect_header
+ and $line =~ /^(\w[-+0-9a-z.]*)\ \(([^\(\) \t]+)\) # src, version
+ \s+([-+0-9a-z.]+); # distribution
+ \s*urgency\s*\=\s*(.*\S)\s*$/ix
+ )
+ { #
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $self->pushline("$line\n");
+ $expect_header = 0;
+ } elsif ( $line =~
+ m/^ \-\- (.*) <(.*)> ((\w+\,\s*)?\d{1,2}\s+\w+\s+\d{4}\s+\d{1,2}:\d\d:\d\d\s+[-+]\d{4}(\s+\([^\\\(\)]+\)))$/ )
+ {
+ # Found trailer
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $self->pushline("$line\n");
+ $expect_header = 1;
+ } else {
+ return parse_fallback( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
+ }
+ return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
+}
+
+sub parse_fortunes {
+ my ( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ) = @_;
+
+ # Always include paragraphs in no-wrap mode,
+ # because the formatting of the fortunes
+ # is usually hand-crafted and matters.
+ $wrapped_mode = 0;
+
+ # Check if there are more lines in the file.
+ my $last_line_of_file = 0;
+ my ( $nextline, $nextref ) = $self->shiftline();
+ if ( defined $nextline ) {
+
+ # There is a next line, put it back.
+ $self->unshiftline( $nextline, $nextref );
+ } else {
+
+ # Nope, no more lines available.
+ $last_line_of_file = 1;
+ }
+
+ # Is the line the end of a fortune or the last line of the file?
+ if ( $line =~ m/^%%?\s*$/ or $last_line_of_file ) {
+
+ # Add the last line to the paragraph
+ if ($last_line_of_file) {
+ $paragraph .= $line;
+ }
+
+ # Remove the last newline for the translation.
+ chomp($paragraph);
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+
+ # Add the last newline again for the output.
+ $self->pushline("\n");
+
+ # Also add the separator line, if this is not the end of the file.
+ if ( !$last_line_of_file ) {
+ $self->pushline("$line\n");
+ }
+ } else {
+ $paragraph .= $line . "\n";
+ }
+ return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
+}
+
+sub parse_control {
+ my ( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ) = @_;
+ if ( $line =~ m/^([^ :]*): *(.*)$/ ) {
+ warn wrap_mod( "po4a::text", dgettext( "po4a", "Unrecognized section: %s" ), $paragraph )
+ unless $paragraph eq "";
+ my $tag = $1;
+ my $val = $2;
+ my $t;
+ if ( $control{''} or $control{$tag} ) {
+ $t = $self->translate(
+ $val, $self->{ref},
+ $tag . ( defined $self->{controlkey} ? ", " . $self->{controlkey} : "" ),
+ "wrap" => 0
+ );
+ } else {
+ $t = $val;
+ }
+ if ( not defined $self->{controlkey} ) {
+ $self->{controlkey} = "$tag: $val";
+ }
+ $self->pushline("$tag: $t\n");
+ $paragraph = "";
+ $wrapped_mode = $defaultwrap;
+ $self->{bullet} = "";
+ $self->{indent} = " ";
+ } elsif ( $line eq " ." ) {
+ do_paragraph( $self, $paragraph, $wrapped_mode,
+ "Long Description" . ( defined $self->{controlkey} ? ", " . $self->{controlkey} : "" ) );
+ $paragraph = "";
+ $self->pushline( $line . "\n" );
+ $self->{bullet} = "";
+ $self->{indent} = " ";
+ } elsif ( $line =~ m/^ Link: +(.*)$/ ) {
+ do_paragraph( $self, $paragraph, $wrapped_mode,
+ "Long Description" . ( defined $self->{controlkey} ? ", " . $self->{controlkey} : "" ) );
+ my $link = $1;
+ my $t1 = $self->translate( "Link: ", $self->{ref}, "Link", "wrap" => 0 );
+ my $t2 = $self->translate(
+ $link, $self->{ref},
+ "Link" . ( defined $self->{controlkey} ? ", " . $self->{controlkey} : "" ),
+ "wrap" => 0
+ );
+ $self->pushline(" $t1$t2\n");
+ $paragraph = "";
+ } elsif ( defined $self->{indent}
+ and $line =~ m/^$self->{indent}\S/ )
+ {
+ $paragraph .= $line . "\n";
+ $self->{type} = "Long Description" . ( defined $self->{controlkey} ? ", " . $self->{controlkey} : "" );
+ } else {
+ return parse_fallback( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
+ }
+ return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
+}
+
+# Support pandoc's format of specifying bibliographic information.
+#
+# If the first line starts with a percent sign, the following
+# is considered to be title, author, and date.
+#
+# If the information spans multiple lines, the following
+# lines must be indented with space.
+# If information is omitted, it's just a percent sign
+# and a blank line.
+#
+# Examples with missing title resp. missing authors:
+#
+# %
+# % Author
+#
+# % My title
+# %
+# % June 14, 2018
+sub parse_markdown_bibliographic_information {
+ my ( $self, $line, $ref ) = @_;
+ my ( $nextline, $nextref );
+
+ # The first match is always the title or an empty string (no title).
+ if ( $line =~ /^%(.*)$/ ) {
+ my $title = $1;
+
+ # Remove leading and trailing whitespace
+ $title =~ s/^\s+|\s+$//g;
+
+ # If there's some text, look for continuation lines
+ if ( length($title) ) {
+ ( $nextline, $nextref ) = $self->shiftline();
+ while ( $nextline =~ /^\s+(.+)$/ ) {
+ $nextline = $1;
+ $nextline =~ s/^\s+|\s+$//g;
+ $title .= " " . $nextline;
+ ( $nextline, $nextref ) = $self->shiftline();
+ }
+
+ # Now the title should be complete, give it to translation.
+ my $t = $self->translate( $title, $ref, "Pandoc title block", "wrap" => $defaultwrap );
+ $t = Locale::Po4a::Po::wrap($t);
+ my $first_line = 1;
+ foreach my $translated_line ( split /\n/, $t ) {
+ if ($first_line) {
+ $first_line = 0;
+ $self->pushline( "% " . $translated_line . "\n" );
+ } else {
+ $self->pushline( " " . $translated_line . "\n" );
+ }
+ }
+ } else {
+
+ # Title has been empty, fetch the next line
+ # if that are the authors.
+ $self->pushline("%\n");
+ ( $nextline, $nextref ) = $self->shiftline();
+ }
+
+ # The next line can contain the author or an empty string.
+ if ( $nextline =~ /^%(.*)$/ ) {
+ my $author_ref = $nextref;
+ my $authors = $1;
+
+ # If there's some text, look for continuation lines
+ if ( length($authors) ) {
+ ( $nextline, $nextref ) = $self->shiftline();
+ while ( $nextline =~ /^\s+(.+)$/ ) {
+ $nextline = $1;
+ $authors .= ";" . $nextline;
+ ( $nextline, $nextref ) = $self->shiftline();
+ }
+
+ # Now the authors should be complete, split them by semicolon
+ my $first_line = 1;
+ foreach my $author ( split /;/, $authors ) {
+ $author =~ s/^\s+|\s+$//g;
+
+ # Skip empty authors
+ next unless length($author);
+ my $t = $self->translate( $author, $author_ref, "Pandoc title block" );
+ if ($first_line) {
+ $first_line = 0;
+ $self->pushline( "% " . $t . "\n" );
+ } else {
+ $self->pushline( " " . $t . "\n" );
+ }
+ }
+ } else {
+
+ # Authors has been empty, fetch the next line
+ # if that is the date.
+ $self->pushline("%\n");
+ ( $nextline, $nextref ) = $self->shiftline();
+ }
+
+ # The next line can contain the date.
+ if ( $nextline =~ /^%(.*)$/ ) {
+ my $date = $1;
+
+ # Remove leading and trailing whitespace
+ $date =~ s/^\s+|\s+$//g;
+ my $t = $self->translate( $date, $nextref, "Pandoc title block" );
+ $self->pushline( "% " . $t . "\n" );
+
+ # Now we're done with the bibliographic information
+ return;
+ }
+ }
+
+ # The line did not start with a percent sign, to stop
+ # parsing bibliographic information and return the
+ # line to the normal parsing.
+ $self->unshiftline( $nextline, $nextref );
+ return;
+ }
+}
+
+# Support YAML Front Matter in Markdown documents
+#
+# If the text starts with a YAML ---\n separator, the full text until
+# the next YAML ---\n separator is considered YAML metadata. The ...\n
+# "end of document" separator can be used at the end of the YAML
+# block.
+#
+sub parse_markdown_yaml_front_matter {
+ my ( $self, $line, $blockref ) = @_;
+ my $yfm;
+ my @saved_ctn;
+ my ( $nextline, $nextref ) = $self->shiftline();
+ push @saved_ctn, ( $nextline, $nextref );
+ while ( defined($nextline) ) {
+ last if ( $nextline =~ /^(---|\.\.\.)$/ );
+ $yfm .= $nextline;
+ ( $nextline, $nextref ) = $self->shiftline();
+ if ( $nextline =~ /: [\[\{]/ ) {
+ die wrap_mod(
+ "po4a::text",
+ dgettext(
+ "po4a",
+ "Inline lists and dictionaries on a single line are not correctly handled the parser we use (YAML::Tiny): they are interpreted as regular strings. "
+ . "Please use multi-lines definitions instead. Offending line:\n %s"
+ ),
+ $nextline
+ );
+
+ }
+ push @saved_ctn, ( $nextline, $nextref );
+ }
+
+ my $yamlarray; # the parsed YFM content
+ my $yamlres; # containing the parse error, if any
+ try {
+ $yamlarray = YAML::Tiny->read_string($yfm);
+ } catch {
+ $yamlres = $@;
+ }
+
+ if ( defined($yamlres) ) {
+ if ($yfm_lenient) {
+ $yamlres =~ s/ at .*$//; # Remove the error localisation in YAML::Tiny die message, if any (for our test)
+ warn wrap_mod(
+ "po4a::text",
+ dgettext(
+ "po4a",
+ "Proceeding even if the YAML Front Matter could not be parsed. Remove the 'yfm_lenient' option for a stricter behavior.\nIgnored error: %s"
+ ),
+ $yamlres
+ );
+ my $len = ( scalar @saved_ctn ) - 1;
+ while ( $len >= 0 ) {
+ $self->unshiftline( $saved_ctn[ $len - 1 ], $saved_ctn[$len] );
+
+ # print STDERR "Unshift ".$saved_ctn[ $len - 1] ." | ". $saved_ctn[$len] ."\n";
+ $len -= 2;
+ }
+ return 0; # Not a valid YAML
+ } else {
+ die wrap_mod(
+ "po4a::text",
+ dgettext(
+ "po4a",
+ "Could not get the YAML Front Matter from the file. If you did not intend to add a YAML front matter "
+ . "but an horizontal ruler, please use '----' instead, or pass the 'yfm_lenient' option.\nError: %s\nContent of the YFM: %s"
+ ),
+ $yamlres, $yfm
+ );
+ }
+ }
+
+ $self->handle_yaml( 1, $blockref, $yamlarray, \%yfm_keys, $yfm_skip_array, \%yfm_paths );
+ $self->pushline("---\n");
+ return 1; # Valid YAML
+}
+
+sub parse_markdown {
+ my ( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ) = @_;
+ if ($expect_header) {
+
+ # It is only possible to find and parse the bibliographic
+ # information or the YAML Front Matter from the first line.
+ # Anyway, stop expecting header information for the next run.
+ $expect_header = 0;
+ if ( $line =~ /^%(.*)$/ ) {
+ parse_markdown_bibliographic_information( $self, $line, $ref );
+ return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
+ } elsif ( $line =~ /^---$/ ) {
+ if ( parse_markdown_yaml_front_matter( $self, $line, $ref ) ) { # successfully parsed
+ return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
+ }
+
+ # If it wasn't a YFM paragraph after all, stop expecting a header and keep going
+ }
+ }
+ if ( ( $line =~ m/^(={4,}|-{4,})$/ )
+ and ( defined($paragraph) )
+ and ( $paragraph =~ m/^[^\n]*\n$/s )
+ and ( length($paragraph) == ( length($line) + 1 ) ) )
+ {
+ # XXX: There can be any number of underlining according
+ # to the documentation. This detection, which avoid
+ # translating the formatting, is only supported if
+ # the underlining has the same size as the header text.
+ # Found title
+ $wrapped_mode = 0;
+ my $level = $line;
+ $level =~ s/^(.).*$/$1/;
+
+ # Remove the trailing newline from the title
+ chomp($paragraph);
+ my $t = $self->translate(
+ $paragraph, $self->{ref}, "Title $level",
+ "wrap" => 0,
+ "flags" => "markdown-text"
+ );
+
+ # Add the newline again for the output
+ $self->pushline( $t . "\n" );
+ $paragraph = "";
+ $wrapped_mode = $defaultwrap;
+ $self->pushline( ( $level x length($t) ) . "\n" );
+ } elsif ( $line =~ m/^(#{1,6})( +)(.*?)( +\1)?$/ ) {
+ my $titlelevel1 = $1;
+ my $titlespaces = $2;
+ my $title = $3;
+ my $titlelevel2 = $4 || "";
+
+ # Found one line title
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $wrapped_mode = 0;
+ $paragraph = "";
+ my $t = $self->translate(
+ $title, $self->{ref}, "Title $titlelevel1",
+ "wrap" => 0,
+ "flags" => "markdown-text"
+ );
+ $self->pushline( $titlelevel1 . $titlespaces . $t . $titlelevel2 . "\n" );
+ $wrapped_mode = $defaultwrap;
+ } elsif ( $line =~ /^[ ]{0,3}([*_-])\s*(?:\1\s*){2,}$/ ) {
+
+ # Horizontal rule
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $self->pushline( $line . "\n" );
+ $paragraph = "";
+ $end_of_paragraph = 1;
+ } elsif ( $line =~ /^([ ]{0,3})(\[[^\]]+\]:[ \t]?.+)$/ ) {
+ my $indentation = $1;
+ my $linkreference = $2;
+
+ # Link reference
+ # TODO: support multiline link reference definition
+ # TODO: treat link title properly
+ # https://spec.commonmark.org/0.30/#link-reference-definitions
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $wrapped_mode = 0;
+ $paragraph = "";
+ my $t = $self->translate(
+ $linkreference, $self->{ref}, "Link reference",
+ "wrap" => 0,
+ "flags" => "link-reference"
+ );
+ $self->pushline( $indentation . $t . "\n" );
+ $wrapped_mode = $defaultwrap;
+ } elsif ( $line =~ /^([ ]{0,3})(([~`])\3{2,})(\s*)([^`]*)\s*$/ ) {
+ my $fence_space_before = $1;
+ my $fence = $2;
+ my $fencechar = $3;
+ my $fence_space_between = $4;
+ my $info_string = $5;
+
+ # fenced code block
+ my $type = "Fenced code block" . ( $info_string ? " ($info_string)" : "" );
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $wrapped_mode = 0;
+ $paragraph = "";
+ $self->pushline("$line\n");
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ my ( $nextline, $nextref ) = $self->shiftline();
+
+ while ( $nextline !~ /^\s{0,3}$fence$fencechar*\s*$/ ) {
+ $paragraph .= "$nextline";
+ ( $nextline, $nextref ) = $self->shiftline();
+ }
+ do_paragraph( $self, $paragraph, $wrapped_mode, $type );
+ $self->pushline($nextline);
+ $paragraph = "";
+ $end_of_paragraph = 1;
+ } elsif ( $line =~ /^([ ]{0,3})(([:])\3{2,})(\s*)([^`]*)\s*$/ ) {
+ my $fence_space_before = $1;
+ my $fence = $2;
+ my $fencechar = $3;
+ my $fence_space_between = $4;
+ my @info_string = ($5);
+
+ # print STDERR "----------------\n";
+ # print STDERR "line: $line\n";
+ # print STDERR "fence: '$fence'; fencechar: '$fencechar'; info: '$info_string'\n";
+
+ # fenced div block (fenced with ::: where code blocks are fenced with ` or ~)
+ # https://pandoc.org/MANUAL.html#divs-and-spans
+ my $info = join( "|", map { chomp $_; $_ } @info_string );
+ my $type = "Fenced div block" . ( $info ? " ($info)" : "" );
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $wrapped_mode = 0;
+ $paragraph = "";
+ $self->pushline("$line\n");
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+
+ my $lvl = 1;
+ while ( $lvl > 0 ) {
+ my ( $nextline, $nextref ) = $self->shiftline();
+ die wrap_mod(
+ "po4a::text",
+ dgettext(
+ "po4a", "Malformed fenced div block: Block starting at %s not closed before the end of the file."
+ ),
+ $ref
+ ) unless ( defined($nextline) );
+
+ # print STDERR "within $lvl: $nextline";
+ if ( $nextline =~ /^\s*:::+\s*$/ ) {
+ my $info = join( "|", map { chomp $_; $_ } @info_string );
+ $type = "Fenced div block" . ( $info ? " ($info)" : "" );
+ if ( $paragraph ne "" ) {
+ do_paragraph( $self, $paragraph, $wrapped_mode, $type );
+ $paragraph = "";
+ }
+ $self->pushline($nextline);
+ $lvl--;
+ while ( scalar @info_string > $lvl ) {
+ pop @info_string;
+ }
+ } elsif ( $nextline =~ /^([ ]{0,3})(([:])\3{2,})(\s*)([^`]*)\s*$/ ) {
+ if ( $paragraph ne "" ) {
+ do_paragraph( $self, $paragraph, $wrapped_mode, $type );
+ $paragraph = "";
+ }
+ $self->pushline($nextline);
+ push @info_string, $5;
+ $lvl++;
+ } else {
+ $paragraph .= $nextline;
+ }
+ }
+ $paragraph = "";
+ $end_of_paragraph = 1;
+
+ # print STDERR "Out now ------------\n";
+ } elsif (
+ $line =~ /^\s*\[\[\!\S+\s*$/ # macro begin
+ or $line =~ /^\s*"""\s*\]\]\s*$/
+ )
+ { # """ textblock inside macro end
+ # Avoid translating Markdown lines containing only markup
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $wrapped_mode = $defaultwrap;
+ $self->pushline("$line\n");
+ } elsif ( $line =~ /^\s*\[\[\!\S[^\]]*\]\]\s*$/ ) { # sole macro
+ # Preserve some Markdown markup as a single line
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "$line\n";
+ $wrapped_mode = 0;
+ $end_of_paragraph = 1;
+ } elsif ( $line =~ /^"""/ ) { # """ textblock inside macro end
+ # Markdown markup needing separation _before_ this line
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "$line\n";
+ $wrapped_mode = $defaultwrap;
+ } else {
+ return parse_fallback( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
+ }
+ return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
+}
+
+sub parse {
+ my $self = shift;
+ my ( $line, $ref );
+ my $paragraph = "";
+ my $wrapped_mode = $defaultwrap;
+ my $expect_header = 1;
+ my $end_of_paragraph = 0;
+ ( $line, $ref ) = $self->shiftline();
+ my $file = $ref;
+ $file =~ s/:[0-9]+$// if defined($line);
+
+ while ( defined($line) ) {
+ $ref =~ m/^(.*):[0-9]+$/;
+ if ( $1 ne $file ) {
+ $file = $1;
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $wrapped_mode = $defaultwrap;
+ $expect_header = 1;
+ }
+
+ chomp($line);
+ $self->{ref} = "$ref";
+ ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ) =
+ &$parse_func( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
+
+ # paragraphs starting by a bullet, or numbered
+ # or paragraphs with a line containing more than 3 consecutive spaces
+ # are considered as verbatim paragraphs
+ $wrapped_mode = 0 if ( $paragraph =~ m/^(\*|[0-9]+[.)] )/s
+ or $paragraph =~ m/[ \t][ \t][ \t]/s );
+
+ # Paragraphs starting with a table formating (GH extension) are also considered verbatim
+ $wrapped_mode = 0 if ( $paragraph =~ m/^\|/ );
+
+ $wrapped_mode = 0 if ( $tabs eq "verbatim"
+ and $paragraph =~ m/\t/s );
+
+ # Also consider keyvalue paragraphs verbatim, if requested
+ $wrapped_mode = 0 if ( $keyvalue == 1
+ and $paragraph =~ m/^[^ :]+:.*[^\s].*$/s );
+ if ($markdown) {
+
+ # Some Markdown markup can (or might) not survive wrapping
+ $wrapped_mode = 0
+ if (
+ $paragraph =~ /^>/ms # blockquote
+ or $paragraph =~ /^( {8}|\t)/ms # monospaced
+ or $paragraph =~ /^\$(\S+[{}]\S*\s*)+/ms # Xapian macro
+ or $paragraph =~ /<(?![a-z]+[:@])/ms # maybe html (tags but not wiki <URI>)
+ or $paragraph =~ /^[^<]+>/ms # maybe html (tag with vertical space)
+ or $paragraph =~ /\S $/ms # explicit newline
+ or $paragraph =~ /\[\[\!\S[^\]]+$/ms # macro begin
+ );
+ }
+ if ($end_of_paragraph) {
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ $paragraph = "";
+ $wrapped_mode = $defaultwrap;
+ $end_of_paragraph = 0;
+ }
+ ( $line, $ref ) = $self->shiftline();
+ }
+ if ( length $paragraph ) {
+ do_paragraph( $self, $paragraph, $wrapped_mode );
+ }
+}
+
+sub do_paragraph {
+ my ( $self, $paragraph, $wrap ) = ( shift, shift, shift );
+ my $type = shift || $self->{type} || "Plain text";
+ my $flags = "";
+ if ( $type eq "Plain text" and $markdown ) {
+ $flags = "markdown-text";
+ }
+
+ return if ( $paragraph eq "" );
+
+ $wrap = 0 unless $defaultwrap;
+
+ # DEBUG
+ # $type .= " verbatim: '".($self->{verbatim}//"NONE")."' bullet: '$bullets' wrap: '$wrap' indent: '".($self->{indent}//"NONE")."' type: '".($self->{type}//"NONE")."'";
+ # print STDERR "$type\n";
+
+ if ( $bullets and not defined $self->{verbatim} ) {
+
+ # Detect bullets
+ # | * blah blah
+ # |<spaces> blah
+ # | ^-- aligned
+ # <empty line>
+ #
+ # The leading spaces are optional, and other bullets are supported:
+ # - blah o blah + blah
+ # 1. blah 1) blah (1) blah
+ TEST_BULLET:
+ if ( $paragraph =~ m/^(\s*)((?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+)([^\n]*\n)(.*)$/s ) {
+ my $para = $5;
+ my $bullet = $2;
+ my $indent1 = $1;
+ my $indent2 = "$1" . ( ' ' x length $bullet );
+ my $text = $4;
+ while ( $para !~ m/^$indent2(?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+/
+ and $para =~ s/^$indent2(\S[^\n]*\n)//s )
+ {
+ $text .= $1;
+ }
+
+ # TODO: detect if a line starts with the same bullet
+ if ( $text !~ m/\S[ \t][ \t][ \t]+\S/s ) {
+ my $bullet_regex = quotemeta( $indent1 . $bullet );
+ $bullet_regex =~ s/[0-9]+/\\d\+/;
+ if ( $para eq ''
+ or $para =~ m/^(\s*)((?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+)([^\n]*\n)(.*)$/s
+ or $para =~ m/^$bullet_regex\S/s )
+ {
+ my $trans = $self->translate(
+ $text,
+ $self->{ref},
+ "Bullet: '$indent1$bullet'",
+ "flags" => "markdown-text",
+ "wrap" => $defaultwrap,
+ "wrapcol" => -( length $indent2 )
+ );
+ $trans =~ s/^/$indent1$bullet/s;
+ $trans =~ s/\n(.)/\n$indent2$1/sg;
+ $self->pushline( $trans . "\n" );
+ if ( $para eq '' ) {
+ return;
+ } else {
+
+ # Another bullet
+ $paragraph = $para;
+ goto TEST_BULLET;
+ }
+ }
+ }
+ }
+ }
+
+ my $end = "";
+ if ($wrap) {
+ $paragraph =~ s/^(.*?)(\n*)$/$1/s;
+ $end = $2 || "";
+ }
+ my $t = $self->translate(
+ $paragraph,
+ $self->{ref},
+ $type,
+ "comment" => join( "\n", @comments ),
+ "flags" => $flags,
+ "wrap" => $wrap
+ );
+ @comments = ();
+ if ( defined $self->{bullet} ) {
+ my $bullet = $self->{bullet};
+ my $indent1 = $self->{indent};
+ my $indent2 = $indent1 . ( ' ' x length($bullet) );
+ $t =~ s/^/$indent1$bullet/s;
+ $t =~ s/\n(.)/\n$indent2$1/sg;
+ }
+ $self->pushline( $t . $end );
+}
+
+1;
+
+=head1 STATUS OF THIS MODULE
+
+Tested successfully on simple text files and NEWS.Debian files.
+
+=head1 AUTHORS
+
+ Nicolas François <nicolas.francois@centraliens.net>
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright © 2005-2008 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.
+
+ Copyright © 2008-2009, 2018 Jonas Smedegaard <dr@jones.dk>.
+ Copyright © 2020 Martin Quinson <mquinson#debian.org>.
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
+
+=cut
+
+__END__
+
+# LocalWords: Charset charset po UTF gettext msgid nostrip GPL
diff --git a/lib/Locale/Po4a/TransTractor.pm b/lib/Locale/Po4a/TransTractor.pm
new file mode 100644
index 0000000..0bc5b8f
--- /dev/null
+++ b/lib/Locale/Po4a/TransTractor.pm
@@ -0,0 +1,1337 @@
+#!/usr/bin/perl -w
+
+require Exporter;
+
+package Locale::Po4a::TransTractor;
+use DynaLoader;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+use subs qw(makespace);
+use vars qw($VERSION @ISA @EXPORT);
+$VERSION = "0.72";
+@ISA = qw(DynaLoader);
+@EXPORT = qw(new process translate
+ read write readpo writepo
+ getpoout setpoout get_in_charset get_out_charset handle_yaml);
+
+# Try to use a C extension if present.
+eval("bootstrap Locale::Po4a::TransTractor $VERSION");
+
+use Carp qw(croak confess);
+use Locale::Po4a::Po;
+use Locale::Po4a::Common;
+
+use File::Path; # mkdir before write
+use File::Spec;
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::TransTractor - generic trans(lator ex)tractor.
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+This class is the ancestor of every po4a parser used to parse a document, to
+search translatable strings, to extract them to a PO file and to replace them by
+their translation in the output document.
+
+More formally, it takes the following arguments as input:
+
+=over 2
+
+=item -
+
+a document to translate;
+
+=item -
+
+a PO file containing the translations to use.
+
+=back
+
+As output, it produces:
+
+=over 2
+
+=item -
+
+another PO file, resulting of the extraction of translatable strings from
+the input document;
+
+=item -
+
+a translated document, with the same structure than the one in input, but
+with all translatable strings replaced with the translations found in the
+PO file provided in input.
+
+=back
+
+Here is a graphical representation of this:
+
+ Input document --\ /---> Output document
+ \ / (translated)
+ +-> parse() function -----+
+ / \
+ Input PO --------/ \---> Output PO
+ (extracted)
+
+=head1 FUNCTIONS YOUR PARSER SHOULD OVERRIDE
+
+=over 4
+
+=item parse()
+
+This is where all the work takes place: the parsing of input documents, the
+generation of output, and the extraction of the translatable strings. This
+is pretty simple using the provided functions presented in the section
+B<INTERNAL FUNCTIONS> below. See also the B<SYNOPSIS>, which presents an
+example.
+
+This function is called by the process() function below, but if you choose
+to use the new() function, and to add content manually to your document,
+you will have to call this function yourself.
+
+=item docheader()
+
+This function returns the header we should add to the produced document,
+quoted properly to be a comment in the target language. See the section
+B<Educating developers about translations>, from L<po4a(7)|po4a.7>, for what
+it is good for.
+
+=back
+
+=cut
+
+sub docheader { }
+
+sub parse { }
+
+=head1 SYNOPSIS
+
+The following example parses a list of paragraphs beginning with "<p>". For the sake
+of simplicity, we assume that the document is well formatted, i.e. that '<p>'
+tags are the only tags present, and that this tag is at the very beginning
+of each paragraph.
+
+ sub parse {
+ my $self = shift;
+
+ PARAGRAPH: while (1) {
+ my ($paragraph,$pararef)=("","");
+ my $first=1;
+ my ($line,$lref)=$self->shiftline();
+ while (defined($line)) {
+ if ($line =~ m/<p>/ && !$first--; ) {
+ # Not the first time we see <p>.
+ # Reput the current line in input,
+ # and put the built paragraph to output
+ $self->unshiftline($line,$lref);
+
+ # Now that the document is formed, translate it:
+ # - Remove the leading tag
+ $paragraph =~ s/^<p>//s;
+
+ # - push to output the leading tag (untranslated) and the
+ # rest of the paragraph (translated)
+ $self->pushline( "<p>"
+ . $self->translate($paragraph,$pararef)
+ );
+
+ next PARAGRAPH;
+ } else {
+ # Append to the paragraph
+ $paragraph .= $line;
+ $pararef = $lref unless(length($pararef));
+ }
+
+ # Reinit the loop
+ ($line,$lref)=$self->shiftline();
+ }
+ # Did not get a defined line? End of input file.
+ return;
+ }
+ }
+
+Once you've implemented the parse function, you can use your document
+class, using the public interface presented in the next section.
+
+=head1 PUBLIC INTERFACE for scripts using your parser
+
+=head2 Constructor
+
+=over 4
+
+=item process(%)
+
+This function can do all you need to do with a po4a document in one
+invocation. Its arguments must be packed as a hash. ACTIONS:
+
+=over 3
+
+=item a.
+
+Reads all the PO files specified in po_in_name
+
+=item b.
+
+Reads all original documents specified in file_in_name
+
+=item c.
+
+Parses the document
+
+=item d.
+
+Reads and applies all the addenda specified
+
+=item e.
+
+Writes the translated document to file_out_name (if given)
+
+=item f.
+
+Writes the extracted PO file to po_out_name (if given)
+
+=back
+
+ARGUMENTS, beside the ones accepted by new() (with expected type):
+
+=over 4
+
+=item file_in_name (@)
+
+List of filenames where we should read the input document.
+
+=item file_in_charset ($)
+
+Charset used in the input document (if it isn't specified, use UTF-8).
+
+=item file_out_name ($)
+
+Filename where we should write the output document.
+
+=item file_out_charset ($)
+
+Charset used in the output document (if it isn't specified, use UTF-8).
+
+=item po_in_name (@)
+
+List of filenames where we should read the input PO files from, containing
+the translation which will be used to translate the document.
+
+=item po_out_name ($)
+
+Filename where we should write the output PO file, containing the strings
+extracted from the input document.
+
+=item addendum (@)
+
+List of filenames where we should read the addenda from.
+
+=item addendum_charset ($)
+
+Charset for the addenda.
+
+=back
+
+=item new(%)
+
+Create a new po4a document. Accepted options (in the hash passed as a parameter):
+
+=over 4
+
+=item verbose ($)
+
+Sets the verbosity.
+
+=item debug ($)
+
+Sets the debugging.
+
+=item wrapcol ($)
+
+The column at which we should wrap text in output document (default: 76).
+
+The negative value means not to wrap lines at all.
+
+=back
+
+Also it accepts next options for underlying Po-files: B<porefs>,
+B<copyright-holder>, B<msgid-bugs-address>, B<package-name>,
+B<package-version>, B<wrap-po>.
+
+=cut
+
+sub process {
+ my $self = shift;
+ ## Parameters are passed as an hash to avoid long and error-prone parameter lists
+ my %params = @_;
+
+ # Parameter checking
+ foreach ( keys %params ) {
+ confess "Unexpected parameter to process(): $_. Please report that bug."
+ unless ( $_ eq 'po_in_name'
+ || $_ eq 'po_out_name'
+ || $_ eq 'file_in_name'
+ || $_ eq 'file_in_charset'
+ || $_ eq 'file_out_name'
+ || $_ eq 'file_out_charset'
+ || $_ eq 'addendum'
+ || $_ eq 'addendum_charset'
+ || $_ eq 'srcdir'
+ || $_ eq 'destdir'
+ || $_ eq 'calldir' );
+ }
+
+ $self->{TT}{'file_in_charset'} = $params{'file_in_charset'} // 'UTF-8';
+ $self->{TT}{'file_out_charset'} = $params{'file_out_charset'} // 'UTF-8';
+ $self->{TT}{'addendum_charset'} = $params{'addendum_charset'};
+
+ our ( $destdir, $srcdir, $calldir ) = ( $params{'destdir'}, $params{'srcdir'}, $params{'calldir'} );
+
+ sub _input_file {
+ my $filename = $_[0];
+ return $filename if ( File::Spec->file_name_is_absolute($filename) );
+ foreach ( ( $destdir, $srcdir, $calldir ) ) {
+ next unless defined $_;
+ my $p = File::Spec->catfile( $_, $filename );
+ return $p if -e $p;
+ }
+ return $filename;
+ }
+
+ sub _output_file {
+ my $filename = $_[0];
+ return $filename if ( File::Spec->file_name_is_absolute($filename) );
+ foreach ( ( $destdir, $calldir ) ) {
+ next unless defined $_;
+ return File::Spec->catfile( $_, $filename ) if -d $_ and -w $_;
+ }
+ return $filename;
+ }
+
+ foreach my $file ( @{ $params{'po_in_name'} } ) {
+ my $infile = _input_file($file);
+ print STDERR wrap_mod( "po4a::transtractor::process", "Read PO file $infile" )
+ if $self->debug();
+ $self->readpo($infile);
+ }
+ foreach my $file ( @{ $params{'file_in_name'} } ) {
+ my $infile = _input_file($file);
+ print STDERR wrap_mod( "po4a::transtractor::process", "Read document $infile" )
+ if $self->debug();
+ $self->read( $infile, $file, $params{'file_in_charset'} );
+ }
+ print STDERR wrap_mod( "po4a::transtractor::process", "Call parse()" ) if $self->debug();
+ $self->parse();
+ print STDERR wrap_mod( "po4a::transtractor::process", "Done parse()" ) if $self->debug();
+ foreach my $file ( @{ $params{'addendum'} } ) {
+ my $infile = _input_file($file);
+ print STDERR wrap_mod( "po4a::transtractor::process", "Apply addendum $infile" )
+ if $self->debug();
+ $self->addendum($file) || die "An addendum failed\n";
+ }
+
+ if ( defined $params{'file_out_name'} ) {
+ my $outfile = _output_file( $params{'file_out_name'} );
+ print STDERR wrap_mod( "po4a::transtractor::process", "Write document $outfile" )
+ if $self->debug();
+ $self->write( $outfile, $self->{TT}{'file_out_charset'} );
+ }
+ if ( defined $params{'po_out_name'} ) {
+ my $outfile = _output_file( $params{'po_out_name'} );
+ print STDERR wrap_mod( "po4a::transtractor::process", "Write PO file $outfile" )
+ if $self->debug();
+ $self->writepo($outfile);
+ }
+ return $self;
+}
+
+sub new {
+ ## Determine if we were called via an object-ref or a classname
+ my $this = shift;
+ my $class = ref($this) || $this;
+ my $self = {};
+ my %options = @_;
+ ## Bless ourselves into the desired class and perform any initialization
+ bless $self, $class;
+
+ ## initialize the plugin
+ # prevent the plugin from croaking on the options intended for Po.pm or ourself
+ $self->{options}{'porefs'} = '';
+ $self->{options}{'copyright-holder'} = '';
+ $self->{options}{'msgid-bugs-address'} = '';
+ $self->{options}{'package-name'} = '';
+ $self->{options}{'package-version'} = '';
+ $self->{options}{'wrap-po'} = '';
+ $self->{options}{'wrapcol'} = '';
+
+ # let the plugin parse the options and such
+ $self->initialize(%options);
+
+ ## Create our private data
+ my %po_options;
+ $po_options{'porefs'} = $options{'porefs'};
+ $po_options{'copyright-holder'} = $options{'copyright-holder'};
+ $po_options{'msgid-bugs-address'} = $options{'msgid-bugs-address'};
+ $po_options{'package-name'} = $options{'package-name'};
+ $po_options{'package-version'} = $options{'package-version'};
+ $po_options{'wrap-po'} = $options{'wrap-po'};
+
+ # private data
+ $self->{TT} = ();
+ $self->{TT}{po_in} = Locale::Po4a::Po->new( \%po_options );
+ $self->{TT}{po_out} = Locale::Po4a::Po->new( \%po_options );
+
+ # Warning, $self->{TT}{doc_in} is an array of array:
+ # The document is split on lines, and for each array in array
+ # [0] is the line content, [1] is the reference $filename:$linenum
+ $self->{TT}{doc_in} = ();
+ $self->{TT}{doc_out} = ();
+ if ( defined $options{'verbose'} ) {
+ $self->{TT}{verbose} = $options{'verbose'};
+ }
+ if ( defined $options{'debug'} ) {
+ $self->{TT}{debug} = $options{'debug'};
+ }
+ if ( defined $options{'wrapcol'} ) {
+ if ( $options{'wrapcol'} < 0) {
+ $self->{TT}{wrapcol} = 'Inf';
+ } else {
+ $self->{TT}{wrapcol} = $options{'wrapcol'};
+ }
+ } else {
+ $self->{TT}{wrapcol} = 76;
+ }
+
+ return $self;
+}
+
+=back
+
+=head2 Manipulating document files
+
+=over 4
+
+=item read($$)
+
+Add another input document data at the end of the existing array
+C<< @{$self->{TT}{doc_in}} >>. The argument is the filename to read. If a second
+argument is provided, it is the filename to use in the references.
+
+This array C<< @{$self->{TT}{doc_in}} >> holds this input document data as an
+array of strings with alternating meanings.
+ * The string C<$textline> holding each line of the input text data.
+ * The string C<< $filename:$linenum >> holding its location and called as
+ "reference" (C<linenum> starts with 1).
+
+Please note that it does not parse anything. You should use the parse()
+function when you're done with packing input files into the document.
+
+=cut
+
+sub read() {
+ my $self = shift;
+ my $filename = shift or confess "Cannot write to a file without filename";
+ my $refname = shift or confess "Cannot write to a file without refname";
+ my $charset = shift || 'UTF-8';
+ my $linenum = 0;
+
+ use warnings FATAL => 'utf8';
+ use Encode qw(:fallbacks);
+ use PerlIO::encoding;
+ $PerlIO::encoding::fallback = FB_CROAK;
+
+ my $fh;
+ open( $fh, "<:encoding($charset)", $filename )
+ or croak wrap_msg( dgettext( "po4a", "Cannot read from %s: %s" ), $filename, $! );
+
+ # If we see a BOM while not in UTF-8, we want to croak. But this code is in an eval to deal with
+ # encoding issues. So save the BOM error until after the eval block
+ my $BOM_detected = 0;
+
+ eval {
+ while ( defined( my $textline = <$fh> ) ) {
+ $linenum++;
+ if ( $linenum == 1 && $textline =~ m/^\N{BOM}/ ) { # UTF-8 BOM detected
+ $BOM_detected = 1 if ( uc($charset) ne 'UTF-8' ); # Save the error message for after the eval{} bloc
+ $textline =~ s/^\N{BOM}//;
+ }
+ my $ref = "$refname:$linenum";
+ $textline =~ s/\r$//;
+ my @entry = ( $textline, $ref );
+ push @{ $self->{TT}{doc_in} }, @entry;
+ }
+ };
+ my $error = $@;
+ if ( length($error) ) {
+ chomp $error;
+ die wrap_msg( dgettext( "po4a", "Malformed encoding while reading from file %s with charset %s: %s" ),
+ $filename, $charset, $error );
+ }
+
+ # Croak if we need to
+ if ($BOM_detected) {
+ croak wrap_msg(
+ dgettext(
+ "po4a",
+ "The file %s starts with a BOM char indicating that its encoding is UTF-8, but you specified %s instead."
+ ),
+ $filename,
+ $charset
+ );
+ }
+
+ close $fh
+ or croak wrap_msg( dgettext( "po4a", "Cannot close %s after reading: %s" ), $filename, $! );
+}
+
+=item write($)
+
+Write the translated document to the given filename.
+
+This translated document data are provided by:
+ * C<< $self->docheader() >> holding the header text for the plugin, and
+ * C<< @{$self->{TT}{doc_out}} >> holding each line of the main translated text in the array.
+
+=cut
+
+sub write {
+ my $self = shift;
+ my $filename = shift or confess "Cannot write to a file without filename";
+ my $charset = shift || 'UTF-8';
+
+ use warnings FATAL => 'utf8';
+
+ my $fh;
+ if ( $filename eq '-' ) {
+ $fh = \*STDOUT;
+ } else {
+
+ # make sure the directory in which we should write the localized file exists
+ my $dir = $filename;
+ if ( $dir =~ m|/| ) {
+ $dir =~ s|/[^/]*$||;
+
+ File::Path::mkpath( $dir, 0, 0755 ) # Croaks on error
+ if ( length($dir) && !-e $dir );
+ }
+ open( $fh, ">:encoding($charset)", $filename )
+ or croak wrap_msg( dgettext( "po4a", "Cannot write to %s: %s" ), $filename, $! );
+ }
+
+ map { print $fh $_ } $self->docheader();
+ eval {
+ map { print $fh $_ } @{ $self->{TT}{doc_out} };
+
+ # we use the "eval {} or do {}" approach to deal with exceptions, cf https://perlmaven.com/fatal-errors-in-external-modules
+ # but we want it to fail only if there is an error. It seems to be some cases where "map" returns false even if there is no error.
+ # Thus this final 1 to evaluate to true in absence of error.
+ 1;
+ } or do {
+ my $error = $@ || 'Unknown failure';
+ chomp $error;
+ if ( $charset ne 'UTF-8' && $error =~ /^"\\x\{([^"}]*)\}"/ ) {
+
+ # Attempt to write the char that cannot be written. Very fragile code
+ binmode STDERR, ':encoding(UTF-8)';
+ my $char = chr( hex($1) );
+ die wrap_msg(
+ dgettext( "po4a", "Malformed encoding while writing char '%s' to file %s with charset %s: %s" ),
+ $char, $filename, $charset, $error );
+ } else {
+ die wrap_msg( dgettext( "po4a", "Malformed encoding while writing to file %s with charset %s: %s" ),
+ $filename, $charset, $error );
+ }
+ };
+
+ if ( $filename ne '-' ) {
+ close $fh or croak wrap_msg( dgettext( "po4a", "Cannot close %s after writing: %s" ), $filename, $! );
+ }
+
+}
+
+=back
+
+=head2 Manipulating PO files
+
+=over 4
+
+=item readpo($)
+
+Add the content of a file (which name is passed as argument) to the
+existing input PO. The old content is not discarded.
+
+=item writepo($)
+
+Write the extracted PO file to the given filename.
+
+=item stats()
+
+Returns some statistics about the translation done so far. Please note that
+it's not the same statistics than the one printed by msgfmt
+--statistic. Here, it's stats about recent usage of the PO file, while
+msgfmt reports the status of the file. It is a wrapper to the
+Locale::Po4a::Po::stats_get function applied to the input PO file. Example
+of use:
+
+ [normal use of the po4a document...]
+
+ ($percent,$hit,$queries) = $document->stats();
+ print "We found translations for $percent\% ($hit from $queries) of strings.\n";
+
+=back
+
+=cut
+
+sub getpoout {
+ return $_[0]->{TT}{po_out};
+}
+
+sub setpoout {
+ $_[0]->{TT}{po_out} = $_[1];
+}
+
+sub readpo {
+ $_[0]->{TT}{po_in}->read( $_[1] );
+}
+
+sub writepo {
+ $_[0]->{TT}{po_out}->write( $_[1] );
+}
+
+sub stats {
+ return $_[0]->{TT}{po_in}->stats_get();
+}
+
+=head2 Manipulating addenda
+
+=over 4
+
+=item addendum($)
+
+Please refer to L<po4a(7)|po4a.7> for more information on what addenda are,
+and how translators should write them. To apply an addendum to the translated
+document, simply pass its filename to this function and you are done ;)
+
+This function returns a non-null integer on error.
+
+=cut
+
+# Internal function to read the header.
+sub addendum_parse {
+ my $filename = shift;
+ my $charset = shift || 'UTF-8';
+ my $header;
+
+ my ( $errcode, $mode, $position, $boundary, $bmode, $content ) = ( 1, "", "", "", "", "" );
+
+ unless ( open( INS, "<:encoding($charset)", $filename ) ) {
+ warn wrap_msg( dgettext( "po4a", "Cannot read from %s: %s" ), $filename, $! );
+ goto END_PARSE_ADDFILE;
+ }
+
+ $PerlIO::encoding::fallback = FB_CROAK;
+ eval {
+ unless ( defined( $header = <INS> ) && $header ) {
+ warn wrap_msg( dgettext( "po4a", "Cannot read po4a header from %s." ), $filename );
+ goto END_PARSE_ADDFILE;
+ }
+ } or do {
+ my $error = $@ || 'Unknown failure';
+ chomp $error;
+ die wrap_msg( dgettext( "po4a", "Malformed encoding while reading from file %s with charset %s: %s" ),
+ $filename, $charset, $error );
+ };
+
+ unless ( $header =~ s/PO4A-HEADER://i ) {
+ warn wrap_msg( dgettext( "po4a", "First line of %s does not look like a po4a header." ), $filename );
+ goto END_PARSE_ADDFILE;
+ }
+ foreach my $part ( split( /;/, $header ) ) {
+ unless ( $part =~ m/^\s*([^=]*)=(.*)$/ ) {
+ warn wrap_msg( dgettext( "po4a", "Syntax error in po4a header of %s, near \"%s\"" ), $filename, $part );
+ goto END_PARSE_ADDFILE;
+ }
+ my ( $key, $value ) = ( $1, $2 );
+ $key = lc($key);
+ if ( $key eq 'mode' ) {
+ $mode = lc($value);
+ } elsif ( $key eq 'position' ) {
+ $position = $value;
+ } elsif ( $key eq 'endboundary' ) {
+ $boundary = $value;
+ $bmode = 'after';
+ } elsif ( $key eq 'beginboundary' ) {
+ $boundary = $value;
+ $bmode = 'before';
+ } else {
+ warn wrap_msg( dgettext( "po4a", "Invalid argument in the po4a header of %s: %s" ), $filename, $key );
+ goto END_PARSE_ADDFILE;
+ }
+ }
+
+ unless ( length($mode) ) {
+ warn wrap_msg( dgettext( "po4a", "The po4a header of %s does not define the mode." ), $filename );
+ goto END_PARSE_ADDFILE;
+ }
+ unless ( $mode eq "before" || $mode eq "after" || $mode eq "eof" ) {
+ warn wrap_msg(
+ dgettext(
+ "po4a",
+ "Mode invalid in the po4a header of %s: should be 'before', 'after' or 'eof'. Instead, it is '%s'."
+ ),
+ $filename,
+ $mode
+ );
+ goto END_PARSE_ADDFILE;
+ }
+
+ unless ( length($position) || $mode eq "eof" ) {
+ warn wrap_msg( dgettext( "po4a", "The po4a header of %s does not define the position." ), $filename );
+ goto END_PARSE_ADDFILE;
+ }
+ if ( $mode eq "after" && length($boundary) == 0 ) {
+ warn wrap_msg( dgettext( "po4a", "No ending boundary given in the po4a header, but mode=after." ) );
+ goto END_PARSE_ADDFILE;
+ }
+ if ( $mode eq "eof" && length($position) ) {
+ warn wrap_msg( dgettext( "po4a", "No position needed when mode=eof." ) );
+ goto END_PARSE_ADDFILE;
+ }
+ if ( $mode eq "eof" && length($boundary) ) {
+ warn wrap_msg( dgettext( "po4a", "No ending boundary needed when mode=eof." ) );
+ goto END_PARSE_ADDFILE;
+ }
+
+ eval {
+ while ( defined( my $line = <INS> ) ) {
+ $content .= $line;
+ }
+ };
+ my $error = $@;
+ if ( length($error) ) {
+ chomp $error;
+ die wrap_msg( dgettext( "po4a", "Malformed encoding while reading from file %s with charset %s: %s" ),
+ $filename, $charset, $error );
+ }
+ close INS;
+
+ $errcode = 0;
+ END_PARSE_ADDFILE:
+ return ( $errcode, $mode, $position, $boundary, $bmode, $content );
+}
+
+sub mychomp {
+ my ($str) = shift;
+ chomp($str);
+ return $str;
+}
+
+sub addendum {
+ my ( $self, $filename ) = @_;
+
+ print STDERR wrap_mod( "po4a::transtractor::addendum", "Apply addendum %s", $filename )
+ if $self->debug();
+ unless ($filename) {
+ warn wrap_msg( dgettext( "po4a", "Cannot apply addendum when not given the filename" ) );
+ return 0;
+ }
+ die wrap_msg( dgettext( "po4a", "Addendum %s does not exist." ), $filename )
+ unless -e $filename;
+
+ my ( $errcode, $mode, $position, $boundary, $bmode, $content ) =
+ addendum_parse( $filename, $self->{TT}{'addendum_charset'} );
+ return 0 if ($errcode);
+
+ # In order to make addendum more intuitive, each array item of
+ # @{$self->{TT}{doc_out}} must not have internal "\n". But previous parser
+ # code may put multiple internal "\n" to address things like placeholder
+ # tag handling. Let's normalize array content.
+ # Use internal "\n" as delimiter but keep it by using the lookbehind trick.
+ @{ $self->{TT}{doc_out} } = map { split /(?<=\n)/, $_ } @{ $self->{TT}{doc_out} };
+
+ # Bugs around addendum is hard to understand. So let's print involved data explicitly.
+ if ( $self->debug() ) {
+ print STDERR "Addendum position regex=$position\n";
+ print STDERR "Addendum mode=$mode\n";
+ if ( $mode eq "after" ) {
+ print STDERR "Addendum boundary regex=$boundary\n";
+ print STDERR "Addendum boundary mode=$bmode\n";
+ }
+ print STDERR "Addendum content (begin):\n";
+ print STDERR "$content";
+ print STDERR "Addendum content (end)\n";
+ print STDERR "Output items searched for the addendum insertion position:\n";
+ foreach my $item ( @{ $self->{TT}{doc_out} } ) {
+ print STDERR $item;
+ print STDERR "\n----- [ search item end marker with a preceding newline ] -----\n";
+ }
+ print STDERR "Start searching addendum insertion position...\n";
+ }
+
+ unless ( $mode eq 'eof' ) {
+ my $found = scalar grep { /$position/ } @{ $self->{TT}{doc_out} };
+ if ( $found == 0 ) {
+ warn wrap_msg( dgettext( "po4a", "No candidate position for the addendum %s." ), $filename );
+ return 0;
+ }
+ if ( $found > 1 ) {
+ warn wrap_msg( dgettext( "po4a", "More than one candidate position found for the addendum %s." ),
+ $filename );
+ return 0;
+ }
+ }
+
+ if ( $mode eq "eof" ) {
+ push @{ $self->{TT}{doc_out} }, $content;
+ } elsif ( $mode eq "before" ) {
+ if ( $self->verbose() > 1 || $self->debug() ) {
+ map {
+ print STDERR wrap_msg( dgettext( "po4a", "Addendum '%s' applied before this line: %s" ), $filename, $_ )
+ if (/$position/);
+ } @{ $self->{TT}{doc_out} };
+ }
+ @{ $self->{TT}{doc_out} } = map { /$position/ ? ( $content, $_ ) : $_ } @{ $self->{TT}{doc_out} };
+ } else {
+ my @newres = ();
+
+ do {
+ # make sure it doesn't whine on empty document
+ my $line = scalar @{ $self->{TT}{doc_out} } ? shift @{ $self->{TT}{doc_out} } : "";
+ push @newres, $line;
+ my $outline = mychomp($line);
+ $outline =~ s/^[ \t]*//;
+
+ if ( $line =~ m/$position/ ) {
+ while ( $line = shift @{ $self->{TT}{doc_out} } ) {
+ last if ( $line =~ /$boundary/ );
+ push @newres, $line;
+ }
+ if ( defined $line ) {
+ if ( $bmode eq 'before' ) {
+ print wrap_msg( dgettext( "po4a", "Addendum '%s' applied before this line: %s" ),
+ $filename, $outline )
+ if ( $self->verbose() > 1 || $self->debug() );
+ push @newres, $content;
+ push @newres, $line;
+ } else {
+ print wrap_msg( dgettext( "po4a", "Addendum '%s' applied after the line: %s." ),
+ $filename, $outline )
+ if ( $self->verbose() > 1 || $self->debug() );
+ push @newres, $line;
+ push @newres, $content;
+ }
+ } else {
+ print wrap_msg( dgettext( "po4a", "Addendum '%s' applied at the end of the file." ), $filename )
+ if ( $self->verbose() > 1 || $self->debug() );
+ push @newres, $content;
+ }
+ }
+ } while ( scalar @{ $self->{TT}{doc_out} } );
+ @{ $self->{TT}{doc_out} } = @newres;
+ }
+ print STDERR wrap_mod( "po4a::transtractor::addendum", "Done with addendum %s", $filename )
+ if $self->debug();
+ return 1;
+}
+
+=back
+
+=head1 INTERNAL FUNCTIONS used to write derivative parsers
+
+=head2 Getting input, providing output
+
+Four functions are provided to get input and return output. They are very
+similar to shift/unshift and push/pop of Perl.
+
+ * Perl shift returns the first array item and drop it from the array.
+ * Perl unshift prepends an item to the array as the first array item.
+ * Perl pop returns the last array item and drop it from the array.
+ * Perl push appends an item to the array as the last array item.
+
+The first pair is about input, while the second is about output. Mnemonic: in
+input, you are interested in the first line, what shift gives, and in output
+you want to add your result at the end, like push does.
+
+=over 4
+
+=item shiftline()
+
+This function returns the first line to be parsed and its corresponding
+reference (packed as an array) from the array C<< @{$self->{TT}{doc_in}} >> and
+drop these first 2 array items. Here, the reference is provided by a string
+C<< $filename:$linenum >>.
+
+=item unshiftline($$)
+
+Unshifts the last shifted line of the input document and its corresponding
+reference back to the head of C<< {$self->{TT}{doc_in}} >>.
+
+=item pushline($)
+
+Push a new line to the end of C<< {$self->{TT}{doc_out}} >>.
+
+=item popline()
+
+Pop the last pushed line from the end of C<< {$self->{TT}{doc_out}} >>.
+
+=back
+
+=cut
+
+sub shiftline {
+ my ( $line, $ref ) = ( shift @{ $_[0]->{TT}{doc_in} }, shift @{ $_[0]->{TT}{doc_in} } );
+ return ( $line, $ref );
+}
+
+sub unshiftline {
+ my $self = shift;
+ unshift @{ $self->{TT}{doc_in} }, @_;
+}
+
+sub pushline { push @{ $_[0]->{TT}{doc_out} }, $_[1] if defined $_[1]; }
+sub popline { return pop @{ $_[0]->{TT}{doc_out} }; }
+
+=head2 Marking strings as translatable
+
+One function is provided to handle the text which should be translated.
+
+=over 4
+
+=item translate($$$)
+
+Mandatory arguments:
+
+=over 2
+
+=item -
+
+A string to translate
+
+=item -
+
+The reference of this string (i.e. position in inputfile)
+
+=item -
+
+The type of this string (i.e. the textual description of its structural role;
+used in Locale::Po4a::Po::gettextization(); see also L<po4a(7)|po4a.7>,
+section B<Gettextization: how does it work?>)
+
+=back
+
+This function can also take some extra arguments. They must be organized as
+a hash. For example:
+
+ $self->translate("string","ref","type",
+ 'wrap' => 1);
+
+=over
+
+=item B<wrap>
+
+boolean indicating whether we can consider that whitespaces in string are
+not important. If yes, the function canonizes the string before looking for
+a translation or extracting it, and wraps the translation.
+
+=item B<wrapcol>
+
+the column at which we should wrap (default: the value of B<wrapcol> specified
+during creation of the TransTractor or 76).
+
+The negative value will be substracted from the default.
+
+=item B<comment>
+
+an extra comment to add to the entry.
+
+=back
+
+Actions:
+
+=over 2
+
+=item -
+
+Pushes the string, reference and type to po_out.
+
+=item -
+
+Returns the translation of the string (as found in po_in) so that the
+parser can build the doc_out.
+
+=item -
+
+Handles the charsets to recode the strings before sending them to
+po_out and before returning the translations.
+
+=back
+
+=back
+
+=cut
+
+sub translate {
+ my $self = shift;
+ my ( $string, $ref, $type ) = ( shift, shift, shift );
+ my (%options) = @_;
+
+ return "" unless length($string);
+
+ # my %validoption;
+ # map { $validoption{$_}=1 } (qw(wrap wrapcoll));
+ # foreach (keys %options) {
+ # Carp::confess "internal error: translate() called with unknown arg $_. Valid options: $validoption"
+ # unless $validoption{$_};
+ # }
+
+ if ( !defined $options{'wrapcol'} ) {
+ $options{'wrapcol'} = $self->{TT}{wrapcol}
+ } elsif ( $options{'wrapcol'} < 0 ) {
+ $options{'wrapcol'} = $self->{TT}{wrapcol} + $options{'wrapcol'};
+ }
+ my $transstring = $self->{TT}{po_in}->gettext(
+ $string,
+ 'wrap' => $options{'wrap'} || 0,
+ 'wrapcol' => $options{'wrapcol'}
+ );
+
+ # the comments provided by the modules are automatic comments from the PO point of view
+ $self->{TT}{po_out}->push(
+ 'msgid' => $string,
+ 'reference' => $ref,
+ 'type' => $type,
+ 'automatic' => $options{'comment'},
+ 'flags' => $options{'flags'},
+ 'wrap' => $options{'wrap'} || 0,
+ );
+
+ if ( $options{'wrap'} || 0 ) {
+ $transstring =~ s/( *)$//s;
+ my $trailing_spaces = $1 || "";
+ $transstring =~ s/(?<!\\) +$//gm;
+ $transstring .= $trailing_spaces;
+ }
+
+ return $transstring;
+}
+
+=head2 Misc functions
+
+=over 4
+
+=item verbose()
+
+Returns if the verbose option was passed during the creation of the
+TransTractor.
+
+=cut
+
+sub verbose {
+ if ( defined $_[1] ) {
+ $_[0]->{TT}{verbose} = $_[1];
+ } else {
+ return $_[0]->{TT}{verbose} || 0; # undef and 0 have the same meaning, but one generates warnings
+ }
+}
+
+=item debug()
+
+Returns if the debug option was passed during the creation of the
+TransTractor.
+
+=cut
+
+sub debug {
+ return $_[0]->{TT}{debug};
+}
+
+=item get_in_charset()
+
+This function return the charset that was provided as master charset
+
+=cut
+
+sub get_in_charset() {
+ return $_[0]->{TT}{'file_in_charset'};
+}
+
+=item get_out_charset()
+
+This function will return the charset that should be used in the output
+document (usually useful to substitute the input document's detected charset
+where it has been found).
+
+It will use the output charset specified in the command line. If it wasn't
+specified, it will use the input PO's charset, and if the input PO has the
+default "CHARSET", it will return the input document's charset, so that no
+encoding is performed.
+
+=cut
+
+sub get_out_charset {
+ my $self = shift;
+
+ # Prefer the value specified on the command line
+ return $self->{TT}{'file_out_charset'}
+ if length( $self->{TT}{'file_out_charset'} );
+
+ return $self->{TT}{po_in}->get_charset if $self->{TT}{po_in}->get_charset ne 'CHARSET';
+
+ return $self->{TT}{'file_in_charset'} if length( $self->{TT}{'file_in_charset'} );
+
+ return 'UTF-8';
+}
+
+# Push the translation of a Yaml document or Yaml Front-Matter header, parsed by YAML::Tiny in any case
+# $is_yfm is a boolean indicating whether we are dealing with a Front Matter (true value) or whole document (false value)
+sub handle_yaml {
+ my ( $self, $is_yfm, $blockref, $yamlarray, $yfm_keys, $yfm_skip_array, $yfm_paths ) = @_;
+
+ die "Empty YAML " . ( $is_yfm ? "Front Matter" : "document" ) unless ( length($yamlarray) > 0 );
+
+ my ( $indent, $ctx ) = ( 0, "" );
+ foreach my $cursor (@$yamlarray) {
+
+ # An empty document
+ if ( !defined $cursor ) {
+ $self->pushline("---\n");
+
+ # Do nothing
+
+ # A scalar document
+ } elsif ( !ref $cursor ) {
+ $self->pushline("---\n");
+ $self->pushline(
+ format_scalar(
+ $self->translate(
+ $cursor, $blockref,
+ "YAML " . ( $is_yfm ? "Front Matter " : "" ) . "(scalar)",
+ "wrap" => 0
+ )
+ )
+ );
+
+ # A list at the root
+ } elsif ( ref $cursor eq 'ARRAY' ) {
+ if (@$cursor) {
+ $self->pushline("---\n");
+ do_array( $self, $is_yfm, $blockref, $cursor, $indent, $ctx, $yfm_keys, $yfm_skip_array, $yfm_paths );
+ } else {
+ $self->pushline("---[]\n");
+ }
+
+ # A hash at the root
+ } elsif ( ref $cursor eq 'HASH' ) {
+ if (%$cursor) {
+ $self->pushline("---\n");
+ do_hash( $self, $is_yfm, $blockref, $cursor, $indent, $ctx, $yfm_keys, $yfm_skip_array, $yfm_paths );
+ } else {
+ $self->pushline("--- {}\n");
+ }
+
+ } else {
+ die( "Cannot serialize " . ref($cursor) );
+ }
+ }
+
+ # Escape the string to make it valid in YAML.
+ # This is very similar to YAML::Tiny::_dump_scalar but does not do the internal->UTF-8 decoding,
+ # as the translations that we feed into this function are already in UTF-8
+ sub format_scalar {
+ my $string = $_[0];
+ my $is_key = $_[1];
+
+ return '~' unless defined $string;
+ return "''" unless length $string;
+ if ( Scalar::Util::looks_like_number($string) ) {
+
+ # keys and values that have been used as strings get quoted
+ if ($is_key) {
+ return qq['$string'];
+ } else {
+ return $string;
+ }
+ }
+ if ( $string =~ /[\\\'\n]/ ) {
+ $string =~ s/\\/\\\\/g;
+ $string =~ s/"/\\"/g;
+ $string =~ s/\n/\\n/g;
+ return qq|"$string"|;
+ }
+ if ( $string =~ /(?:^[~!@#%&*|>?:,'"`{}\[\]]|^-+$|\s|:\z)/ ) {
+ return "'$string'";
+ }
+ return $string;
+ }
+
+ sub do_array {
+ my ( $self, $is_yfm, $blockref, $array, $indent, $ctx, $yfm_keys, $yfm_skip_array, $yfm_paths ) = @_;
+ foreach my $el (@$array) {
+ my $header = ( ' ' x $indent ) . '- ';
+ my $type = ref $el;
+ if ( !$type ) {
+ if ($yfm_skip_array) {
+ $self->pushline( $header . YAML::Tiny::_dump_scalar( "dummy", $el, 0 ) . "\n" );
+ } else {
+ $self->pushline(
+ $header
+ . format_scalar(
+ $self->translate(
+ $el, $blockref,
+ ( $is_yfm ? "Yaml Front Matter " : "" ) . "Array Element:$ctx", "wrap" => 0
+ )
+ )
+ . "\n"
+ );
+ }
+
+ } elsif ( $type eq 'ARRAY' ) {
+ if (@$el) {
+ $self->pushline( $header . "\n" );
+ do_array( $self, $is_yfm, $blockref, $el, $indent + 1,
+ $ctx, $yfm_keys, $yfm_skip_array, $yfm_paths );
+ } else {
+ $self->pushline( $header . " []\n" );
+ }
+
+ } elsif ( $type eq 'HASH' ) {
+ if ( keys %$el ) {
+ $self->pushline( $header . "\n" );
+ do_hash( $self, $is_yfm, $blockref, $el, $indent + 1, $ctx, $yfm_keys, $yfm_skip_array,
+ $yfm_paths );
+ } else {
+ $self->pushline( $header . " {}\n" );
+ }
+
+ } else {
+ die "YAML $type references not supported";
+ }
+ }
+ }
+
+ sub do_hash {
+ my ( $self, $is_yfm, $blockref, $hash, $indent, $ctx, $yfm_keys, $yfm_skip_array, $yfm_paths ) = @_;
+
+ foreach my $name ( sort keys %$hash ) {
+ my $el = $hash->{$name} // "";
+ my $header = ( ' ' x $indent ) . YAML::Tiny::_dump_scalar( "dummy", $name, 1 ) . ":";
+
+ unless ( length($el) > 0 ) { # empty element, as in "tags: " with nothing after the column
+ $self->pushline( $header . "\n" );
+ next;
+ }
+
+ my $type = ref $el;
+ if ( !$type ) {
+ my %keys = %{$yfm_keys};
+ my %paths = %{$yfm_paths};
+ my $path = "$ctx $name" =~ s/^\s+|\s+$//gr; # Need to trim the path, at least when there is no ctx yet
+
+ if ( ( $el eq 'false' ) or ( $el eq 'true' ) ) { # Do not translate nor quote booleans
+ $self->pushline("$header $el\n");
+ } elsif (
+ ( scalar %keys > 0 && exists $keys{$name} ) or # the key we need is provided
+ ( scalar %paths > 0 && exists $paths{$path} ) or # that path is provided
+ ( scalar %keys == 0 && scalar %paths == 0 ) # no key and no path provided
+ )
+ {
+ my $translation = $self->translate(
+ $el, $blockref,
+ ( $is_yfm ? "Yaml Front Matter " : "" ) . "Hash Value:$ctx $name",
+ "wrap" => 0
+ );
+
+ # add extra quotes to the parameter, as a protection to the extra chars that the translator could add
+ $self->pushline( $header . ' ' . format_scalar($translation) . "\n" );
+ } else {
+
+ # Work around a bug in YAML::Tiny that quotes numbers
+ # See https://github.com/Perl-Toolchain-Gang/YAML-Tiny#additional-perl-specific-notes
+ if ( Scalar::Util::looks_like_number($el) ) {
+ $self->pushline("$header $el\n");
+ } else {
+ $self->pushline( $header . ' ' . YAML::Tiny::_dump_scalar( "dummy", $el ) . "\n" );
+ }
+ }
+
+ } elsif ( $type eq 'ARRAY' ) {
+ if (@$el) {
+ $self->pushline( $header . "\n" );
+ do_array(
+ $self, $is_yfm, $blockref, $el, $indent + 1, "$ctx $name",
+ $yfm_keys, $yfm_skip_array, $yfm_paths
+ );
+ } else {
+ $self->pushline( $header . " []\n" );
+ }
+
+ } elsif ( $type eq 'HASH' ) {
+ if ( keys %$el ) {
+ $self->pushline( $header . "\n" );
+ do_hash(
+ $self, $is_yfm, $blockref, $el, $indent + 1, "$ctx $name",
+ $yfm_keys, $yfm_skip_array, $yfm_paths
+ );
+ } else {
+ $self->pushline( $header . " {}\n" );
+ }
+
+ } else {
+ die "YAML $type references not supported";
+ }
+ }
+ }
+}
+
+=back
+
+=head1 FUTURE DIRECTIONS
+
+One shortcoming of the current TransTractor is that it can't handle
+translated document containing all languages, like debconf templates, or
+.desktop files.
+
+To address this problem, the only interface changes needed are:
+
+=over 2
+
+=item -
+
+take a hash as po_in_name (a list per language)
+
+=item -
+
+add an argument to translate to indicate the target language
+
+=item -
+
+make a pushline_all function, which would make pushline of its content for
+all languages, using a map-like syntax:
+
+ $self->pushline_all({ "Description[".$langcode."]=".
+ $self->translate($line,$ref,$langcode)
+ });
+
+=back
+
+Will see if it's enough ;)
+
+=head1 AUTHORS
+
+ Denis Barbier <barbier@linuxfr.org>
+ Martin Quinson (mquinson#debian.org)
+ Jordi Vilalta <jvprat@gmail.com>
+
+=cut
+
+1;
diff --git a/lib/Locale/Po4a/Wml.pm b/lib/Locale/Po4a/Wml.pm
new file mode 100644
index 0000000..8c93d3b
--- /dev/null
+++ b/lib/Locale/Po4a/Wml.pm
@@ -0,0 +1,210 @@
+#!/usr/bin/perl -w
+
+# Po4a::Wml.pm
+#
+# extract and translate translatable strings from a WML (web markup language) documents
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+########################################################################
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Wml - convert WML (web markup language) documents from/to PO files
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::Wml is a module to help the translation of WML documents into
+other [human] languages. Do not mixup the WML we are speaking about here
+(web markup language) and the WAP crap used on cell phones.
+
+Please note that this module relies upon the Locale::Po4a::Xhtml
+module, which also relies upon the Locale::Po4a::Xml module. This
+means that all tags for web page expressions are assumed to be written
+in the XHTML syntax.
+
+=head1 OPTIONS ACCEPTED BY THIS MODULE
+
+NONE.
+
+=head1 STATUS OF THIS MODULE
+
+This module works for some simple documents, but is still young.
+Currently, the biggest issue of the module is probably that it cannot
+handle documents that contain non-XML inline tags such as <email
+"foo@example.org">, which are often defined in the WML. Improvements
+will be added in the future releases.
+
+=cut
+
+package Locale::Po4a::Wml;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+require Exporter;
+use vars qw(@ISA @EXPORT);
+@ISA = qw(Locale::Po4a::Xhtml);
+@EXPORT = qw();
+
+use Locale::Po4a::Common;
+use Locale::Po4a::Xhtml;
+use File::Temp;
+
+sub initialize {
+ my $self = shift;
+ my %options = @_;
+
+ $self->SUPER::initialize(%options);
+
+ print "Call treat_options\n" if $self->{options}{'debug'};
+ $self->treat_options;
+}
+
+sub read {
+ my ( $self, $filename, $refname, $charset ) = @_;
+ my $tmp_filename;
+ ( undef, $tmp_filename ) = File::Temp::tempfile(
+ "po4aXXXX",
+ DIR => File::Spec->tmpdir(),
+ SUFFIX => ".xml",
+ OPEN => 0,
+ UNLINK => 0
+ ) or die wrap_msg( gettext("Cannot create a temporary XML file: %s"), $! );
+ my $file;
+ open FILEIN, "$filename" or die "Cannot read $filename: $!\n";
+ {
+ $/ = undef;
+ $file = <FILEIN>;
+ }
+ $/ = "\n";
+
+ # Mask perl cruft out of XML sight
+ while (( $file =~ m|^(.*?)<perl>(.*?)</perl>(.*?)$|ms )
+ or ( $file =~ m|^(.*?)<:(.*?):>(.*)$|ms ) )
+ {
+ my ( $pre, $in, $post ) = ( $1, $2, $3 );
+ $in =~ s/</PO4ALT/g;
+ $in =~ s/>/PO4AGT/g;
+ $file = "${pre}<!--PO4ABEGINPERL${in}PO4AENDPERL-->$post";
+ }
+
+ # Mask mp4h cruft
+ while ( $file =~ s|^#(.*)$|<!--PO4ASHARPBEGIN$1PO4ASHARPEND-->|m ) {
+ my $line = $1;
+ print STDERR "PROTECT HEADER: $line\n"
+ if $self->{options}{'debug'};
+
+ # If the wml tag has a title attribute, use a fake
+ # <title> xml tag to enable the extraction
+ # for translation in the xml parser.
+ if ( $line =~ m/title="([^"]*)"/ ) {
+ $file = "<title>$1</title>\n" . $file;
+ }
+ }
+
+ # Validate define-tag tag's argument
+ $file =~ s|(<define-tag\s+)([^\s>]+)|$1PO4ADUMMYATTR="$2"|g;
+
+ # Flush the result to disk
+ open OUTFILE, ">$tmp_filename";
+ print OUTFILE $file;
+ close INFILE;
+ close OUTFILE or die "Cannot write $tmp_filename: $!\n";
+
+ push @{ $self->{DOCXML}{infile} }, $tmp_filename;
+ $self->{DOCWML}{$tmp_filename} = $filename;
+ $self->Locale::Po4a::TransTractor::read( $tmp_filename, $refname, $charset );
+ unlink "$tmp_filename";
+}
+
+sub parse {
+ my $self = shift;
+
+ foreach my $filename ( @{ $self->{DOCXML}{infile} } ) {
+ $self->Locale::Po4a::Xml::parse_file($filename);
+ my $org_filename = $self->{DOCWML}{$filename};
+
+ # Fix the references
+ foreach my $msgid ( keys %{ $self->{TT}{po_out}{po} } ) {
+ $self->{TT}{po_out}{po}{$msgid}{'reference'} =~ s|$filename(:\d+)|$org_filename$1|o;
+ }
+
+ # Get the document back (undoing our WML masking)
+ # FIXME: need to join the file first, and then split?
+ my @doc_out;
+ my $cnt = 0;
+ my $title_node;
+ my $title;
+
+ foreach my $line ( @{ $self->{TT}{doc_out} } ) {
+ if ( !$cnt ) {
+ if ( !$title_node && $line =~ m/<title>/ ) {
+ $title_node = $line;
+ } elsif ($title_node) {
+ $title_node .= $line;
+ if ( $title_node =~ m/<title>(.*?)<\/title>/ ) {
+ $title = $1;
+ $cnt = 1;
+ }
+ } else {
+ $cnt = 1;
+ }
+ } else {
+ if ( $line =~ s/^<!--PO4ASHARPBEGIN(.*?)PO4ASHARPEND-->/#$1/mg && $title ) {
+ $line =~ s/title="[^"]*"$/title="$title"/mg;
+ }
+ $line =~ s/<!--PO4ABEGINPERL(.*?)PO4AENDPERL-->/<:$1:>/sg;
+ $line =~ s/(<define-tag\s+)PO4ADUMMYATTR="([^"]*)"/$1$2/g;
+ $line =~ s/PO4ALT/</sg;
+ $line =~ s/PO4AGT/>/sg;
+ push @doc_out, $line;
+ }
+ }
+
+ # Do a simple left trim
+ foreach my $line (@doc_out) {
+ if ( $line =~ m/\s+/ ) {
+ shift @doc_out;
+ } else {
+ last;
+ }
+ }
+
+ $self->{TT}{doc_out} = \@doc_out;
+ }
+}
+
+1;
+
+=head1 AUTHORS
+
+ Martin Quinson (mquinson#debian.org)
+ Noriada Kobayashi <nori1@dolphin.c.u-tokyo.ac.jp>
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright © 2005 SPI, Inc.
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
diff --git a/lib/Locale/Po4a/Xhtml.pm b/lib/Locale/Po4a/Xhtml.pm
new file mode 100644
index 0000000..7d11db3
--- /dev/null
+++ b/lib/Locale/Po4a/Xhtml.pm
@@ -0,0 +1,245 @@
+#!/usr/bin/perl
+
+# Po4a::Xhtml.pm
+#
+# extract and translate translatable strings from XHTML documents.
+#
+# This code extracts plain text from tags and attributes from strict XHTML
+# documents.
+#
+# Copyright © 2005 Yves Rütschlé <po4a@rutschle.net>
+# Copyright © 2007-2008 Nicolas François <nicolas.francois@centraliens.net>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+########################################################################
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Xhtml - convert XHTML documents from/to PO files
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::Xhtml is a module to help the translation of XHTML documents into
+other [human] languages.
+
+=head1 OPTIONS ACCEPTED BY THIS MODULE
+
+These are this module's particular options:
+
+=over 4
+
+=item B<includessi>[B<=>I<rootpath>]
+
+Include files specified by an include SSI (Server Side Includes) element
+(e.g. <!--#include virtual="/foo/bar.html" -->).
+
+B<Note:> You should use it only for static files.
+
+An additional I<rootpath> parameter can be specified. It specifies the root
+path to find files included by a B<virtual> attribute.
+
+=back
+
+=head1 STATUS OF THIS MODULE
+
+This module is fully functional, as it relies in the L<Locale::Po4a::Xml>
+module. This only defines the translatable tags and attributes.
+
+"It works for me", which means I use it successfully on my personal Web site.
+However, YMMV: please let me know if something doesn't work for you.
+
+=head1 SEE ALSO
+
+L<Locale::Po4a::TransTractor(3pm)>, L<Locale::Po4a::Xml(3pm)>, L<po4a(7)|po4a.7>
+
+=head1 AUTHORS
+
+ Yves Rütschlé <po4a@rutschle.net>
+ Nicolas François <nicolas.francois@centraliens.net>
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright © 2004 Yves Rütschlé <po4a@rutschle.net>
+ Copyright © 2007-2008 Nicolas François <nicolas.francois@centraliens.net>
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
+
+=cut
+
+package Locale::Po4a::Xhtml;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+use Locale::Po4a::Xml;
+use vars qw(@tag_types);
+*tag_types = \@Locale::Po4a::Xml::tag_types;
+
+use Locale::Po4a::Common;
+use Carp qw(croak);
+
+use vars qw(@ISA);
+@ISA = qw(Locale::Po4a::Xml);
+
+sub tag_extract_SSI {
+ my ( $self, $remove ) = ( shift, shift );
+ my ( $eof, @tag ) = $self->get_string_until(
+ "-->",
+ {
+ include => 1,
+ remove => $remove,
+ unquoted => 1
+ }
+ );
+ my ( $t, $r ) = @tag;
+ if ( $t =~ m/<!--#include (file|virtual)="(.*?)"\s-->/s ) {
+ my $includefile;
+ if ( $1 eq "file" ) {
+ $includefile = ".";
+ } else {
+ $includefile = $self->{options}{'includessi'};
+ }
+ $includefile .= $2;
+ if ( !$remove ) {
+ $self->get_string_until(
+ "-->",
+ {
+ include => 1,
+ remove => 1,
+ unquoted => 1
+ }
+ );
+ }
+ my $linenum = 0;
+ my @include;
+
+ open( my $in, $includefile )
+ or croak wrap_mod( "po4a::xml", dgettext( "po4a", "Cannot read from %s: %s" ), $includefile, $! );
+ while ( defined( my $includeline = <$in> ) ) {
+ $linenum++;
+ my $includeref = $includefile . ":$linenum";
+ push @include, ( $includeline, $includeref );
+ }
+ close $in
+ or croak wrap_mod( "po4a::xml", dgettext( "po4a", "Cannot close %s after reading: %s" ), $includefile, $! );
+
+ while (@include) {
+ my ( $ir, $il ) = ( pop @include, pop @include );
+ $self->unshiftline( $il, $ir );
+ }
+ $t =~ s/<!--#include/<!-- SSI included by po4a: /;
+ $self->unshiftline( $t, $r );
+ }
+ return ( $eof, @tag );
+}
+
+sub initialize {
+ my $self = shift;
+ my %options = @_;
+
+ $self->{options}{'includessi'} = '';
+
+ $self->SUPER::initialize(%options);
+
+ $self->{options}{'wrap'} = 1;
+ $self->{options}{'doctype'} = $self->{options}{'doctype'} || 'html';
+
+ # Default tags are translated (text rewrapped), and introduce a break.
+ # The following list indicates the list of tags which should be
+ # translated without rewrapping.
+ $self->{options}{'_default_translated'} .= '
+ W<pre>
+ ';
+
+ # The following list indicates the list of tags which should be
+ # translated inside the current block, without introducing a break.
+ $self->{options}{'_default_inline'} .= '
+ <a>
+ <abbr>
+ <acronym>
+ <b>
+ <big>
+ <bdo>
+ <button>
+ <cite>
+ <code>
+ <del>
+ <dfn>
+ <em>
+ <i>
+ <ins>
+ <input>
+ <kbd>
+ <label>
+ <object>
+ <q>
+ <samp>
+ <select>
+ <small>
+ <span>
+ <strong>
+ <sub>
+ <sup>
+ <textarea>
+ <tt>
+ <u>
+ <var>
+ ';
+
+ # Ignored tags: <img>
+ # Technically, <img> is an inline tag, but setting it as such is
+ # annoying, and not usually useful, unless you use images to
+ # write text (in which case you have bigger problems than this
+ # program not inlining img: you now have to translate all your
+ # images. That'll teach you).
+ # If you choose to translate images, you may also want to set
+ # <map> as placeholder and <area> as inline.
+
+ $self->{options}{'_default_attributes'} .= '
+ alt
+ lang
+ title
+ ';
+
+ $self->{options}{'optionalclosingtag'} = 1;
+
+ print "Call treat_options\n" if $self->{options}{'debug'};
+ $self->treat_options;
+
+ if ( defined $self->{options}{'includessi'}
+ and length $self->{options}{'includessi'} )
+ {
+ foreach (@tag_types) {
+ if ( $_->{beginning} eq "!--#" ) {
+ $_->{f_extract} = \&tag_extract_SSI;
+ }
+ }
+
+ # FIXME: the directory may be named "1" ;(
+ if ( $self->{options}{'includessi'} eq "1" ) {
+ $self->{options}{'includessi'} = ".";
+ }
+ }
+}
diff --git a/lib/Locale/Po4a/Xml.pm b/lib/Locale/Po4a/Xml.pm
new file mode 100644
index 0000000..3666ac7
--- /dev/null
+++ b/lib/Locale/Po4a/Xml.pm
@@ -0,0 +1,2539 @@
+#!/usr/bin/perl
+
+# Po4a::Xml.pm
+#
+# extract and translate translatable strings from XML documents.
+#
+# This code extracts plain text from tags and attributes from generic
+# XML documents, and it can be used as a base to build modules for
+# XML-based documents.
+#
+# Copyright © 2004 Jordi Vilalta <jvprat@gmail.com>
+# Copyright © 2008-2009 Nicolas François <nicolas.francois@centraliens.net>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+########################################################################
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Xml - convert XML documents and derivates from/to PO files
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+Locale::Po4a::Xml is a module to help the translation of XML documents into
+other [human] languages. It can also be used as a base to build modules for
+XML-based documents.
+
+=cut
+
+package Locale::Po4a::Xml;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+require Exporter;
+use vars qw(@ISA @EXPORT);
+@ISA = qw(Locale::Po4a::TransTractor);
+@EXPORT = qw(new initialize @tag_types);
+
+use Locale::Po4a::TransTractor;
+use Locale::Po4a::Common;
+use Carp qw(croak);
+use File::Basename;
+use File::Spec;
+
+#It will maintain the path from the root tag to the current one
+my @path;
+
+#It will contain a list of external entities and their attached paths
+my %entities;
+
+my @comments;
+my %translate_options_cache;
+my $input_charset;
+
+# This shiftline function returns the next line of the document being parsed
+# (and its reference).
+# For XML, it overloads the Transtractor shiftline to handle:
+# - text file inclusion if includeexternal option is set
+# - dropping of the text in the XML comment <!--... -->
+my $_shiftline_in_comment = 0;
+
+sub shiftline {
+ my $self = shift;
+
+ # call Transtractor's shiftline
+ my ( $line, $ref ) = $self->SUPER::shiftline();
+ return ( $line, $ref ) if ( not defined $line );
+
+ if ( $self->{options}{'includeexternal'} ) {
+ my $tmp;
+
+ for my $k ( keys %entities ) {
+ if ( $line =~ m/^(.*?)&$k;(.*)$/s ) {
+ my ( $before, $after ) = ( $1, $2 );
+ my $linenum = 0;
+ my @textentries;
+
+ $tmp = $before;
+ my $tmp_in_comment = 0;
+ if ($_shiftline_in_comment) {
+ if ( $before =~ m/^.*?-->(.*)$/s ) {
+ $tmp = $1;
+ $tmp_in_comment = 0;
+ } else {
+ $tmp_in_comment = 1;
+ }
+ }
+ if ( $tmp_in_comment == 0 ) {
+ while ( $tmp =~ m/^.*?<!--.*?-->(.*)$/s ) {
+ $tmp = $1;
+ }
+ if ( $tmp =~ m/<!--/s ) {
+ $tmp_in_comment = 1;
+ }
+ }
+ next if ($tmp_in_comment);
+
+ open( my $in, '<:encoding(' . ( $input_charset // 'UTF-8' ) . ')', $entities{$k} )
+ or croak wrap_mod( "po4a::xml::shiftline", dgettext( "po4a", "%s: Cannot read from %s: %s" ),
+ $ref, $entities{$k}, $! );
+ while ( defined( my $textline = <$in> ) ) {
+ $linenum++;
+ my $textref = $entities{$k} . ":$linenum";
+ push @textentries, ( $textline, $textref );
+ }
+ close $in
+ or
+ croak wrap_mod( "po4a::xml::shiftline", dgettext( "po4a", "%s: Cannot close %s after reading: %s" ),
+ $ref, $entities{$k}, $! );
+
+ push @textentries, ( $after, $ref );
+ $line = $before . ( shift @textentries );
+ $ref .= " " . ( shift @textentries );
+ $self->unshiftline(@textentries);
+ }
+ }
+
+ $tmp = $line;
+ if ($_shiftline_in_comment) {
+ if ( $line =~ m/^.*?-->(.*)$/s ) {
+ $tmp = $1;
+ $_shiftline_in_comment = 0;
+ } else {
+ $_shiftline_in_comment = 1;
+ }
+ }
+ if ( $_shiftline_in_comment == 0 ) {
+ while ( $tmp =~ m/^.*?<!--.*?-->(.*)$/s ) {
+ $tmp = $1;
+ }
+ if ( $tmp =~ m/<!--/s ) {
+ $_shiftline_in_comment = 1;
+ }
+ }
+ }
+
+ return ( $line, $ref );
+}
+
+sub read {
+ my ( $self, $filename, $refname, $charset ) = @_;
+ croak wrap_mod( "po4a::xml", dgettext( "po4a", "Cannot have more than one input charset in XML files (%s and %s)" ),
+ $input_charset, $charset )
+ if ( defined $input_charset && $input_charset ne $charset );
+ $input_charset = $charset;
+ push @{ $self->{DOCPOD}{infile} }, $filename;
+ $self->Locale::Po4a::TransTractor::read( $filename, $refname, $charset );
+}
+
+sub parse {
+ my $self = shift;
+ map { $self->{'current_file'} = $_; $self->parse_file($_) } @{ $self->{DOCPOD}{infile} };
+}
+
+# @save_holders is a stack of references to ('paragraph', 'translation',
+# 'sub_translations', 'open', 'close', 'folded_attributes') hashes, where:
+# paragraph is a reference to an array (see paragraph in the
+# treat_content() subroutine) of strings followed by
+# references. It contains the @paragraph array as it was
+# before the processing was interrupted by a tag introducing
+# a placeholder.
+# translation is the translation of this level up to now
+# sub_translations is a reference to an array of strings containing the
+# translations which must replace the placeholders.
+# open is the tag which opened the placeholder.
+# close is the tag which closed the placeholder.
+# folded_attributes is a hash of tags with their attributes (<tag attrs=...>
+# strings), referenced by the folded tag id, which should
+# replace the <tag po4a-id=id> strings in the current
+# translation.
+#
+# If @save_holders only has 1 holder, then we are not processing the
+# content of a holder, we are translating the document.
+my @save_holders;
+
+# If we are at the bottom of the stack and there is no <placeholder ...> in
+# the current translation, we can push the translation in the translated
+# document.
+# Otherwise, we keep the translation in the current holder.
+
+# This pushline function outputs a next translated text string.
+# For XML, it overloads the Transtractor pushline to handle:
+# - placeholder tag replacement with translated text with tags
+#
+# This twist causes the string pushed into @{$self->{TT}{doc_out}} to have
+# multi-line contents with internal "\n" for placeholder tags.
+sub pushline {
+ my ( $self, $line ) = ( shift, shift );
+
+ my $holder = $save_holders[$#save_holders];
+ my $translation = $holder->{'translation'};
+ $translation .= $line;
+
+ while ( %{ $holder->{folded_attributes} }
+ and $translation =~ m/^(.*)<([^>]+?)\s+po4a-id=([0-9]+)>(.*)$/s )
+ {
+ my $begin = $1;
+ my $tag = $2;
+ my $id = $3;
+ my $end = $4;
+ if ( defined $holder->{folded_attributes}->{$id} ) {
+
+ # TODO: check if the tag is the same
+ $translation = $begin . $holder->{folded_attributes}->{$id} . $end;
+ delete $holder->{folded_attributes}->{$id};
+ } else {
+
+ # TODO: It will be hard to identify the location.
+ # => find a way to retrieve the reference.
+ die wrap_mod(
+ "po4a::xml::pushline",
+ dgettext(
+ "po4a",
+ "'po4a-id=%d' in the translation does not exist in the original string (or 'po4a-id=%d' used twice in the translation)."
+ ),
+ $id, $id
+ );
+ }
+ }
+
+ # TODO: check that %folded_attributes is empty at some time
+ # => in translate_paragraph?
+
+ if ( ( $#save_holders > 0 )
+ or ( $translation =~ m/<placeholder\s/s ) )
+ {
+ $holder->{'translation'} = $translation;
+ } else {
+ $self->SUPER::pushline($translation);
+ $holder->{'translation'} = '';
+ }
+}
+
+=head1 TRANSLATING WITH PO4A::XML
+
+This module can be used directly to handle generic XML documents. This will
+extract all tag's content, and no attributes, since it's where the text is
+written in most XML based documents.
+
+There are some options (described in the next section) that can customize
+this behavior. If this doesn't fit to your document format you're encouraged
+to write your own module derived from this, to describe your format's details.
+See the section B<WRITING DERIVATE MODULES> below, for the process description.
+
+=cut
+
+#
+# Parse file and translate it
+#
+sub parse_file {
+ my ( $self, $filename ) = @_;
+ my $eof = 0;
+
+ print wrap_mod( "po4a::xml::parse_file", dgettext( "po4a", ">>> filename = '%s'" ), $filename )
+ if $self->{options}{'debug'};
+ while ( !$eof ) {
+
+ # We get all the text until the next breaking tag (not
+ # inline) and translate it
+ $eof = $self->treat_content;
+ if ( !$eof ) {
+
+ # And then we treat the following breaking tag
+ $eof = $self->treat_tag;
+ }
+ }
+}
+
+=head1 OPTIONS ACCEPTED BY THIS MODULE
+
+The global debug option causes this module to show the excluded strings, in
+order to see if it skips something important.
+
+These are this module's particular options:
+
+=over 4
+
+=item B<nostrip>
+
+Prevents it to strip the spaces around the extracted strings.
+
+=item B<wrap>
+
+Canonicalizes the string to translate, considering that whitespaces are not
+important, and wraps the translated document. This option can be overridden
+by custom tag options. See the B<translated> option below.
+
+=item B<unwrap_attributes>
+
+Attributes are wrapped by default. This option disables wrapping.
+
+=item B<caseinsensitive>
+
+It makes the tags and attributes searching to work in a case insensitive
+way. If it's defined, it will treat E<lt>BooKE<gt>laNG and E<lt>BOOKE<gt>Lang as E<lt>bookE<gt>lang.
+
+=item B<escapequotes>
+
+Escape quotes in output strings. Necessary, for example, for creating
+string resources for use by Android build tools.
+
+See also: https://developer.android.com/guide/topics/resources/string-resource.html
+
+=item B<includeexternal>
+
+When defined, external entities are included in the generated (translated)
+document, and for the extraction of strings. If it's not defined, you
+will have to translate external entities separately as independent
+documents.
+
+=item B<ontagerror>
+
+This option defines the behavior of the module when it encounters invalid
+XML syntax (a closing tag which does not match the last opening tag).
+It can take the following values:
+
+=over
+
+=item I<fail>
+
+This is the default value.
+The module will exit with an error.
+
+=item I<warn>
+
+The module will continue, and will issue a warning.
+
+=item I<silent>
+
+The module will continue without any warnings.
+
+=back
+
+Be careful when using this option.
+It is generally recommended to fix the input file.
+
+=item B<tagsonly>
+
+Note: This option is deprecated.
+
+Extracts only the specified tags in the B<tags> option. Otherwise, it
+will extract all the tags except the ones specified.
+
+=item B<doctype>
+
+String that will try to match with the first line of the document's doctype
+(if defined). If it doesn't, a warning will indicate that the document
+might be of a bad type.
+
+=item B<addlang>
+
+String indicating the path (e.g. E<lt>bbbE<gt>E<lt>aaaE<gt>) of a tag
+where a lang="..." attribute shall be added. The language will be defined
+as the basename of the PO file without any .po extension.
+
+=item B<optionalclosingtag>
+
+Boolean indicating whether closing tags are optional (as in HTML). By default,
+missing closing tags raise an error handled according to B<ontagerror>.
+
+=item B<tags>
+
+Note: This option is deprecated.
+You should use the B<translated> and B<untranslated> options instead.
+
+Space-separated list of tags you want to translate or skip. By default,
+the specified tags will be excluded, but if you use the "tagsonly" option,
+the specified tags will be the only ones included. The tags must be in the
+form E<lt>aaaE<gt>, but you can join some (E<lt>bbbE<gt>E<lt>aaaE<gt>) to say that the content of
+the tag E<lt>aaaE<gt> will only be translated when it's into a E<lt>bbbE<gt> tag.
+
+You can also specify some tag options by putting some characters in front of
+the tag hierarchy. For example, you can put I<w> (wrap) or I<W> (don't wrap)
+to override the default behavior specified by the global B<wrap> option.
+
+Example: WE<lt>chapterE<gt>E<lt>titleE<gt>
+
+=item B<attributes>
+
+Space-separated list of tag's attributes you want to translate. You can
+specify the attributes by their name (for example, "lang"), but you can
+prefix it with a tag hierarchy, to specify that this attribute will only be
+translated when it's in the specified tag. For example: E<lt>bbbE<gt>E<lt>aaaE<gt>lang
+specifies that the lang attribute will only be translated if it's in an
+E<lt>aaaE<gt> tag, and it's in a E<lt>bbbE<gt> tag.
+
+=item B<foldattributes>
+
+Do not translate attributes in inline tags.
+Instead, replace all attributes of a tag by po4a-id=<id>.
+
+This is useful when attributes shall not be translated, as this simplifies the
+strings for translators, and avoids typos.
+
+=item B<customtag>
+
+Space-separated list of tags which should not be treated as tags.
+These tags are treated as inline, and do not need to be closed.
+
+=item B<break>
+
+Space-separated list of tags which should break the sequence.
+By default, all tags break the sequence.
+
+The tags must be in the form <aaa>, but you can join some
+(<bbb><aaa>), if a tag (<aaa>) should only be considered
+when it's within another tag (<bbb>).
+
+Please note a tag should be listed in only one of the B<break>, B<inline>
+B<placeholder>, or B<customtag> setting string.
+
+=item B<inline>
+
+Space-separated list of tags which should be treated as inline.
+By default, all tags break the sequence.
+
+The tags must be in the form <aaa>, but you can join some
+(<bbb><aaa>), if a tag (<aaa>) should only be considered
+when it's within another tag (<bbb>).
+
+=item B<placeholder>
+
+Space-separated list of tags which should be treated as placeholders.
+Placeholders do not break the sequence, but the content of placeholders is
+translated separately.
+
+The location of the placeholder in its block will be marked with a string
+similar to:
+
+ <placeholder type=\"footnote\" id=\"0\"/>
+
+The tags must be in the form <aaa>, but you can join some
+(<bbb><aaa>), if a tag (<aaa>) should only be considered
+when it's within another tag (<bbb>).
+
+=item B<break-pi>
+
+By default, Processing Instructions (i.e., C<E<lt>? ... ?E<gt>> tags) are handled as inline tags.
+Pass this option if you want the PI to be handled as breaking tag.
+Note that unprocessed PHP tags are handled as Processing Instructions by the parser.
+
+=item B<nodefault>
+
+Space separated list of tags that the module should not try to set by
+default in any category.
+
+If you have a tag which has its default setting by the subclass of this module
+but you want to set alternative setting, you need to list that tag as a part of
+the B<nodefault> setting string.
+
+=item B<cpp>
+
+Support C preprocessor directives.
+When this option is set, po4a will consider preprocessor directives as
+paragraph separators.
+This is important if the XML file must be preprocessed because otherwise
+the directives may be inserted in the middle of lines if po4a consider it
+belong to the current paragraph, and they won't be recognized by the
+preprocessor.
+Note: the preprocessor directives must only appear between tags
+(they must not break a tag).
+
+=item B<translated>
+
+Space-separated list of tags you want to translate.
+
+The tags must be in the form <aaa>, but you can join some
+(<bbb><aaa>), if a tag (<aaa>) should only be considered
+when it's within another tag (<bbb>).
+
+You can also specify some tag options by putting some characters in front of
+the tag hierarchy. This overrides the default behavior specified by the global
+B<wrap> and B<defaulttranslateoption> option.
+
+=over
+
+=item I<w>
+
+Tags should be translated and content can be re-wrapped.
+
+=item I<W>
+
+Tags should be translated and content should not be re-wrapped.
+
+=item I<i>
+
+Tags should be translated inline.
+
+=item I<p>
+
+Tags should be translated as placeholders.
+
+=back
+
+Internally, the XML parser only cares about these four options: I<w> I<W> I<i> I<p>.
+
+* Tags listed in B<break> are set to I<w> or I<W> depending on the B<wrap> option.
+
+* Tags listed in B<inline> are set to I<i>.
+
+* Tags listed in B<placeholder> are set to I<p>.
+
+* Tags listed in B<untranslated> are without any of these options set.
+
+You can verify actual internal parameter behavior by invoking B<po4a> with
+B<--debug> option.
+
+Example: WE<lt>chapterE<gt>E<lt>titleE<gt>
+
+Please note a tag should be listed in either B<translated> or B<untranslated>
+setting string.
+
+=item B<untranslated>
+
+Space-separated list of tags you do not want to translate.
+
+The tags must be in the form <aaa>, but you can join some
+(<bbb><aaa>), if a tag (<aaa>) should only be considered
+when it's within another tag (<bbb>).
+
+Please note a translatable inline tag in an untranslated tag is treated as a
+translatable breaking tag, I<i> setting is dropped and I<w> or I<W> is set
+depending on the B<wrap> option.
+
+=item B<defaulttranslateoption>
+
+The default categories for tags that are not in any of the translated,
+untranslated, break, inline, or placeholder.
+
+This is a set of letters as defined in B<translated> and this setting is only
+valid for translatable tags.
+
+=back
+
+=cut
+
+sub initialize {
+ my $self = shift;
+ my %options = @_;
+
+ # Reset the path
+ @path = ();
+
+ # Initialize the stack of holders
+ my @paragraph = ();
+ my @sub_translations = ();
+ my %folded_attributes;
+ my %holder = (
+ 'paragraph' => \@paragraph,
+ 'translation' => "",
+ 'sub_translations' => \@sub_translations,
+ 'folded_attributes' => \%folded_attributes
+ );
+ @save_holders = ( \%holder );
+
+ $self->{options}{'addlang'} = 0;
+ $self->{options}{'nostrip'} = 0;
+ $self->{options}{'wrap'} = 0;
+ $self->{options}{'unwrap_attributes'} = 0;
+ $self->{options}{'caseinsensitive'} = 0;
+ $self->{options}{'escapequotes'} = 0;
+ $self->{options}{'tagsonly'} = 0;
+ $self->{options}{'tags'} = '';
+ $self->{options}{'break'} = '';
+ $self->{options}{'translated'} = '';
+ $self->{options}{'untranslated'} = '';
+ $self->{options}{'defaulttranslateoption'} = '';
+ $self->{options}{'attributes'} = '';
+ $self->{options}{'foldattributes'} = 0;
+ $self->{options}{'inline'} = '';
+ $self->{options}{'break-pi'} = 0;
+ $self->{options}{'placeholder'} = '';
+ $self->{options}{'customtag'} = '';
+ $self->{options}{'doctype'} = '';
+ $self->{options}{'nodefault'} = '';
+ $self->{options}{'includeexternal'} = 0;
+ $self->{options}{'ontagerror'} = "fail";
+ $self->{options}{'cpp'} = 0;
+
+ $self->{options}{'verbose'} = '';
+ $self->{options}{'debug'} = '';
+
+ foreach my $opt ( keys %options ) {
+ if ( $options{$opt} ) {
+ die wrap_mod( "po4a::xml::initialize", dgettext( "po4a", "Unknown option: %s" ), $opt )
+ unless exists $self->{options}{$opt};
+ $self->{options}{$opt} = $options{$opt};
+ }
+ }
+
+ # Default options set by modules. Forbidden for users.
+ $self->{options}{'_default_translated'} = '';
+ $self->{options}{'_default_untranslated'} = '';
+ $self->{options}{'_default_break'} = '';
+ $self->{options}{'_default_inline'} = '';
+ $self->{options}{'_default_placeholder'} = '';
+ $self->{options}{'_default_attributes'} = '';
+ $self->{options}{'_default_customtag'} = '';
+
+ # It will maintain the list of the translatable tags
+ $self->{tags} = ();
+ $self->{translated} = ();
+ $self->{untranslated} = ();
+
+ # It will maintain the list of the translatable attributes
+ $self->{attributes} = ();
+
+ # It will maintain the list of the breaking tags
+ $self->{break} = ();
+
+ # It will maintain the list of the inline tags
+ $self->{inline} = ();
+
+ # It will maintain the list of the placeholder tags
+ $self->{placeholder} = ();
+
+ # It will maintain the list of the customtag tags
+ $self->{customtag} = ();
+
+ # list of the tags that must not be set in the tags or inline category
+ # by this module or sub-module (unless specified in an option)
+ $self->{nodefault} = ();
+
+ print "Call treat_options\n" if $self->{options}{'debug'};
+ $self->treat_options;
+
+ # Clear cache
+ %translate_options_cache = ();
+}
+
+=head1 WRITING DERIVATIVE MODULES
+
+=head2 DEFINE WHAT TAGS AND ATTRIBUTES TO TRANSLATE
+
+The simplest customization is to define which tags and attributes you want
+the parser to translate. This should be done in the initialize function.
+First you should call the main initialize, to get the command-line options,
+and then, append your custom definitions to the options hash. If you want
+to treat some new options from command line, you should define them before
+calling the main initialize:
+
+ $self->{options}{'new_option'}='';
+ $self->SUPER::initialize(%options);
+ $self->{options}{'_default_translated'}.=' <p> <head><title>';
+ $self->{options}{'attributes'}.=' <p>lang id';
+ $self->{options}{'_default_inline'}.=' <br>';
+ $self->treat_options;
+
+You should use the B<_default_inline>, B<_default_break>,
+B<_default_placeholder>, B<_default_translated>, B<_default_untranslated>,
+and B<_default_attributes> options in derivative modules. This allow users
+to override the default behavior defined in your module with command line
+options.
+
+=head2 OVERRIDE THE DEFAULT BEHAVIOR WITH COMMAND LINE OPTIONS
+
+If you don't like the default behavior of this xml module and its derivative
+modules, you can provide command line options to change their behavior.
+
+See L<Locale::Po4a::Docbook(3pm)|Locale::Po4a::Docbook>,
+
+
+=head2 OVERRIDING THE found_string FUNCTION
+
+Another simple step is to override the function "found_string", which
+receives the extracted strings from the parser, in order to translate them.
+There you can control which strings you want to translate, and perform
+transformations to them before or after the translation itself.
+
+It receives the extracted text, the reference on where it was, and a hash
+that contains extra information to control what strings to translate, how
+to translate them and to generate the comment.
+
+The content of these options depends on the kind of string it is (specified in an
+entry of this hash):
+
+=over
+
+=item type="tag"
+
+The found string is the content of a translatable tag. The entry "tag_options"
+contains the option characters in front of the tag hierarchy in the module
+"tags" option.
+
+=item type="attribute"
+
+Means that the found string is the value of a translatable attribute. The
+entry "attribute" has the name of the attribute.
+
+=back
+
+It must return the text that will replace the original in the translated
+document. Here's a basic example of this function:
+
+ sub found_string {
+ my ($self,$text,$ref,$options)=@_;
+ $text = $self->translate($text,$ref,"type ".$options->{'type'},
+ 'wrap'=>$self->{options}{'wrap'});
+ return $text;
+ }
+
+There's another simple example in the new Dia module, which only filters
+some strings.
+
+=cut
+
+sub found_string {
+ my ( $self, $text, $ref, $options ) = @_;
+
+ if ( $text =~ m/^\s*$/s ) {
+ return $text;
+ }
+
+ my $comment;
+ my $wrap = $self->{options}{'wrap'};
+
+ if ( $options->{'type'} eq "tag" ) {
+ $comment = "Content of: " . $self->get_path;
+
+ if ( $options->{'tag_options'} =~ /w/ ) {
+ $wrap = 1;
+ }
+ if ( $options->{'tag_options'} =~ /W/ ) {
+ $wrap = 0;
+ }
+ } elsif ( $options->{'type'} eq "attribute" ) {
+ $comment = "Attribute '" . $options->{'attribute'} . "' of: " . $self->get_path;
+ if ( $self->{options}{'unwrap_attributes'} == 0 ) {
+ $wrap = 1;
+ } else {
+ $wrap = 0;
+ }
+ } elsif ( $options->{'type'} eq "CDATA" ) {
+ $comment = "CDATA";
+ $wrap = 0;
+ } else {
+ die wrap_ref_mod( $ref, "po4a::xml", dgettext( "po4a", "Internal error: unknown type identifier '%s'." ),
+ $options->{'type'} );
+ }
+ $text = $self->translate( $text, $ref, $comment, 'wrap' => $wrap, comment => $options->{'comments'} );
+ if ( $self->{options}{'escapequotes'} ) {
+ $text =~ s/'/\\'/g;
+ $text =~ s/"/\\"/g;
+ }
+ return $text;
+}
+
+=head2 MODIFYING TAG TYPES (TODO)
+
+This is a more complex one, but it enables a (almost) total customization.
+It's based on a list of hashes, each one defining a tag type's behavior. The
+list should be sorted so that the most general tags are after the most
+concrete ones (sorted first by the beginning and then by the end keys). To
+define a tag type you'll have to make a hash with the following keys:
+
+=over 4
+
+=item B<beginning>
+
+Specifies the beginning of the tag, after the "E<lt>".
+
+=item B<end>
+
+Specifies the end of the tag, before the "E<gt>".
+
+=item B<breaking>
+
+It says if this is a breaking tag class. A non-breaking (inline) tag is one
+that can be taken as part of the content of another tag. It can take the
+values false (0), true (1) or undefined. If you leave this undefined, you'll
+have to define the f_breaking function that will say whether a concrete tag of
+this class is a breaking tag or not.
+
+=item B<f_breaking>
+
+It's a function that will tell if the next tag is a breaking one or not. It
+should be defined if the B<breaking> option is not.
+
+=item B<f_extract>
+
+If you leave this key undefined, the generic extraction function will have to
+extract the tag itself. It's useful for tags that can have other tags or
+special structures in them, so that the main parser doesn't get mad. This
+function receives a boolean that says if the tag should be removed from the
+input stream or not.
+
+=item B<f_translate>
+
+This function receives the tag (in the get_string_until() format) and returns
+the translated tag (translated attributes or all needed transformations) as a
+single string.
+
+=back
+
+=cut
+
+##### Generic XML tag types #####'
+
+our @tag_types = (
+ {
+ beginning => "!--#",
+ end => "--",
+ breaking => 0,
+ f_extract => \&tag_extract_comment,
+ f_translate => \&tag_trans_comment
+ },
+ {
+ beginning => "!--",
+ end => "--",
+ breaking => 0,
+ f_extract => \&tag_extract_comment,
+ f_translate => \&tag_trans_comment
+ },
+ {
+ beginning => "?xml",
+ end => "?",
+ breaking => 1,
+ f_translate => \&tag_trans_xmlhead
+ },
+ {
+ beginning => "?",
+ end => "?",
+ breaking => 0, # Can be changed with option break-pi
+ f_translate => \&tag_trans_procins
+ },
+ {
+ beginning => "!DOCTYPE",
+ end => "",
+ breaking => 1,
+ f_extract => \&tag_extract_doctype,
+ f_translate => \&tag_trans_doctype
+ },
+ {
+ beginning => "![CDATA[",
+ end => "]]",
+ breaking => 1,
+ f_extract => \&CDATA_extract,
+ f_translate => \&CDATA_trans
+ },
+ {
+ beginning => "/",
+ end => "",
+ f_breaking => \&tag_break_close,
+ f_translate => \&tag_trans_close
+ },
+ {
+ beginning => "",
+ end => "/",
+ f_breaking => \&tag_break_alone,
+ f_translate => \&tag_trans_alone
+ },
+ {
+ beginning => "",
+ end => "",
+ f_breaking => \&tag_break_open,
+ f_translate => \&tag_trans_open
+ }
+);
+
+sub tag_extract_comment {
+ my ( $self, $remove ) = ( shift, shift );
+ my ( $eof, @tag ) = $self->get_string_until( '-->', { include => 1, remove => $remove } );
+ return ( $eof, @tag );
+}
+
+sub tag_trans_comment {
+ my ( $self, @tag ) = @_;
+ return $self->join_lines(@tag);
+}
+
+sub tag_trans_xmlhead {
+ my ( $self, @tag ) = @_;
+
+ # We don't have to translate anything from here: throw away references
+ my $tag = $self->join_lines(@tag);
+ $tag =~ /encoding=(("|')|)(.*?)(\s|\2)/s;
+ my $in_charset = $3;
+ my $out_charset = $self->get_out_charset;
+
+ if ( defined $in_charset ) {
+ croak wrap_mod(
+ "po4a::xml",
+ dgettext(
+ "po4a",
+ "The file %s declares %s as encoding, but you provided %s as master charset. Please change either setting."
+ ),
+ $self->{'current_file'},
+ $in_charset,
+ $input_charset
+ ) if ( length( $input_charset // '' ) > 0 && uc($input_charset) ne uc($in_charset) );
+
+ $tag =~ s/$in_charset/$out_charset/;
+ } else {
+ if ( $tag =~ m/standalone/ ) {
+ $tag =~ s/(standalone)/encoding="$out_charset" $1/;
+ } else {
+ $tag .= " encoding=\"$out_charset\"";
+ }
+ }
+
+ return $tag;
+}
+
+sub tag_trans_procins {
+ my ( $self, @tag ) = @_;
+ return $self->join_lines(@tag);
+}
+
+sub tag_extract_doctype {
+ my ( $self, $remove ) = ( shift, shift );
+
+ # Check if there is an internal subset (between []).
+ my ( $eof, @tag ) = $self->get_string_until( '>', { include => 1, unquoted => 1 } );
+ my $parity = 0;
+ my $paragraph = "";
+ map { $parity = 1 - $parity; $paragraph .= $parity ? $_ : ""; } @tag;
+ my $found = 0;
+ if ( $paragraph =~ m/<.*\[.*</s ) {
+ $found = 1;
+ }
+
+ if ( not $found ) {
+ ( $eof, @tag ) = $self->get_string_until( '>', { include => 1, remove => $remove, unquoted => 1 } );
+ } else {
+ ( $eof, @tag ) =
+ $self->get_string_until( ']\s*>', { include => 1, remove => $remove, unquoted => 1, regex => 1 } );
+ }
+ return ( $eof, @tag );
+}
+
+sub tag_trans_doctype {
+
+ # This check is not really reliable. There are system and public
+ # identifiers. Only the public one could be checked reliably.
+ my ( $self, @tag ) = @_;
+ if ( defined $self->{options}{'doctype'} ) {
+ my $doctype = $self->{options}{'doctype'};
+ if ( $tag[0] !~ /\Q$doctype\E/i ) {
+ warn wrap_ref_mod(
+ $tag[1],
+ "po4a::xml",
+ dgettext(
+ "po4a",
+ "Bad document type. '%s' expected. You can fix this warning with a -o doctype option, or ignore this check with -o doctype=\"\"."
+ ),
+ $doctype
+ );
+ }
+ }
+ my $i = 0;
+ my $basedir = $tag[1];
+ $basedir =~ s/:[0-9]+$//;
+ $basedir = dirname($basedir);
+
+ while ( $i < $#tag ) {
+ my $t = $tag[$i];
+ my $ref = $tag[ $i + 1 ];
+ if ( $t =~ /^(\s*<!ENTITY\s+)(.*)$/is ) {
+ my $part1 = $1;
+ my $part2 = $2;
+ my $includenow = 0;
+ my $file = 0;
+ my $name = "";
+ if ( $part2 =~ /^(%\s+)(.*)$/s ) {
+ $part1 .= $1;
+ $part2 = $2;
+ $includenow = 1;
+ }
+ $part2 =~ /^(\S+)(\s+)(.*)$/s;
+ $name = $1;
+ $part1 .= $1 . $2;
+ $part2 = $3;
+ if ( $part2 =~ /^(SYSTEM\s+)(.*)$/is ) {
+ $part1 .= $1;
+ $part2 = $2;
+ $file = 1;
+ if ( $self->{options}{'includeexternal'} ) {
+ $entities{$name} = $part2;
+ $entities{$name} =~ s/^"?(.*?)".*$/$1/s;
+ $entities{$name} = File::Spec->catfile( $basedir, $entities{$name} );
+ }
+ }
+ if ( ( not $file ) and ( not $includenow ) ) {
+ if ( $part2 =~ m/^\s*(["'])(.*)\1(\s*>.*)$/s ) {
+ my $comment = "Content of the $name entity";
+ my $quote = $1;
+ my $text = $2;
+ $part2 = $3;
+ $text = $self->translate( $text, $ref, $comment, 'wrap' => 1 );
+ $t = $part1 . "$quote$text$quote$part2";
+ }
+ }
+
+ # print $part1."\n";
+ # print $name."\n";
+ # print $part2."\n";
+ }
+ $tag[$i] = $t;
+ $i += 2;
+ }
+ return $self->join_lines(@tag);
+}
+
+sub tag_break_close {
+ my ( $self, @tag ) = @_;
+ my $struct = $self->get_path;
+ my $options = $self->get_translate_options($struct);
+ if ( $options =~ m/[ip]/ ) {
+ return 0;
+ } else {
+ return 1;
+ }
+}
+
+sub tag_trans_close {
+ my ( $self, @tag ) = @_;
+ my $name = $self->get_tag_name(@tag);
+
+ my $test = pop @path;
+ if ( !defined($test) || $test ne $name ) {
+
+ # Check whether it's simply a missing closing tag that I could survive
+ if ( ( $self->{options}{'optionalclosingtag'} // 0 ) == 1 ) {
+ my $found = 0;
+ map { $found = 1 if $_ eq $name } @path;
+ if ($found)
+ { # The opening tag corresponding to the one closed now exists in the path. Pop everything in between
+ while ( $test ne $name ) {
+ $test = pop @path;
+ }
+ return $self->join_lines(@tag);
+ }
+ }
+
+ my $ontagerror = $self->{options}{'ontagerror'};
+ if ( $ontagerror eq "warn" ) {
+ warn wrap_ref_mod(
+ $tag[1],
+ "po4a::xml",
+ dgettext( "po4a", "Unexpected closing tag </%s> found. The main document may be wrong. Continuing…" ),
+ $name
+ );
+ } elsif ( $ontagerror ne "silent" ) {
+ die wrap_ref_mod( $tag[1], "po4a::xml",
+ dgettext( "po4a", "Unexpected closing tag </%s> found. The main document may be wrong." ), $name );
+ }
+ }
+ return $self->join_lines(@tag);
+}
+
+sub CDATA_extract {
+ my ( $self, $remove ) = ( shift, shift );
+ my ( $eof, @tag ) = $self->get_string_until( ']]>', { include => 1, unquoted => 0, remove => $remove } );
+
+ return ( $eof, @tag );
+}
+
+sub CDATA_trans {
+ my ( $self, @tag ) = @_;
+ return $self->found_string( $self->join_lines(@tag), $tag[1], { 'type' => "CDATA" } );
+}
+
+sub tag_break_alone {
+ my ( $self, @tag ) = @_;
+ my $struct = $self->get_path( $self->get_tag_name(@tag) );
+ if ( $self->get_translate_options($struct) =~ m/[ip]/ ) {
+ return 0;
+ } else {
+ return 1;
+ }
+}
+
+sub tag_trans_alone {
+ my ( $self, @tag ) = @_;
+ my $name = $self->get_tag_name(@tag);
+ push @path, $name;
+
+ $name = $self->treat_attributes(@tag);
+
+ pop @path;
+ return $name;
+}
+
+sub tag_break_open {
+ my ( $self, @tag ) = @_;
+ my $struct = $self->get_path( $self->get_tag_name(@tag) );
+ my $options = $self->get_translate_options($struct);
+ if ( $options =~ m/[ip]/ ) {
+ return 0;
+ } else {
+ return 1;
+ }
+}
+
+sub tag_trans_open {
+ my ( $self, @tag ) = @_;
+ my $name = $self->get_tag_name(@tag);
+ push @path, $name;
+
+ $name = $self->treat_attributes(@tag);
+
+ if ( defined $self->{options}{'addlang'} ) {
+ my $struct = $self->get_path();
+ if ( $struct eq $self->{options}{'addlang'} ) {
+ $name .= ' lang="' . $self->{TT}{po_in}->{lang} . '"';
+ }
+ }
+
+ return $name;
+}
+
+##### END of Generic XML tag types #####
+
+=head1 INTERNAL FUNCTIONS used to write derivative parsers
+
+=head2 WORKING WITH TAGS
+
+=over 4
+
+=item get_path()
+
+This function returns the path to the current tag from the document's root,
+in the form E<lt>htmlE<gt>E<lt>bodyE<gt>E<lt>pE<gt>.
+
+An additional array of tags (without brackets) can be passed as argument.
+These path elements are added to the end of the current path.
+
+=cut
+
+sub get_path {
+ my $self = shift;
+ my @add = @_;
+ if ( @path > 0 or @add > 0 ) {
+ return "<" . join( "><", @path, @add ) . ">";
+ } else {
+ return "outside any tag (error?)";
+ }
+}
+
+=item tag_type()
+
+This function returns the index from the tag_types list that fits to the next
+tag in the input stream, or -1 if it's at the end of the input file.
+
+Here, the tag has structure started by E<lt> and end by E<gt> and it can
+contain multiple lines.
+
+This works on the array C<< @{$self->{TT}{doc_in}} >> holding input document
+data and reference indirectly via C<< $self->shiftline() >> and C<<
+$self->unshiftline($$) >>.
+
+=cut
+
+sub tag_type {
+ my $self = shift;
+ my ( $line, $ref ) = $self->shiftline();
+ my ( $match1, $match2 );
+ my $found = 0;
+ my $i = 0;
+
+ if ( !defined($line) ) { return -1; }
+
+ $self->unshiftline( $line, $ref );
+ my ( $eof, @lines ) = $self->get_string_until( ">", { include => 1, unquoted => 1 } );
+ my $line2 = $self->join_lines(@lines);
+ while ( !$found && $i < @tag_types ) {
+ ( $match1, $match2 ) = ( $tag_types[$i]->{beginning}, $tag_types[$i]->{end} );
+ if ( $line =~ /^<\Q$match1\E/ ) {
+ if ( !defined( $tag_types[$i]->{f_extract} ) ) {
+
+ #print substr($line2,length($line2)-1-length($match2),1+length($match2))."\n";
+ if ( defined($line2) and $line2 =~ /\Q$match2\E>$/ ) {
+ $found = 1;
+
+ #print "YES: <".$match1." ".$match2.">\n";
+ } else {
+
+ #print "NO: <".$match1." ".$match2.">\n";
+ $i++;
+ }
+ } else {
+ $found = 1;
+ }
+ } else {
+ $i++;
+ }
+ }
+ if ( !$found ) {
+
+ #It should never enter here, unless you undefine the most
+ #general tags (as <...>)
+ chomp $line;
+ die $ref . ": Unknown tag type: " . $line . "\n";
+ } else {
+ return $i;
+ }
+}
+
+=item extract_tag($$)
+
+This function returns the next tag from the input stream without the beginning
+and end, in an array form, to maintain the references from the input file. It
+has two parameters: the type of the tag (as returned by tag_type) and a
+boolean, that indicates if it should be removed from the input stream.
+
+This works on the array C<< @{$self->{TT}{doc_in}} >> holding input document
+data and reference indirectly via C<< $self->shiftline() >> and C<<
+$self->unshiftline($$) >>.
+
+=cut
+
+sub extract_tag {
+ my ( $self, $type, $remove ) = ( shift, shift, shift );
+ my ( $match1, $match2 ) = ( $tag_types[$type]->{beginning}, $tag_types[$type]->{end} );
+ my ( $eof, @tag );
+ if ( defined( $tag_types[$type]->{f_extract} ) ) {
+
+ # <!--# ... -->, <!-- ... -->, <!DOCTYPE ... >, or <![CDATA[ ... ]]>
+ ( $eof, @tag ) = &{ $tag_types[$type]->{f_extract} }( $self, $remove );
+ } else {
+
+ # <?xml ?>, <? ... ?>, </ tag>, <tag />, or <tag >.
+ ( $eof, @tag ) = $self->get_string_until( $match2 . ">", { include => 1, remove => $remove, unquoted => 1 } );
+ }
+
+ # Please note even index of array @tag holds actual text of input line
+ # Please note odd index of array @tag holds its reference = $filename:$flinenum
+ $tag[0] =~ /^<\Q$match1\E(.*)$/s;
+ $tag[0] = $1;
+ $tag[ $#tag - 1 ] =~ /^(.*)\Q$match2\E>$/s;
+ $tag[ $#tag - 1 ] = $1;
+
+ # Please note even index of array @tag holds tag string
+ return ( $eof, @tag );
+}
+
+=item get_tag_name(@)
+
+This function returns the name of the tag passed as an argument, in the array
+form returned by extract_tag.
+
+=cut
+
+sub get_tag_name {
+ my ( $self, @tag ) = @_;
+ $tag[0] =~ /^(\S*)/;
+ return $1;
+}
+
+=item breaking_tag()
+
+This function returns a boolean that says if the next tag in the input stream
+is a breaking tag or not (inline tag). It leaves the input stream intact.
+
+=cut
+
+sub breaking_tag {
+ my $self = shift;
+ my $break;
+
+ my $type = $self->tag_type;
+ if ( $type == -1 ) { return 0; }
+
+ $break = $tag_types[$type]->{breaking};
+ if ( !defined($break) ) {
+
+ # This tag's breaking depends on its content
+ my ( $eof, @lines ) = $self->extract_tag( $type, 0 );
+ $break = &{ $tag_types[$type]->{f_breaking} }( $self, @lines );
+ }
+
+ # print "TAG TYPE = ".$type." (<".$tag_types[$type]->{beginning}.") break:$break\n";
+ return $break;
+}
+
+=item treat_tag()
+
+This function translates the next tag from the input stream. Using each
+tag type's custom translation functions.
+
+This works on the array C<< @{$self->{TT}{doc_in}} >> holding input document
+data and reference indirectly via C<< $self->shiftline() >> and C<<
+$self->unshiftline($$) >>.
+
+=cut
+
+sub treat_tag {
+ my $self = shift;
+ my $type = $self->tag_type;
+
+ my ( $match1, $match2 ) = ( $tag_types[$type]->{beginning}, $tag_types[$type]->{end} );
+ my ( $eof, @lines ) = $self->extract_tag( $type, 1 );
+
+ # Please note even index of array @lines holds actual text of input line
+ # Please note odd index of array @lines holds its reference = $filename:$flinenum
+
+ $lines[0] =~ /^(\s*)(.*)$/s;
+ my $space1 = $1;
+ $lines[0] = $2;
+ $lines[ $#lines - 1 ] =~ /^(.*?)(\s*)$/s;
+ my $space2 = $2;
+ $lines[ $#lines - 1 ] = $1;
+
+ # Calling this tag type's specific handling (translation of attributes...)
+ my $line = &{ $tag_types[$type]->{f_translate} }( $self, @lines );
+ print wrap_mod( "po4a::xml::treat_tag", "%s: type=%s <%s%s%s%s%s>",
+ $lines[1], $type, $match1, $space1, $line, $space2, $match2 )
+ if $self->{options}{'debug'};
+ $self->pushline( "<" . $match1 . $space1 . $line . $space2 . $match2 . ">" );
+ return $eof;
+}
+
+=item tag_in_list($@)
+
+This function returns a string value that says if the first argument (a tag
+hierarchy) matches any of the tags from the second argument (a list of tags
+or tag hierarchies). If it doesn't match, it returns 0. Else, it returns the
+matched tag's options (the characters in front of the tag) or 1 (if that tag
+doesn't have options).
+
+=back
+
+=cut
+
+sub tag_in_list ($$$) {
+ my ( $self, $path, $list ) = @_;
+ if ( $self->{options}{'caseinsensitive'} ) {
+ $path = lc $path;
+ }
+
+ while (1) {
+ if ( defined $list->{$path} ) {
+ if ( length $list->{$path} ) {
+ return $list->{$path};
+ } else {
+ return 1;
+ }
+ }
+ last unless ( $path =~ m/</ );
+ $path =~ s/^<.*?>//;
+ }
+
+ return 0;
+}
+
+=head2 WORKING WITH ATTRIBUTES
+
+=over 4
+
+=item treat_attributes(@)
+
+This function handles the translation of the tags' attributes. It receives the tag
+without the beginning / end marks, and then it finds the attributes, and it
+translates the translatable ones (specified by the module option B<attributes>).
+This returns a plain string with the translated tag.
+
+=back
+
+=cut
+
+sub treat_attributes {
+ my ( $self, @tag ) = @_;
+
+ $tag[0] =~ /^(\S*)(.*)/s;
+ my $text = $1;
+ $tag[0] = $2;
+
+ while (@tag) {
+ my $complete = 1;
+
+ $text .= $self->skip_spaces( \@tag );
+ if (@tag) {
+
+ # Get the attribute's name
+ $complete = 0;
+
+ $tag[0] =~ /^([^\s=]+)(.*)/s;
+ my $name = $1;
+ my $ref = $tag[1];
+ $tag[0] = $2;
+ $text .= $name;
+ $text .= $self->skip_spaces( \@tag );
+ if (@tag) {
+
+ # Get the '='
+ if ( $tag[0] =~ /^=(.*)/s ) {
+ $tag[0] = $1;
+ $text .= "=";
+ $text .= $self->skip_spaces( \@tag );
+ if (@tag) {
+
+ # Get the value
+ my $value = "";
+ $ref = $tag[1];
+ my $quot = substr( $tag[0], 0, 1 );
+ if ( $quot ne "\"" and $quot ne "'" ) {
+
+ # Unquoted value
+ $quot = "";
+ $tag[0] =~ /^(\S+)(.*)/s;
+ $value = $1;
+ $tag[0] = $2;
+ } else {
+
+ # Quoted value
+ $text .= $quot;
+ $tag[0] =~ /^\Q$quot\E(.*)/s;
+ $tag[0] = $1;
+ while ( $tag[0] !~ /\Q$quot\E/ ) {
+ $value .= $tag[0];
+ shift @tag;
+ shift @tag;
+ }
+ $tag[0] =~ /^(.*?)\Q$quot\E(.*)/s;
+ $value .= $1;
+ $tag[0] = $2;
+ }
+ $complete = 1;
+ if ( $self->tag_in_list( $self->get_path . $name, $self->{attributes} ) ) {
+ $text .= $self->found_string( $value, $ref, { type => "attribute", attribute => $name } );
+ } else {
+ print wrap_mod(
+ "po4a::xml::treat_attributes",
+ dgettext(
+ "po4a",
+ "%s: attribute '%s' is not defined in module option 'attributes' and\n"
+ . ".... is not translated for the attribute path '%s'"
+ ),
+ $ref, $value,
+ $self->get_path . $name
+ ) if $self->{options}{'debug'};
+ $text .= $value;
+ }
+ $text .= $quot;
+ }
+ } else { # This is an attribute with no '=' sign, nothing to translate
+ $complete = 1;
+ }
+ }
+
+ unless ($complete) {
+ my $ontagerror = $self->{options}{'ontagerror'};
+ if ( $ontagerror eq "warn" ) {
+ warn wrap_mod( "po4a::xml::treat_attributes",
+ dgettext( "po4a", "%s: Bad attribute syntax. Continuing…" ), $ref );
+ } elsif ( $ontagerror ne "silent" ) {
+ die wrap_mod( "po4a::xml::treat_attributes", dgettext( "po4a", "%s: Bad attribute syntax" ), $ref );
+ }
+ }
+ }
+ }
+ return $text;
+}
+
+# Returns an empty string if the content in the $path should not be
+# translated.
+#
+# Otherwise, returns the set of options for translation:
+# w: the content shall be re-wrapped
+# W: the content shall not be re-wrapped
+# i: the tag shall be inlined
+# p: a placeholder shall replace the tag (and its content)
+# n: a custom tag
+# f: fold attribute
+#
+# A translatable inline tag in an untranslated tag is treated as a translatable breaking tag.
+sub get_translate_options {
+ my $self = shift;
+ my $path = shift;
+
+ if ( defined $translate_options_cache{$path} ) {
+
+ # print "option($path)=".$translate_options_cache{$path}." (cached)\n";
+ return $translate_options_cache{$path};
+ }
+
+ my $options = "";
+ my $translate = 0;
+ my $usedefault = 1;
+
+ my $inlist = 0;
+ my $tag = $self->get_tag_from_list( $path, $self->{tags} );
+ if ( defined $tag ) {
+ $inlist = 1;
+ }
+
+ # Note: tags option is deprecated. --> $inlist should be 0 now
+
+ if ( $self->{options}{'tagsonly'} eq $inlist ) {
+
+ # Note: tags option is deprecated. --> $inlist should be 0 now
+ # Default is not to use tagsonly --> You are here.
+ $usedefault = 0;
+ if ( defined $tag ) {
+ $options = $tag;
+ $options =~ s/<.*$//;
+ } else {
+
+ # Note: tags option is deprecated. --> $tag is undefined
+ # $self->{options}{'wrap'} = 0 ... xml inherent default
+ # $self->{options}{'wrap'} = 1 ... docbook overridden default
+ # This sets all tags unlisted in translated nor untranslated to become translated tag normally
+ if ( $self->{options}{'wrap'} ) {
+ $options = "w";
+ } else {
+ $options = "W";
+ }
+ }
+ $translate = 1;
+ }
+
+ # TODO: a less precise set of tags should not override a more precise one
+ # The tags and tagsonly options are deprecated.
+ # The translated and untranslated options have a higher priority.
+ $tag = $self->get_tag_from_list( $path, $self->{translated} );
+ if ( defined $tag ) {
+ $usedefault = 0;
+ $options = $tag;
+ $options =~ s/<.*$//;
+ $translate = 1;
+ }
+
+ if ( $translate and $options !~ m/w/i ) {
+ $options .= ( $self->{options}{'wrap'} ) ? "w" : "W";
+ }
+
+ if ( not defined $tag ) {
+ $tag = $self->get_tag_from_list( $path, $self->{untranslated} );
+ if ( defined $tag ) {
+ $usedefault = 0;
+ $options = "";
+ $translate = 0;
+ }
+ }
+
+ $tag = $self->get_tag_from_list( $path, $self->{inline} );
+ if ( defined $tag ) {
+ $usedefault = 0;
+ $options .= "i";
+ } else {
+ $tag = $self->get_tag_from_list( $path, $self->{placeholder} );
+ if ( defined $tag ) {
+ $usedefault = 0;
+ $options .= "p";
+ }
+ }
+
+ $tag = $self->get_tag_from_list( $path, $self->{customtag} );
+ if ( defined $tag ) {
+ $usedefault = 0;
+ $options = "in"; # This erases any other setting
+ }
+
+ if ($usedefault) {
+ $options = $self->{options}{'defaulttranslateoption'};
+ }
+
+ # A translatable inline tag in an untranslated tag is treated as a
+ # translatable breaking tag.
+ if ( $options =~ m/i/ ) {
+ my $ppath = $path;
+ $ppath =~ s/<[^>]*>$//;
+ my $poptions = $self->get_translate_options($ppath);
+ if ( $poptions eq "" ) {
+ $options =~ s/i//;
+ print wrap_mod(
+ "po4a::xml::get_translate_options",
+ dgettext(
+ "po4a",
+ "%s: translation option='%s'.\n *** the original translation option is overridden here since parent path='%s' is untranslated,"
+ ),
+ $path, $options, $ppath
+ ) if $self->{options}{'debug'};
+ }
+ }
+
+ if ( $options =~ m/i/ and $self->{options}{'foldattributes'} ) {
+ $options .= "f";
+ }
+
+ if ( $options !~ m/i/ and $self->{options}{'foldattributes'} ) {
+ print wrap_mod(
+ "po4a::xml::get_translate_options",
+ dgettext( "po4a", "%s: foldattributes setting ignored since '%s' is not inline tag" ),
+ $path, $tag
+ ) if $self->{options}{'debug'};
+ }
+
+ $translate_options_cache{$path} = $options;
+
+ # print "option($path)=".$translate_options_cache{$path}." (new)\n";
+
+ #print wrap_mod("po4a::xml::get_translate_options", dgettext ("po4a", "%s: options: '%s'"), $path, $options) if $self->{options}{'debug'};
+ return $options;
+}
+
+# Return the tag (or biggest set of tags) of a list which matches with the
+# given path.
+#
+# The tag (or set of tags) is returned with its options.
+#
+# If no tags could match the path, undef is returned.
+sub get_tag_from_list ($$$) {
+ my ( $self, $path, $list ) = @_;
+ if ( $self->{options}{'caseinsensitive'} ) {
+ $path = lc $path;
+ }
+
+ while (1) {
+ if ( defined $list->{$path} ) {
+ return $list->{$path} . $path;
+ }
+ last unless ( $path =~ m/</ );
+ $path =~ s/^<.*?>//;
+ }
+
+ return undef;
+}
+
+=head2 WORKING WITH TAGGED CONTENTS
+
+=over 4
+
+
+=item treat_content()
+
+This function gets the text until the next breaking tag (not inline) from the
+input stream. Translate it using each tag type's custom translation functions.
+
+This works on the array C<< @{$self->{TT}{doc_in}} >> holding input document
+data and reference indirectly via C<< $self->shiftline() >> and C<<
+$self->unshiftline($$) >>.
+
+=back
+
+=cut
+
+sub treat_content {
+ my $self = shift;
+ my $blank = "";
+
+ # Indicates if the paragraph will have to be translated
+ my $translate = "";
+
+ my ( $eof, @paragraph ) = $self->get_string_until( '<', { remove => 1 } );
+
+ # Please note even index of array @paragraph holds actual text of input line
+ # Please note odd index of array @paragraph holds its reference = $filename:$flinenum
+
+ while ( !$eof and !$self->breaking_tag ) {
+ NEXT_TAG:
+
+ # Loop if tag is <!--# ... -->, <!-- ... -->, </tag>, <tag />, or <tag >
+ my @text;
+ my $type = $self->tag_type;
+ my $f_extract = $tag_types[$type]->{'f_extract'};
+ if ( defined($f_extract)
+ and $f_extract eq \&tag_extract_comment )
+ {
+ # if tag is <!--# ... --> or <!-- ... -->, remove this tag from the
+ # input stream and save its content to @comments for use by
+ # translate_paragraph.
+ print wrap_mod( "po4a::xml::treat_content", "%s: type='%s'", $paragraph[1], $type )
+ if $self->{options}{'debug'};
+ ( $eof, @text ) = $self->extract_tag( $type, 1 );
+
+ # Add "\0" to mark end of each separate comment
+ $text[ $#text - 1 ] .= "\0";
+ if ( $tag_types[$type]->{'beginning'} eq "!--#" ) {
+ $text[0] = "#" . $text[0];
+ }
+ push @comments, @text;
+ } else {
+
+ # if tag is </tag>, <tag />, or <tag >, get its tag name
+ # alone in @tag without touching the input stream, then get this
+ # whole tag with attributes in @text while removing this whole tag
+ # from the input stream for use by translate_paragraph.
+ my ( $tmpeof, @tag ) = $self->extract_tag( $type, 0 );
+
+ # Append the found inline tag
+ ( $eof, @text ) = $self->get_string_until(
+ '>',
+ {
+ include => 1,
+ remove => 1,
+ unquoted => 1
+ }
+ );
+
+ # print "cur: ".$self->get_tag_name(@tag)."\n";
+
+ # Append or remove the opening/closing tag from the tag path
+ if ( $tag_types[$type]->{'end'} eq "" ) {
+ if ( $tag_types[$type]->{'beginning'} eq "" ) {
+ $self->treat_content_open_tag( \@tag, \@paragraph, \@text );
+ } elsif ( $tag_types[$type]->{'beginning'} eq "/" ) {
+ $self->treat_content_close_tag( \@tag, \@paragraph, \@text );
+ }
+ } elsif ( $tag_types[$type]->{'beginning'} eq ""
+ && $tag_types[$type]->{'end'} eq "/" )
+ {
+ # As for empty-element tag,
+ # treat as if both open and close tags exist
+ $self->treat_content_open_tag( \@tag, \@paragraph, \@text );
+ $self->treat_content_close_tag( \@tag, \@paragraph, \@text );
+ }
+ push @paragraph, @text;
+ }
+
+ # Next tag
+ ( $eof, @text ) = $self->get_string_until( '<', { remove => 1 } );
+ if ( $#text > 0 ) {
+
+ # Check if text (extracted after the inline tag)
+ # has to be translated
+ push @paragraph, @text;
+ }
+ }
+
+ # This strips the extracted strings
+ # (only if you don't specify the 'nostrip' option, and if the
+ # paragraph can be re-wrapped)
+ $translate = $self->get_translate_options( $self->get_path );
+ if ( !$self->{options}{'nostrip'} and $translate !~ m/W/ ) {
+ my $clean = 0;
+
+ # Clean the beginning
+ while ( !$clean and $#paragraph > 0 ) {
+ $paragraph[0] =~ /^(\s*)(.*)/s;
+ my $match = $1;
+ if ( $paragraph[0] eq $match ) {
+ if ( $match ne "" ) {
+ $self->pushline($match);
+ }
+ shift @paragraph;
+ shift @paragraph;
+ } else {
+ $paragraph[0] = $2;
+ if ( $match ne "" ) {
+ $self->pushline($match);
+ }
+ $clean = 1;
+ }
+ }
+ $clean = 0;
+
+ # Clean the end
+ while ( !$clean and $#paragraph > 0 ) {
+ $paragraph[ $#paragraph - 1 ] =~ /^(.*?)(\s*)$/s;
+ my $match = $2;
+ if ( $paragraph[ $#paragraph - 1 ] eq $match ) {
+ if ( $match ne "" ) {
+ $blank = $match . $blank;
+ }
+ pop @paragraph;
+ pop @paragraph;
+ } else {
+ $paragraph[ $#paragraph - 1 ] = $1;
+ if ( $match ne "" ) {
+ $blank = $match . $blank;
+ }
+ $clean = 1;
+ }
+ }
+ }
+
+ # Translate the string when needed
+ # This will either push the translation in the translated document or
+ # in the current holder translation.
+ $self->translate_paragraph(@paragraph);
+
+ # Push the trailing blanks
+ if ( $blank ne "" ) {
+ $self->pushline($blank);
+ }
+ return $eof;
+}
+
+# Processes open tags during getting texts.
+# Performs special process for placeholder and attribute folding.
+sub treat_content_open_tag {
+ my $self = shift;
+ my ( $tag, $paragraph, $text ) = @_;
+
+ # tag is <tag >
+ my $cur_tag_name = $self->get_tag_name(@$tag);
+ my $t_opts = $self->get_translate_options( $self->get_path($cur_tag_name) );
+ if ( $t_opts =~ m/p/ ) {
+
+ # tag has a placeholder option, append a "<placeholder
+ # type=cur_tag_name id =id_index>" tag to @$paragraph.
+ # using $self->get_tag_name(@$tag) as cur_tag_name and
+ # using $#{$save_holders[$#save_holders]->{'sub_translations'}} + 1
+ # as id_index
+ my $last_holder = $save_holders[$#save_holders];
+ my $placeholder_str =
+ "<placeholder type=\""
+ . $cur_tag_name
+ . "\" id=\""
+ . ( $#{ $last_holder->{'sub_translations'} } + 1 ) . "\"/>";
+ push @$paragraph, ( $placeholder_str, $text->[1] );
+ my @saved_paragraph = @$paragraph;
+
+ $last_holder->{'paragraph'} = \@saved_paragraph;
+
+ # make attributes be able to be translated
+ my $open_tag = $self->join_lines(@$text);
+ if ( $open_tag =~ m/^<(\s*)(\S+\s+\S.*)>$/s ) {
+ my ( $ws, $tag_inner ) = ( $1, $2 );
+ $tag_inner =~ s|(\s*/)$||;
+ my $postfix = $1;
+ push @path, $cur_tag_name;
+ $open_tag = "<" . $ws . $self->treat_attributes($tag_inner) . $postfix . ">";
+ pop @path;
+ }
+
+ # Then we must push a new holder into @save_holders
+ my @new_paragraph = ();
+ my @sub_translations = ();
+ my %folded_attributes;
+ my %new_holder = (
+ 'paragraph' => \@new_paragraph,
+ 'open' => $open_tag,
+ 'translation' => "",
+ 'close' => undef,
+ 'sub_translations' => \@sub_translations,
+ 'folded_attributes' => \%folded_attributes
+ );
+ push @save_holders, \%new_holder;
+
+ # reset @$text holding the whole tag with attributes
+ # to empty
+ @$text = ();
+
+ # reset the current @$paragraph (for the current holder)
+ # to empty.
+ @$paragraph = ();
+
+ } elsif ( $t_opts =~ m/f/ ) {
+
+ # tag has a "f" option for folded attributes
+ my $tag_full = $self->join_lines(@$text);
+ my $tag_ref = $text->[1];
+ if ( $tag_full =~ m/^<(\s*)(\S+\s+\S.*)>$/s ) {
+ my ( $ws, $tag_inner ) = ( $1, $2 );
+ my $holder = $save_holders[$#save_holders];
+ my $id = 0;
+ foreach ( keys %{ $holder->{folded_attributes} } ) {
+ $id = $_ + 1 if ( $_ >= $id );
+ }
+
+ # make attributes be able to be translated
+ $tag_inner =~ s|(\s*/)$||;
+ my $postfix = $1;
+ push @path, $cur_tag_name;
+ $holder->{folded_attributes}->{$id} = "<" . $ws . $self->treat_attributes($tag_inner) . $postfix . ">";
+ pop @path;
+
+ @$text = ( "<$cur_tag_name po4a-id=$id>", $tag_ref );
+ }
+ }
+ unless ( $t_opts =~ m/n/ ) {
+
+ # unless "n" for custom (such as non-XML HTML) tag, update @path
+ push @path, $cur_tag_name;
+ }
+}
+
+# Processes close tags during getting texts.
+# Performs special process for placeholder.
+sub treat_content_close_tag {
+ my $self = shift;
+ my ( $tag, $paragraph, $text ) = @_;
+
+ # tag is </tag>
+
+ # Verify this closing tag matches with the last opening tag
+ # while removing the last opening tag in @path
+ my $test = pop @path;
+ my $name = $self->get_tag_name(@$tag);
+ if ( !defined($test)
+ || $test ne $name )
+ {
+ my $ontagerror = $self->{options}{'ontagerror'};
+ if ( $ontagerror eq "warn" ) {
+ warn wrap_ref_mod(
+ $tag->[1],
+ "po4a::xml",
+ dgettext( "po4a", "Unexpected closing tag </%s> found. The main document may be wrong. Continuing…" ),
+ $name
+ );
+ } elsif ( $ontagerror ne "silent" ) {
+ die wrap_ref_mod( $tag->[1], "po4a::xml",
+ dgettext( "po4a", "Unexpected closing tag </%s> found. The main document may be wrong." ), $name );
+ }
+ }
+
+ if ( $self->get_translate_options( $self->get_path( $self->get_tag_name(@$tag) ) ) =~ m/p/ ) {
+
+ # this closing tag has a placeholder option
+
+ # revert @path to include this tag for translate_paragraph
+ push @path, $self->get_tag_name(@$tag);
+
+ # Now translate this paragraph if needed.
+ # This will call pushline and append the
+ # translation to the current holder's translation.
+ $self->translate_paragraph(@$paragraph);
+
+ # remove this tag from @path
+ pop @path;
+
+ # Now that this holder is closed, we can remove
+ # the holder from the stack.
+ my $holder = pop @save_holders;
+
+ # We need to keep the translation of this holder
+ my $translation = $holder->{'open'} . $holder->{'translation'};
+ $translation .= $self->join_lines(@$text);
+
+ @$text = ();
+
+ # Then we store the translation in the previous
+ # holder's sub_translations array
+ my $previous_holder = $save_holders[$#save_holders];
+ push @{ $previous_holder->{'sub_translations'} }, $translation;
+
+ # We also need to restore the @$paragraph array, as
+ # it was before we encountered the holder.
+ @$paragraph = @{ $previous_holder->{'paragraph'} };
+ }
+}
+
+# Translate a @paragraph array of (string, reference).
+# The $translate argument indicates if the strings must be translated or
+# just pushed
+sub translate_paragraph {
+ my $self = shift;
+ my @paragraph = @_;
+ my $translate = $self->get_translate_options( $self->get_path );
+
+ while ( ( scalar @paragraph )
+ and ( $paragraph[0] =~ m/^\s*\n/s ) )
+ {
+ $self->pushline( $paragraph[0] );
+ shift @paragraph;
+ shift @paragraph;
+ }
+
+ my $comments;
+ while (@comments) {
+ my ( $comment, $eoc );
+ do {
+ my ( $t, $l ) = ( shift @comments, shift @comments );
+ $t =~ s/\n?(\0)?$//;
+ $eoc = $1;
+ $comment .= "\n" if defined $comment;
+ $comment .= $t;
+ } until ($eoc);
+ $comments .= "\n" if defined $comments;
+ $comments .= $comment;
+ $self->pushline( "<!--" . $comment . "-->\n" ) if defined $comment;
+ }
+ @comments = ();
+
+ if ( $self->{options}{'cpp'} ) {
+ my @tmp = @paragraph;
+ @paragraph = ();
+ while (@tmp) {
+ my ( $t, $l ) = ( shift @tmp, shift @tmp );
+
+ # #include can be followed by a filename between
+ # <> brackets. In that case, the argument won't be
+ # handled in the same call to translate_paragraph.
+ # Thus do not try to match "include ".
+ if ( $t =~ m/^#[ \t]*(if |endif|undef |include|else|ifdef |ifndef |define )/si ) {
+ if (@paragraph) {
+ $self->translate_paragraph(@paragraph);
+ @paragraph = ();
+ $self->pushline("\n");
+ }
+ $self->pushline($t);
+ } else {
+ push @paragraph, ( $t, $l );
+ }
+ }
+ }
+
+ my $para = $self->join_lines(@paragraph);
+ if ( length($para) > 0 ) {
+ if ( $translate ne "" ) {
+
+ # This tag should be translated
+ print wrap_mod(
+ "po4a::xml::translate_paragraph",
+ "%s: path='%s', translation option='%s'",
+ $paragraph[1], $self->get_path, $translate
+ ) if $self->{options}{'debug'};
+ $self->pushline(
+ $self->found_string(
+ $para,
+ $paragraph[1],
+ {
+ type => "tag",
+ tag_options => $translate,
+ comments => $comments
+ }
+ )
+ );
+ } else {
+
+ # Inform that this tag isn't translated in debug mode
+ print wrap_mod(
+ "po4a::xml::translate_paragraph",
+ "%s: path='%s', translation option='%s' (no translation)",
+ $paragraph[1], $self->get_path, $translate
+ ) if $self->{options}{'debug'};
+ $self->pushline($para);
+ }
+ }
+
+ # Now the paragraph is fully translated.
+ # If we have all the holders' translation, we can replace the
+ # placeholders by their translations.
+ # We must wait to have all the translations because the holders are
+ # numbered.
+ {
+ my $holder = $save_holders[$#save_holders];
+ my $translation = $holder->{'translation'};
+
+ # Count the number of <placeholder ...> in $translation
+ my $count = 0;
+ my $str = $translation;
+ while ( ( defined $str )
+ and ( $str =~ m/^.*?<placeholder\s+[^>]*>(.*)$/s ) )
+ {
+ die wrap_mod(
+ "po4a::xml",
+ dgettext(
+ "po4a",
+ "Invalid placeholder in the translation (the 'type' and 'id' must be present, in this order).\n%s\n\nPlease fix your translation."
+ ),
+ $str
+ ) unless ( $str =~ m/^.*?<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>(.*)$/s );
+
+ $count += 1;
+ $str = $2;
+ if ( $holder->{'sub_translations'}->[$1] =~ m/<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>/s ) {
+ $count = -1;
+ last;
+ }
+ }
+
+ if ( ( defined $translation )
+ and ( scalar( @{ $holder->{'sub_translations'} } ) == $count ) )
+ {
+ # OK, all the holders of the current paragraph are
+ # closed (and translated).
+ # Replace them by their translation.
+ while ( $translation =~ m/^(.*?)<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>(.*)$/s ) {
+
+ # FIXME: we could also check that
+ # * the holder exists
+ # * all the holders are used
+ $translation = $1 . $holder->{'sub_translations'}->[$2] . $3;
+ }
+
+ # We have our translation
+ $holder->{'translation'} = $translation;
+
+ # And there is no need for any holder in it.
+ my @sub_translations = ();
+ $holder->{'sub_translations'} = \@sub_translations;
+ }
+ }
+
+}
+
+=head2 WORKING WITH THE MODULE OPTIONS
+
+=over 4
+
+=item treat_options()
+
+This function fills the internal structures that contain the tags, attributes
+and inline data with the options of the module (specified in the command-line
+or in the initialize function).
+
+=back
+
+=cut
+
+sub treat_options {
+ my $self = shift;
+
+ if ( $self->{options}{'caseinsensitive'} ) {
+ $self->{options}{'nodefault'} = lc $self->{options}{'nodefault'};
+ $self->{options}{'tags'} = lc $self->{options}{'tags'};
+ $self->{options}{'break'} = lc $self->{options}{'break'};
+ $self->{options}{'_default_break'} = lc $self->{options}{'_default_break'};
+ $self->{options}{'translated'} = lc $self->{options}{'translated'};
+ $self->{options}{'_default_translated'} = lc $self->{options}{'_default_translated'};
+ $self->{options}{'untranslated'} = lc $self->{options}{'untranslated'};
+ $self->{options}{'_default_untranslated'} = lc $self->{options}{'_default_untranslated'};
+ $self->{options}{'attributes'} = lc $self->{options}{'attributes'};
+ $self->{options}{'_default_attributes'} = lc $self->{options}{'_default_attributes'};
+ $self->{options}{'inline'} = lc $self->{options}{'inline'};
+ $self->{options}{'_default_inline'} = lc $self->{options}{'_default_inline'};
+ $self->{options}{'placeholder'} = lc $self->{options}{'placeholder'};
+ $self->{options}{'_default_placeholder'} = lc $self->{options}{'_default_placeholder'};
+ $self->{options}{'customtag'} = lc $self->{options}{'customtag'};
+ $self->{options}{'_default_customtag'} = lc $self->{options}{'_default_customtag'};
+ }
+
+ $self->{options}{'nodefault'} =~ /^\s*(.*)\s*$/s;
+ my %list_nodefault;
+ foreach ( split( /\s+/s, $1 ) ) {
+ $list_nodefault{$_} = 1;
+ }
+ $self->{nodefault} = \%list_nodefault;
+
+ $self->{options}{'tags'} =~ /^\s*(.*)\s*$/s;
+ if ( length $self->{options}{'tags'} ) {
+ warn wrap_mod(
+ "po4a::xml::treat_options",
+ dgettext(
+ "po4a",
+ "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories."
+ ),
+ "tags"
+ );
+ }
+ foreach ( split( /\s+/s, $1 ) ) {
+ $_ =~ m/^(.*?)(<.*)$/;
+ $self->{tags}->{$2} = $1 || "";
+ }
+
+ if ( $self->{options}{'tagsonly'} ) {
+ warn wrap_mod(
+ "po4a::xml::treat_options",
+ dgettext(
+ "po4a",
+ "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories."
+ ),
+ "tagsonly"
+ );
+ }
+
+ $self->{options}{'break'} =~ /^\s*(.*)\s*$/s;
+ foreach my $tag ( split( /\s+/s, $1 ) ) {
+ $tag =~ m/^(.*?)(<.*)$/;
+ $self->{break}->{$2} = $1 || "";
+ }
+ $self->{options}{'_default_break'} =~ /^\s*(.*)\s*$/s;
+ foreach my $tag ( split( /\s+/s, $1 ) ) {
+ $tag =~ m/^(.*?)(<.*)$/;
+ $self->{break}->{$2} = $1 || ""
+ unless $list_nodefault{$2}
+ or defined $self->{break}->{$2};
+ }
+
+ $self->{options}{'translated'} =~ /^\s*(.*)\s*$/s;
+ foreach my $tag ( split( /\s+/s, $1 ) ) {
+ $tag =~ m/^(.*?)(<.*)$/;
+ $self->{translated}->{$2} = $1 || "";
+ }
+ $self->{options}{'_default_translated'} =~ /^\s*(.*)\s*$/s;
+ foreach my $tag ( split( /\s+/s, $1 ) ) {
+ $tag =~ m/^(.*?)(<.*)$/;
+ $self->{translated}->{$2} = $1 || ""
+ unless $list_nodefault{$2}
+ or defined $self->{translated}->{$2};
+ }
+
+ $self->{options}{'untranslated'} =~ /^\s*(.*)\s*$/s;
+ foreach my $tag ( split( /\s+/s, $1 ) ) {
+ $tag =~ m/^(.*?)(<.*)$/;
+ $self->{untranslated}->{$2} = $1 || "";
+ }
+ $self->{options}{'_default_untranslated'} =~ /^\s*(.*)\s*$/s;
+ foreach my $tag ( split( /\s+/s, $1 ) ) {
+ $tag =~ m/^(.*?)(<.*)$/;
+ $self->{untranslated}->{$2} = $1 || ""
+ unless $list_nodefault{$2}
+ or defined $self->{untranslated}->{$2};
+ }
+
+ $self->{options}{'attributes'} =~ /^\s*(.*)\s*$/s;
+ foreach my $tag ( split( /\s+/s, $1 ) ) {
+ if ( $tag =~ m/^(.*?)(<.*)$/ ) {
+ $self->{attributes}->{$2} = $1 || "";
+ } else {
+ $self->{attributes}->{$tag} = "";
+ }
+ }
+ $self->{options}{'_default_attributes'} =~ /^\s*(.*)\s*$/s;
+ foreach my $tag ( split( /\s+/s, $1 ) ) {
+ if ( $tag =~ m/^(.*?)(<.*)$/ ) {
+ $self->{attributes}->{$2} = $1 || ""
+ unless $list_nodefault{$2}
+ or defined $self->{attributes}->{$2};
+ } else {
+ $self->{attributes}->{$tag} = ""
+ unless $list_nodefault{$tag}
+ or defined $self->{attributes}->{$tag};
+ }
+ }
+
+ $self->{options}{'inline'} =~ /^\s*(.*)\s*$/s;
+ foreach my $tag ( split( /\s+/s, $1 ) ) {
+ $tag =~ m/^(.*?)(<.*)$/;
+ $self->{inline}->{$2} = $1 || "";
+ }
+ $self->{options}{'_default_inline'} =~ /^\s*(.*)\s*$/s;
+ foreach my $tag ( split( /\s+/s, $1 ) ) {
+ $tag =~ m/^(.*?)(<.*)$/;
+ $self->{inline}->{$2} = $1 || ""
+ unless $list_nodefault{$2}
+ or defined $self->{inline}->{$2};
+ }
+
+ $self->{options}{'placeholder'} =~ /^\s*(.*)\s*$/s;
+ foreach my $tag ( split( /\s+/s, $1 ) ) {
+ $tag =~ m/^(.*?)(<.*)$/;
+ $self->{placeholder}->{$2} = $1 || "";
+ }
+ $self->{options}{'_default_placeholder'} =~ /^\s*(.*)\s*$/s;
+ foreach my $tag ( split( /\s+/s, $1 ) ) {
+ $tag =~ m/^(.*?)(<.*)$/;
+ $self->{placeholder}->{$2} = $1 || ""
+ unless $list_nodefault{$2}
+ or defined $self->{placeholder}->{$2};
+ }
+
+ $self->{options}{'customtag'} =~ /^\s*(.*)\s*$/s;
+ foreach my $tag ( split( /\s+/s, $1 ) ) {
+ $tag =~ m/^(.*?)(<.*)$/;
+ $self->{customtag}->{$2} = $1 || "";
+ }
+ $self->{options}{'_default_customtag'} =~ /^\s*(.*)\s*$/s;
+ foreach my $tag ( split( /\s+/s, $1 ) ) {
+ $tag =~ m/^(.*?)(<.*)$/;
+ $self->{customtag}->{$2} = $1 || ""
+ unless $list_nodefault{$2}
+ or defined $self->{customtag}->{$2};
+ }
+
+ # If break-pi is provided, we should ensure that: $tag_types[the one of PI]->breaking is 1
+ if ( $self->{options}{'break-pi'} ) {
+ for ( my $i = 0 ; $i < @tag_types ; $i++ ) {
+ if ( $tag_types[$i]->{beginning} eq '?' && $tag_types[$i]->{end} eq '?' ) {
+ $tag_types[$i]->{breaking} = 1;
+ }
+ }
+ }
+
+ foreach my $tagtype (qw(untranslated)) {
+ foreach my $tag ( sort keys %{ $self->{$tagtype} } ) {
+ warn
+ "po4a::xml::treat_options: WARN: tag='$tag' is %s tag, translation option='$self->{$tagtype}->{$tag}' is ignores wW.\n"
+ if $self->{$tagtype}->{$tag} =~ m/wW/;
+ }
+ }
+ foreach my $tagtype (qw(inline break placeholder customtag)) {
+ foreach my $tag ( sort keys %{ $self->{$tagtype} } ) {
+ die
+ "po4a::xml::treat_options: WARN: tag='$tag' is %s tag, translation option='$self->{$tagtype}->{$tag}' is ignored.\n"
+ if $self->{$tagtype}->{$tag} ne "";
+ }
+ }
+ foreach my $tagtype (qw(attributes)) {
+ foreach my $tag ( sort keys %{ $self->{$tagtype} } ) {
+ warn
+ "po4a::xml::treat_options: WARN: tag='$tag' is %s tag, translation option='$self->{$tagtype}->{$tag}' is ignored.\n"
+ if $self->{$tagtype}->{$tag} ne "";
+ }
+ }
+
+ # Debug output of internal parameters for generic XML parser
+ # Marked content of a XML tag can be either "translated" or "untranslated".
+ # -- XML tags in these may specify options: wWip
+ # Extraction of XML content can be one of "inline", "break", "placeholder", or "customtag".
+ # -- XML tags in these must not specify options
+ if ( $self->{options}{'debug'} ) {
+ foreach my $tagtype (qw(translated untranslated)) {
+ foreach my $tag ( sort keys %{ $self->{$tagtype} } ) {
+ print
+ "po4a::xml::treat_options: $tag: translation option='$self->{$tagtype}->{$tag}' (original), but listed in '$tagtype'";
+ foreach my $tagtype1 (qw(inline break placeholder customtag)) {
+ if ( exists $self->{$tagtype1}->{$tag} ) {
+ print " / '$tagtype1'";
+ }
+ }
+ print "\n";
+ print wrap_mod( "po4a::xml::treat_options", "%s: translation option='%s' (valid)",
+ $tag, $self->get_translate_options($tag) );
+ }
+ }
+ foreach my $tag ( sort keys %{ $self->{'attributes'} } ) {
+ print "po4a::xml::treat_options: $tag: translated attributes.\n";
+ }
+ }
+
+ # There should be no translated and untranslated tags
+ foreach my $tag ( keys %{ $self->{translated} } ) {
+ die wrap_mod( "po4a::xml::treat_options", dgettext( "po4a", "Tag '%s' both in the %s and %s categories." ),
+ $tag, "translated", "untranslated" )
+ if defined $self->{untranslated}->{$tag};
+ }
+
+ # There should be no inline, break, placeholder, and customtag tags
+ foreach my $tag ( keys %{ $self->{inline} } ) {
+ die wrap_mod( "po4a::xml::treat_options", dgettext( "po4a", "Tag '%s' both in the %s and %s categories." ),
+ $tag, "inline", "break" )
+ if defined $self->{break}->{$tag};
+ die wrap_mod( "po4a::xml::treat_options", dgettext( "po4a", "Tag '%s' both in the %s and %s categories." ),
+ $tag, "inline", "placeholder" )
+ if defined $self->{placeholder}->{$tag};
+ die wrap_mod( "po4a::xml::treat_options", dgettext( "po4a", "Tag '%s' both in the %s and %s categories." ),
+ $tag, "inline", "customtag" )
+ if defined $self->{customtag}->{$tag};
+ }
+ foreach my $tag ( keys %{ $self->{break} } ) {
+ die wrap_mod( "po4a::xml::treat_options", dgettext( "po4a", "Tag '%s' both in the %s and %s categories." ),
+ $tag, "break", "placeholder" )
+ if defined $self->{placeholder}->{$tag};
+ die wrap_mod( "po4a::xml::treat_options", dgettext( "po4a", "Tag '%s' both in the %s and %s categories." ),
+ $tag, "break", "customtag" )
+ if defined $self->{customtag}->{$tag};
+ }
+ foreach my $tag ( keys %{ $self->{placeholder} } ) {
+ die wrap_mod( "po4a::xml::treat_options", dgettext( "po4a", "Tag '%s' both in the %s and %s categories." ),
+ $tag, "placeholder", "customtag" )
+ if defined $self->{customtag}->{$tag};
+ }
+}
+
+=head2 GETTING TEXT FROM THE INPUT DOCUMENT
+
+=over
+
+=item get_string_until($%)
+
+This function returns an array with the lines (and references) from the input
+document until it finds the first argument. The second argument is an options
+hash. Value 0 means disabled (the default) and 1, enabled.
+
+The valid options are:
+
+=over 4
+
+=item B<include>
+
+This makes the returned array to contain the searched text
+
+=item B<remove>
+
+This removes the returned stream from the input
+
+=item B<unquoted>
+
+This ensures that the searched text is outside any quotes
+
+=item B<regex>
+
+This denotes that the first argument is a regular expression rather than
+an plain string
+
+=back
+
+=cut
+
+sub get_string_until {
+ my ( $self, $search ) = ( shift, shift );
+ my $options = shift;
+ my ( $include, $remove, $unquoted, $regex ) = ( 0, 0, 0, 0 );
+
+ if ( defined( $options->{include} ) ) { $include = $options->{include}; }
+ if ( defined( $options->{remove} ) ) { $remove = $options->{remove}; }
+ if ( defined( $options->{unquoted} ) ) { $unquoted = $options->{unquoted}; }
+ if ( defined( $options->{regex} ) ) { $regex = $options->{regex}; }
+
+ my ( $line, $ref ) = $self->shiftline();
+ my ( @text, $paragraph );
+ my ( $eof, $found ) = ( 0, 0 );
+
+ $search = "\Q$search\E" unless $regex;
+ while ( defined($line) and !$found ) {
+ push @text, ( $line, $ref );
+ $paragraph .= $line;
+ if ($unquoted) {
+ if ( $paragraph =~ /^((\".*?\")|(\'.*?\')|[^\"\'])*$search/s ) {
+ $found = 1;
+ }
+ } else {
+ if ( $paragraph =~ /$search/s ) {
+ $found = 1;
+ }
+ }
+ if ( !$found ) {
+ ( $line, $ref ) = $self->shiftline();
+ }
+ }
+
+ if ( !defined($line) ) { $eof = 1; }
+
+ if ($found) {
+ $line = "";
+ if ($unquoted) {
+ $paragraph =~ /^(?:(?:\".*?\")|(?:\'.*?\')|[^\"\'])*?$search(.*)$/s;
+ $line = $1;
+ $text[ $#text - 1 ] =~ s/\Q$line\E$//s;
+ } else {
+ $paragraph =~ /$search(.*)$/s;
+ $line = $1;
+ $text[ $#text - 1 ] =~ s/\Q$line\E$//s;
+ }
+ if ( !$include ) {
+ $text[ $#text - 1 ] =~ /^(.*)($search.*)$/s;
+ $text[ $#text - 1 ] = $1;
+ $line = $2 . $line;
+ }
+ if ( defined($line) and ( $line ne "" ) ) {
+ $self->unshiftline( $line, $text[$#text] );
+ }
+ }
+ if ( !$remove ) {
+ $self->unshiftline(@text);
+ }
+
+ #If we get to the end of the file, we return the whole paragraph
+ return ( $eof, @text );
+}
+
+=item skip_spaces(\@)
+
+This function receives as argument the reference to a paragraph (in the format
+returned by get_string_until), skips his heading spaces and returns them as
+a simple string.
+
+=cut
+
+sub skip_spaces {
+ my ( $self, $pstring ) = @_;
+ my $space = "";
+
+ while ( @$pstring and ( @$pstring[0] =~ /^(\s+)(.*)$/s or @$pstring[0] eq "" ) ) {
+ if ( @$pstring[0] ne "" ) {
+ $space .= $1;
+ @$pstring[0] = $2;
+ }
+
+ if ( @$pstring[0] eq "" ) {
+ shift @$pstring;
+ shift @$pstring;
+ }
+ }
+ return $space;
+}
+
+=item join_lines(@)
+
+This function returns a simple string with the text from the argument array
+(discarding the references).
+
+=cut
+
+sub join_lines {
+ my ( $self, @lines ) = @_;
+ my ( $line, $ref );
+ my $text = "";
+ while ( $#lines > 0 ) {
+ ( $line, $ref ) = ( shift @lines, shift @lines );
+ $text .= $line;
+ }
+ return $text;
+}
+
+=back
+
+=head1 STATUS OF THIS MODULE
+
+This module can translate tags and attributes.
+
+=head1 TODO LIST
+
+DOCTYPE (ENTITIES)
+
+There is a minimal support for the translation of entities. They are
+translated as a whole, and tags are not taken into account. Multilines
+entities are not supported and entities are always rewrapped during the
+translation.
+
+MODIFY TAG TYPES FROM INHERITED MODULES
+(move the tag_types structure inside the $self hash?)
+
+=head1 SEE ALSO
+
+L<Locale::Po4a::TransTractor(3pm)|Locale::Po4a::TransTractor>,
+L<po4a(7)|po4a.7>
+
+=head1 AUTHORS
+
+ Jordi Vilalta <jvprat@gmail.com>
+ Nicolas François <nicolas.francois@centraliens.net>
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright © 2004 Jordi Vilalta <jvprat@gmail.com>
+ Copyright © 2008-2009 Nicolas François <nicolas.francois@centraliens.net>
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
+
+=cut
+
+1;
diff --git a/lib/Locale/Po4a/Yaml.pm b/lib/Locale/Po4a/Yaml.pm
new file mode 100644
index 0000000..919c40c
--- /dev/null
+++ b/lib/Locale/Po4a/Yaml.pm
@@ -0,0 +1,160 @@
+# Locale::Po4a::Yaml -- Convert yaml files to PO file, for translation.
+#
+# This program is free software; you may redistribute it and/or modify it
+# under the terms of GPL v2.0 or later (see COPYING).
+#
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Yaml - convert YAML files from/to PO files
+
+=head1 DESCRIPTION
+
+Locale::Po4a::Yaml is a module to help the translation of Yaml files into other
+[human] languages.
+
+The module extracts the value of YAML hashes and arrays. Hash keys are
+not extracted.
+
+NOTE: This module parses the YAML file with YAML::Tiny.
+
+=head1 OPTIONS ACCEPTED BY THIS MODULE
+
+These are this module's particular options:
+
+=over
+
+=item B<keys>
+
+Space-separated list of hash keys to process for extraction, all
+other keys are skipped. Keys are matched with a case-sensitive match.
+If B<paths> and B<keys> are used together, values are included if they are
+matched by at least one of the options.
+Arrays values are always returned unless the B<skip_array> option is
+provided.
+
+=item B<paths>
+
+Comma-separated list of hash paths to process for extraction, all
+other paths are skipped. Paths are matched with a case-sensitive match.
+If B<paths> and B<keys> are used together, values are included if they are
+matched by at least one of the options.
+Arrays values are always returned unless the B<skip_array> option is
+provided.
+
+=item B<skip_array>
+
+Do not translate array values.
+
+=back
+
+=head1 SEE ALSO
+
+L<Locale::Po4a::TransTractor(3pm)>, L<po4a(7)|po4a.7>
+
+=head1 AUTHORS
+
+ Brian Exelbierd <bex@pobox.com>
+
+=head1 COPYRIGHT AND LICENSE
+
+ Copyright © 2017 Brian Exelbierd.
+ Copyright © 2022 Martin Quinson <mquinson#debian.org>.
+
+This program is free software; you may redistribute it and/or modify it
+under the terms of GPL v2.0 or later (see the COPYING file).
+
+=cut
+
+############################################################################
+# Modules and declarations
+############################################################################
+
+package Locale::Po4a::Yaml;
+
+use Locale::Po4a::TransTractor;
+use Locale::Po4a::Common;
+use YAML::Tiny;
+use Scalar::Util;
+use Encode;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+require Exporter;
+
+use vars qw(@ISA @EXPORT $AUTOLOAD);
+@ISA = qw(Locale::Po4a::TransTractor);
+@EXPORT = qw();
+
+my %yfm_keys = ();
+my %yfm_paths = ();
+
+sub initialize {
+ my $self = shift;
+ my %options = @_;
+
+ $self->{options}{'keys'} = '';
+ $self->{options}{'paths'} = '';
+ $self->{options}{'debug'} = 0;
+ $self->{options}{'verbose'} = 1;
+ $self->{options}{'skip_array'} = 0;
+
+ foreach my $opt ( keys %options ) {
+ die wrap_mod( "po4a::yaml", dgettext( "po4a", "Unknown option: %s" ), $opt )
+ unless exists $self->{options}{$opt};
+ $self->{options}{$opt} = $options{$opt};
+ }
+
+ map {
+ $_ =~ s/^\s+|\s+$//g; # Trim the keys before using them
+ $yfm_keys{$_} = 1
+ } ( split( /[, ]/, $self->{options}{keys} ) );
+
+ # map { print STDERR "key: '$_'\n"; } (keys %yfm_keys);
+
+ map {
+ $_ =~ s/^\s+|\s+$//g; # Trim the keys before using them
+ $yfm_paths{$_} = 1
+ } ( split( /,/, $self->{options}{paths} ) );
+}
+
+sub parse {
+ my $self = shift;
+ my $yfm;
+
+ # Get the ref of the first line. We'll use it as the ref for the whole doc
+ my ( $line, $ref ) = $self->shiftline();
+ $self->unshiftline( $line, $ref );
+
+ while (1) {
+ my ( $nextline, $nextref ) = $self->shiftline();
+
+ if ( not defined($nextline) ) {
+ last;
+ } elsif ( $nextline =~ /: [\[\{]/ ) {
+ die wrap_mod(
+ "po4a::text",
+ dgettext(
+ "po4a",
+ "Inline lists and dictionaries on a single line are not correctly handled the parser we use (YAML::Tiny): they are interpreted as regular strings. "
+ . "Please use multi-lines definitions instead. Offending line:\n %s"
+ ),
+ $nextline
+ );
+ }
+
+ $yfm .= $nextline;
+ }
+
+ my $yamlarray = YAML::Tiny->read_string($yfm)
+ || die "YAML::Tiny failed to parse the content of $ref: $!";
+
+ $self->handle_yaml( 0, $ref, $yamlarray, \%yfm_keys, $self->{options}{skip_array}, \%yfm_paths );
+}
+
+1;
+__END__