1 files changed, 1337 insertions, 0 deletions
diff --git a/lib/Locale/Po4a/TransTractor.pm b/lib/Locale/Po4a/TransTractor.pm
new file mode 100644
index 0000000..0bc5b8f
--- /dev/null
+++ b/lib/Locale/Po4a/TransTractor.pm
@@ -0,0 +1,1337 @@
+#!/usr/bin/perl -w
+
+require Exporter;
+
+package Locale::Po4a::TransTractor;
+use DynaLoader;
+
+use 5.16.0;
+use strict;
+use warnings;
+
+use subs qw(makespace);
+use vars qw($VERSION @ISA @EXPORT);
+$VERSION = "0.72";
+@ISA     = qw(DynaLoader);
+@EXPORT  = qw(new process translate
+  read write readpo writepo
+  getpoout setpoout get_in_charset get_out_charset handle_yaml);
+
+# Try to use a C extension if present.
+eval("bootstrap Locale::Po4a::TransTractor $VERSION");
+
+use Carp qw(croak confess);
+use Locale::Po4a::Po;
+use Locale::Po4a::Common;
+
+use File::Path;    # mkdir before write
+use File::Spec;
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::TransTractor - generic trans(lator ex)tractor.
+
+=head1 DESCRIPTION
+
+The po4a (PO for anything) project goal is to ease translations (and more
+interestingly, the maintenance of translations) using gettext tools on
+areas where they were not expected like documentation.
+
+This class is the ancestor of every po4a parser used to parse a document, to
+search translatable strings, to extract them to a PO file and to replace them by
+their translation in the output document.
+
+More formally, it takes the following arguments as input:
+
+=over 2
+
+=item -
+
+a document to translate;
+
+=item -
+
+a PO file containing the translations to use.
+
+=back
+
+As output, it produces:
+
+=over 2
+
+=item -
+
+another PO file, resulting of the extraction of translatable strings from
+the input document;
+
+=item -
+
+a translated document, with the same structure than the one in input, but
+with all translatable strings replaced with the translations found in the
+PO file provided in input.
+
+=back
+
+Here is a graphical representation of this:
+
+   Input document --\                             /---> Output document
+                     \                           /       (translated)
+                      +-> parse() function -----+
+                     /                           \
+   Input PO --------/                             \---> Output PO
+                                                         (extracted)
+
+=head1 FUNCTIONS YOUR PARSER SHOULD OVERRIDE
+
+=over 4
+
+=item parse()
+
+This is where all the work takes place: the parsing of input documents, the
+generation of output, and the extraction of the translatable strings. This
+is pretty simple using the provided functions presented in the section
+B<INTERNAL FUNCTIONS> below. See also the B<SYNOPSIS>, which presents an
+example.
+
+This function is called by the process() function below, but if you choose
+to use the new() function, and to add content manually to your document,
+you will have to call this function yourself.
+
+=item docheader()
+
+This function returns the header we should add to the produced document,
+quoted properly to be a comment in the target language.  See the section
+B<Educating developers about translations>, from L<po4a(7)|po4a.7>, for what
+it is good for.
+
+=back
+
+=cut
+
+sub docheader { }
+
+sub parse { }
+
+=head1 SYNOPSIS
+
+The following example parses a list of paragraphs beginning with "<p>". For the sake
+of simplicity, we assume that the document is well formatted, i.e. that '<p>'
+tags are the only tags present, and that this tag is at the very beginning
+of each paragraph.
+
+ sub parse {
+   my $self = shift;
+
+   PARAGRAPH: while (1) {
+       my ($paragraph,$pararef)=("","");
+       my $first=1;
+       my ($line,$lref)=$self->shiftline();
+       while (defined($line)) {
+           if ($line =~ m/<p>/ && !$first--; ) {
+               # Not the first time we see <p>.
+               # Reput the current line in input,
+               #  and put the built paragraph to output
+               $self->unshiftline($line,$lref);
+
+               # Now that the document is formed, translate it:
+               #   - Remove the leading tag
+               $paragraph =~ s/^<p>//s;
+
+               #   - push to output the leading tag (untranslated) and the
+               #     rest of the paragraph (translated)
+               $self->pushline(  "<p>"
+                               . $self->translate($paragraph,$pararef)
+                               );
+
+               next PARAGRAPH;
+           } else {
+               # Append to the paragraph
+               $paragraph .= $line;
+               $pararef = $lref unless(length($pararef));
+           }
+
+           # Reinit the loop
+           ($line,$lref)=$self->shiftline();
+       }
+       # Did not get a defined line? End of input file.
+       return;
+   }
+ }
+
+Once you've implemented the parse function, you can use your document
+class, using the public interface presented in the next section.
+
+=head1 PUBLIC INTERFACE for scripts using your parser
+
+=head2 Constructor
+
+=over 4
+
+=item process(%)
+
+This function can do all you need to do with a po4a document in one
+invocation. Its arguments must be packed as a hash. ACTIONS:
+
+=over 3
+
+=item a.
+
+Reads all the PO files specified in po_in_name
+
+=item b.
+
+Reads all original documents specified in file_in_name
+
+=item c.
+
+Parses the document
+
+=item d.
+
+Reads and applies all the addenda specified
+
+=item e.
+
+Writes the translated document to file_out_name (if given)
+
+=item f.
+
+Writes the extracted PO file to po_out_name (if given)
+
+=back
+
+ARGUMENTS, beside the ones accepted by new() (with expected type):
+
+=over 4
+
+=item file_in_name (@)
+
+List of filenames where we should read the input document.
+
+=item file_in_charset ($)
+
+Charset used in the input document (if it isn't specified, use UTF-8).
+
+=item file_out_name ($)
+
+Filename where we should write the output document.
+
+=item file_out_charset ($)
+
+Charset used in the output document (if it isn't specified, use UTF-8).
+
+=item po_in_name (@)
+
+List of filenames where we should read the input PO files from, containing
+the translation which will be used to translate the document.
+
+=item po_out_name ($)
+
+Filename where we should write the output PO file, containing the strings
+extracted from the input document.
+
+=item addendum (@)
+
+List of filenames where we should read the addenda from.
+
+=item addendum_charset ($)
+
+Charset for the addenda.
+
+=back
+
+=item new(%)
+
+Create a new po4a document. Accepted options (in the hash passed as a parameter):
+
+=over 4
+
+=item verbose ($)
+
+Sets the verbosity.
+
+=item debug ($)
+
+Sets the debugging.
+
+=item wrapcol ($)
+
+The column at which we should wrap text in output document (default: 76).
+
+The negative value means not to wrap lines at all.
+
+=back
+
+Also it accepts next options for underlying Po-files: B<porefs>,
+B<copyright-holder>, B<msgid-bugs-address>, B<package-name>,
+B<package-version>, B<wrap-po>.
+
+=cut
+
+sub process {
+    my $self = shift;
+    ## Parameters are passed as an hash to avoid long and error-prone parameter lists
+    my %params = @_;
+
+    # Parameter checking
+    foreach ( keys %params ) {
+        confess "Unexpected parameter to process(): $_. Please report that bug."
+          unless ( $_ eq 'po_in_name'
+            || $_ eq 'po_out_name'
+            || $_ eq 'file_in_name'
+            || $_ eq 'file_in_charset'
+            || $_ eq 'file_out_name'
+            || $_ eq 'file_out_charset'
+            || $_ eq 'addendum'
+            || $_ eq 'addendum_charset'
+            || $_ eq 'srcdir'
+            || $_ eq 'destdir'
+            || $_ eq 'calldir' );
+    }
+
+    $self->{TT}{'file_in_charset'}  = $params{'file_in_charset'}  // 'UTF-8';
+    $self->{TT}{'file_out_charset'} = $params{'file_out_charset'} // 'UTF-8';
+    $self->{TT}{'addendum_charset'} = $params{'addendum_charset'};
+
+    our ( $destdir, $srcdir, $calldir ) = ( $params{'destdir'}, $params{'srcdir'}, $params{'calldir'} );
+
+    sub _input_file {
+        my $filename = $_[0];
+        return $filename if ( File::Spec->file_name_is_absolute($filename) );
+        foreach ( ( $destdir, $srcdir, $calldir ) ) {
+            next unless defined $_;
+            my $p = File::Spec->catfile( $_, $filename );
+            return $p if -e $p;
+        }
+        return $filename;
+    }
+
+    sub _output_file {
+        my $filename = $_[0];
+        return $filename if ( File::Spec->file_name_is_absolute($filename) );
+        foreach ( ( $destdir, $calldir ) ) {
+            next unless defined $_;
+            return File::Spec->catfile( $_, $filename ) if -d $_ and -w $_;
+        }
+        return $filename;
+    }
+
+    foreach my $file ( @{ $params{'po_in_name'} } ) {
+        my $infile = _input_file($file);
+        print STDERR wrap_mod( "po4a::transtractor::process", "Read PO file $infile" )
+          if $self->debug();
+        $self->readpo($infile);
+    }
+    foreach my $file ( @{ $params{'file_in_name'} } ) {
+        my $infile = _input_file($file);
+        print STDERR wrap_mod( "po4a::transtractor::process", "Read document $infile" )
+          if $self->debug();
+        $self->read( $infile, $file, $params{'file_in_charset'} );
+    }
+    print STDERR wrap_mod( "po4a::transtractor::process", "Call parse()" ) if $self->debug();
+    $self->parse();
+    print STDERR wrap_mod( "po4a::transtractor::process", "Done parse()" ) if $self->debug();
+    foreach my $file ( @{ $params{'addendum'} } ) {
+        my $infile = _input_file($file);
+        print STDERR wrap_mod( "po4a::transtractor::process", "Apply addendum $infile" )
+          if $self->debug();
+        $self->addendum($file) || die "An addendum failed\n";
+    }
+
+    if ( defined $params{'file_out_name'} ) {
+        my $outfile = _output_file( $params{'file_out_name'} );
+        print STDERR wrap_mod( "po4a::transtractor::process", "Write document $outfile" )
+          if $self->debug();
+        $self->write( $outfile, $self->{TT}{'file_out_charset'} );
+    }
+    if ( defined $params{'po_out_name'} ) {
+        my $outfile = _output_file( $params{'po_out_name'} );
+        print STDERR wrap_mod( "po4a::transtractor::process", "Write PO file $outfile" )
+          if $self->debug();
+        $self->writepo($outfile);
+    }
+    return $self;
+}
+
+sub new {
+    ## Determine if we were called via an object-ref or a classname
+    my $this    = shift;
+    my $class   = ref($this) || $this;
+    my $self    = {};
+    my %options = @_;
+    ## Bless ourselves into the desired class and perform any initialization
+    bless $self, $class;
+
+    ## initialize the plugin
+    # prevent the plugin from croaking on the options intended for Po.pm or ourself
+    $self->{options}{'porefs'}             = '';
+    $self->{options}{'copyright-holder'}   = '';
+    $self->{options}{'msgid-bugs-address'} = '';
+    $self->{options}{'package-name'}       = '';
+    $self->{options}{'package-version'}    = '';
+    $self->{options}{'wrap-po'}            = '';
+    $self->{options}{'wrapcol'}            = '';
+
+    # let the plugin parse the options and such
+    $self->initialize(%options);
+
+    ## Create our private data
+    my %po_options;
+    $po_options{'porefs'}             = $options{'porefs'};
+    $po_options{'copyright-holder'}   = $options{'copyright-holder'};
+    $po_options{'msgid-bugs-address'} = $options{'msgid-bugs-address'};
+    $po_options{'package-name'}       = $options{'package-name'};
+    $po_options{'package-version'}    = $options{'package-version'};
+    $po_options{'wrap-po'}            = $options{'wrap-po'};
+
+    # private data
+    $self->{TT}         = ();
+    $self->{TT}{po_in}  = Locale::Po4a::Po->new( \%po_options );
+    $self->{TT}{po_out} = Locale::Po4a::Po->new( \%po_options );
+
+    # Warning, $self->{TT}{doc_in} is an array of array:
+    #  The document is split on lines, and for each array in array
+    #  [0] is the line content, [1] is the reference $filename:$linenum
+    $self->{TT}{doc_in}  = ();
+    $self->{TT}{doc_out} = ();
+    if ( defined $options{'verbose'} ) {
+        $self->{TT}{verbose} = $options{'verbose'};
+    }
+    if ( defined $options{'debug'} ) {
+        $self->{TT}{debug} = $options{'debug'};
+    }
+    if ( defined $options{'wrapcol'} ) {
+        if ( $options{'wrapcol'} < 0) {
+            $self->{TT}{wrapcol} = 'Inf';
+        } else {
+            $self->{TT}{wrapcol} = $options{'wrapcol'};
+        }
+    } else {
+        $self->{TT}{wrapcol} = 76;
+    }
+
+    return $self;
+}
+
+=back
+
+=head2 Manipulating document files
+
+=over 4
+
+=item read($$)
+
+Add another input document data at the end of the existing array
+C<< @{$self->{TT}{doc_in}} >>. The argument is the filename to read. If a second
+argument is provided, it is the filename to use in the references.
+
+This array C<< @{$self->{TT}{doc_in}} >> holds this input document data as an
+array of strings with alternating meanings.
+ * The string C<$textline> holding each line of the input text data.
+ * The string C<< $filename:$linenum >> holding its location and called as
+   "reference" (C<linenum> starts with 1).
+
+Please note that it does not parse anything. You should use the parse()
+function when you're done with packing input files into the document.
+
+=cut
+
+sub read() {
+    my $self     = shift;
+    my $filename = shift or confess "Cannot write to a file without filename";
+    my $refname  = shift or confess "Cannot write to a file without refname";
+    my $charset  = shift || 'UTF-8';
+    my $linenum  = 0;
+
+    use warnings FATAL => 'utf8';
+    use Encode qw(:fallbacks);
+    use PerlIO::encoding;
+    $PerlIO::encoding::fallback = FB_CROAK;
+
+    my $fh;
+    open( $fh, "<:encoding($charset)", $filename )
+      or croak wrap_msg( dgettext( "po4a", "Cannot read from %s: %s" ), $filename, $! );
+
+    # If we see a BOM while not in UTF-8, we want to croak. But this code is in an eval to deal with
+    # encoding issues. So save the BOM error until after the eval block
+    my $BOM_detected = 0;
+
+    eval {
+        while ( defined( my $textline = <$fh> ) ) {
+            $linenum++;
+            if ( $linenum == 1 && $textline =~ m/^\N{BOM}/ ) {    # UTF-8 BOM detected
+                $BOM_detected = 1 if ( uc($charset) ne 'UTF-8' );    # Save the error message for after the eval{} bloc
+                $textline =~ s/^\N{BOM}//;
+            }
+            my $ref = "$refname:$linenum";
+            $textline =~ s/\r$//;
+            my @entry = ( $textline, $ref );
+            push @{ $self->{TT}{doc_in} }, @entry;
+        }
+    };
+    my $error = $@;
+    if ( length($error) ) {
+        chomp $error;
+        die wrap_msg( dgettext( "po4a", "Malformed encoding while reading from file %s with charset %s: %s" ),
+            $filename, $charset, $error );
+    }
+
+    # Croak if we need to
+    if ($BOM_detected) {
+        croak wrap_msg(
+            dgettext(
+                "po4a",
+                "The file %s starts with a BOM char indicating that its encoding is UTF-8, but you specified %s instead."
+            ),
+            $filename,
+            $charset
+        );
+    }
+
+    close $fh
+      or croak wrap_msg( dgettext( "po4a", "Cannot close %s after reading: %s" ), $filename, $! );
+}
+
+=item write($)
+
+Write the translated document to the given filename.
+
+This translated document data are provided by:
+ * C<< $self->docheader() >> holding the header text for the plugin, and
+ * C<< @{$self->{TT}{doc_out}} >> holding each line of the main translated text in the array.
+
+=cut
+
+sub write {
+    my $self     = shift;
+    my $filename = shift or confess "Cannot write to a file without filename";
+    my $charset  = shift || 'UTF-8';
+
+    use warnings FATAL => 'utf8';
+
+    my $fh;
+    if ( $filename eq '-' ) {
+        $fh = \*STDOUT;
+    } else {
+
+        # make sure the directory in which we should write the localized file exists
+        my $dir = $filename;
+        if ( $dir =~ m|/| ) {
+            $dir =~ s|/[^/]*$||;
+
+            File::Path::mkpath( $dir, 0, 0755 )    # Croaks on error
+              if ( length($dir) && !-e $dir );
+        }
+        open( $fh, ">:encoding($charset)", $filename )
+          or croak wrap_msg( dgettext( "po4a", "Cannot write to %s: %s" ), $filename, $! );
+    }
+
+    map { print $fh $_ } $self->docheader();
+    eval {
+        map { print $fh $_ } @{ $self->{TT}{doc_out} };
+
+        # we use the "eval {} or do {}" approach to deal with exceptions, cf https://perlmaven.com/fatal-errors-in-external-modules
+        # but we want it to fail only if there is an error. It seems to be some cases where "map" returns false even if there is no error.
+        # Thus this final 1 to evaluate to true in absence of error.
+        1;
+    } or do {
+        my $error = $@ || 'Unknown failure';
+        chomp $error;
+        if ( $charset ne 'UTF-8' && $error =~ /^"\\x\{([^"}]*)\}"/ ) {
+
+            # Attempt to write the char that cannot be written. Very fragile code
+            binmode STDERR, ':encoding(UTF-8)';
+            my $char = chr( hex($1) );
+            die wrap_msg(
+                dgettext( "po4a", "Malformed encoding while writing char '%s' to file %s with charset %s: %s" ),
+                $char, $filename, $charset, $error );
+        } else {
+            die wrap_msg( dgettext( "po4a", "Malformed encoding while writing to file %s with charset %s: %s" ),
+                $filename, $charset, $error );
+        }
+    };
+
+    if ( $filename ne '-' ) {
+        close $fh or croak wrap_msg( dgettext( "po4a", "Cannot close %s after writing: %s" ), $filename, $! );
+    }
+
+}
+
+=back
+
+=head2 Manipulating PO files
+
+=over 4
+
+=item readpo($)
+
+Add the content of a file (which name is passed as argument) to the
+existing input PO. The old content is not discarded.
+
+=item writepo($)
+
+Write the extracted PO file to the given filename.
+
+=item stats()
+
+Returns some statistics about the translation done so far. Please note that
+it's not the same statistics than the one printed by msgfmt
+--statistic. Here, it's stats about recent usage of the PO file, while
+msgfmt reports the status of the file. It is a wrapper to the
+Locale::Po4a::Po::stats_get function applied to the input PO file. Example
+of use:
+
+    [normal use of the po4a document...]
+
+    ($percent,$hit,$queries) = $document->stats();
+    print "We found translations for $percent\%  ($hit from $queries) of strings.\n";
+
+=back
+
+=cut
+
+sub getpoout {
+    return $_[0]->{TT}{po_out};
+}
+
+sub setpoout {
+    $_[0]->{TT}{po_out} = $_[1];
+}
+
+sub readpo {
+    $_[0]->{TT}{po_in}->read( $_[1] );
+}
+
+sub writepo {
+    $_[0]->{TT}{po_out}->write( $_[1] );
+}
+
+sub stats {
+    return $_[0]->{TT}{po_in}->stats_get();
+}
+
+=head2 Manipulating addenda
+
+=over 4
+
+=item addendum($)
+
+Please refer to L<po4a(7)|po4a.7> for more information on what addenda are,
+and how translators should write them. To apply an addendum to the translated
+document, simply pass its filename to this function and you are done ;)
+
+This function returns a non-null integer on error.
+
+=cut
+
+# Internal function to read the header.
+sub addendum_parse {
+    my $filename = shift;
+    my $charset  = shift || 'UTF-8';
+    my $header;
+
+    my ( $errcode, $mode, $position, $boundary, $bmode, $content ) = ( 1, "", "", "", "", "" );
+
+    unless ( open( INS, "<:encoding($charset)", $filename ) ) {
+        warn wrap_msg( dgettext( "po4a", "Cannot read from %s: %s" ), $filename, $! );
+        goto END_PARSE_ADDFILE;
+    }
+
+    $PerlIO::encoding::fallback = FB_CROAK;
+    eval {
+        unless ( defined( $header = <INS> ) && $header ) {
+            warn wrap_msg( dgettext( "po4a", "Cannot read po4a header from %s." ), $filename );
+            goto END_PARSE_ADDFILE;
+        }
+    } or do {
+        my $error = $@ || 'Unknown failure';
+        chomp $error;
+        die wrap_msg( dgettext( "po4a", "Malformed encoding while reading from file %s with charset %s: %s" ),
+            $filename, $charset, $error );
+    };
+
+    unless ( $header =~ s/PO4A-HEADER://i ) {
+        warn wrap_msg( dgettext( "po4a", "First line of %s does not look like a po4a header." ), $filename );
+        goto END_PARSE_ADDFILE;
+    }
+    foreach my $part ( split( /;/, $header ) ) {
+        unless ( $part =~ m/^\s*([^=]*)=(.*)$/ ) {
+            warn wrap_msg( dgettext( "po4a", "Syntax error in po4a header of %s, near \"%s\"" ), $filename, $part );
+            goto END_PARSE_ADDFILE;
+        }
+        my ( $key, $value ) = ( $1, $2 );
+        $key = lc($key);
+        if ( $key eq 'mode' ) {
+            $mode = lc($value);
+        } elsif ( $key eq 'position' ) {
+            $position = $value;
+        } elsif ( $key eq 'endboundary' ) {
+            $boundary = $value;
+            $bmode    = 'after';
+        } elsif ( $key eq 'beginboundary' ) {
+            $boundary = $value;
+            $bmode    = 'before';
+        } else {
+            warn wrap_msg( dgettext( "po4a", "Invalid argument in the po4a header of %s: %s" ), $filename, $key );
+            goto END_PARSE_ADDFILE;
+        }
+    }
+
+    unless ( length($mode) ) {
+        warn wrap_msg( dgettext( "po4a", "The po4a header of %s does not define the mode." ), $filename );
+        goto END_PARSE_ADDFILE;
+    }
+    unless ( $mode eq "before" || $mode eq "after" || $mode eq "eof" ) {
+        warn wrap_msg(
+            dgettext(
+                "po4a",
+                "Mode invalid in the po4a header of %s: should be 'before', 'after' or 'eof'. Instead, it is '%s'."
+            ),
+            $filename,
+            $mode
+        );
+        goto END_PARSE_ADDFILE;
+    }
+
+    unless ( length($position) || $mode eq "eof" ) {
+        warn wrap_msg( dgettext( "po4a", "The po4a header of %s does not define the position." ), $filename );
+        goto END_PARSE_ADDFILE;
+    }
+    if ( $mode eq "after" && length($boundary) == 0 ) {
+        warn wrap_msg( dgettext( "po4a", "No ending boundary given in the po4a header, but mode=after." ) );
+        goto END_PARSE_ADDFILE;
+    }
+    if ( $mode eq "eof" && length($position) ) {
+        warn wrap_msg( dgettext( "po4a", "No position needed when mode=eof." ) );
+        goto END_PARSE_ADDFILE;
+    }
+    if ( $mode eq "eof" && length($boundary) ) {
+        warn wrap_msg( dgettext( "po4a", "No ending boundary needed when mode=eof." ) );
+        goto END_PARSE_ADDFILE;
+    }
+
+    eval {
+        while ( defined( my $line = <INS> ) ) {
+            $content .= $line;
+        }
+    };
+    my $error = $@;
+    if ( length($error) ) {
+        chomp $error;
+        die wrap_msg( dgettext( "po4a", "Malformed encoding while reading from file %s with charset %s: %s" ),
+            $filename, $charset, $error );
+    }
+    close INS;
+
+    $errcode = 0;
+  END_PARSE_ADDFILE:
+    return ( $errcode, $mode, $position, $boundary, $bmode, $content );
+}
+
+sub mychomp {
+    my ($str) = shift;
+    chomp($str);
+    return $str;
+}
+
+sub addendum {
+    my ( $self, $filename ) = @_;
+
+    print STDERR wrap_mod( "po4a::transtractor::addendum", "Apply addendum %s", $filename )
+      if $self->debug();
+    unless ($filename) {
+        warn wrap_msg( dgettext( "po4a", "Cannot apply addendum when not given the filename" ) );
+        return 0;
+    }
+    die wrap_msg( dgettext( "po4a", "Addendum %s does not exist." ), $filename )
+      unless -e $filename;
+
+    my ( $errcode, $mode, $position, $boundary, $bmode, $content ) =
+      addendum_parse( $filename, $self->{TT}{'addendum_charset'} );
+    return 0 if ($errcode);
+
+    # In order to make addendum more intuitive, each array item of
+    # @{$self->{TT}{doc_out}} must not have internal "\n".  But previous parser
+    # code may put multiple internal "\n" to address things like placeholder
+    # tag handling.  Let's normalize array content.
+    # Use internal "\n" as delimiter but keep it by using the lookbehind trick.
+    @{ $self->{TT}{doc_out} } = map { split /(?<=\n)/, $_ } @{ $self->{TT}{doc_out} };
+
+    # Bugs around addendum is hard to understand.  So let's print involved data explicitly.
+    if ( $self->debug() ) {
+        print STDERR "Addendum position regex=$position\n";
+        print STDERR "Addendum mode=$mode\n";
+        if ( $mode eq "after" ) {
+            print STDERR "Addendum boundary regex=$boundary\n";
+            print STDERR "Addendum boundary mode=$bmode\n";
+        }
+        print STDERR "Addendum content (begin):\n";
+        print STDERR "$content";
+        print STDERR "Addendum content (end)\n";
+        print STDERR "Output items searched for the addendum insertion position:\n";
+        foreach my $item ( @{ $self->{TT}{doc_out} } ) {
+            print STDERR $item;
+            print STDERR "\n----- [ search item end marker with a preceding newline ] -----\n";
+        }
+        print STDERR "Start searching addendum insertion position...\n";
+    }
+
+    unless ( $mode eq 'eof' ) {
+        my $found = scalar grep { /$position/ } @{ $self->{TT}{doc_out} };
+        if ( $found == 0 ) {
+            warn wrap_msg( dgettext( "po4a", "No candidate position for the addendum %s." ), $filename );
+            return 0;
+        }
+        if ( $found > 1 ) {
+            warn wrap_msg( dgettext( "po4a", "More than one candidate position found for the addendum %s." ),
+                $filename );
+            return 0;
+        }
+    }
+
+    if ( $mode eq "eof" ) {
+        push @{ $self->{TT}{doc_out} }, $content;
+    } elsif ( $mode eq "before" ) {
+        if ( $self->verbose() > 1 || $self->debug() ) {
+            map {
+                print STDERR wrap_msg( dgettext( "po4a", "Addendum '%s' applied before this line: %s" ), $filename, $_ )
+                  if (/$position/);
+            } @{ $self->{TT}{doc_out} };
+        }
+        @{ $self->{TT}{doc_out} } = map { /$position/ ? ( $content, $_ ) : $_ } @{ $self->{TT}{doc_out} };
+    } else {
+        my @newres = ();
+
+        do {
+            # make sure it doesn't whine on empty document
+            my $line = scalar @{ $self->{TT}{doc_out} } ? shift @{ $self->{TT}{doc_out} } : "";
+            push @newres, $line;
+            my $outline = mychomp($line);
+            $outline =~ s/^[ \t]*//;
+
+            if ( $line =~ m/$position/ ) {
+                while ( $line = shift @{ $self->{TT}{doc_out} } ) {
+                    last if ( $line =~ /$boundary/ );
+                    push @newres, $line;
+                }
+                if ( defined $line ) {
+                    if ( $bmode eq 'before' ) {
+                        print wrap_msg( dgettext( "po4a", "Addendum '%s' applied before this line: %s" ),
+                            $filename, $outline )
+                          if ( $self->verbose() > 1 || $self->debug() );
+                        push @newres, $content;
+                        push @newres, $line;
+                    } else {
+                        print wrap_msg( dgettext( "po4a", "Addendum '%s' applied after the line: %s." ),
+                            $filename, $outline )
+                          if ( $self->verbose() > 1 || $self->debug() );
+                        push @newres, $line;
+                        push @newres, $content;
+                    }
+                } else {
+                    print wrap_msg( dgettext( "po4a", "Addendum '%s' applied at the end of the file." ), $filename )
+                      if ( $self->verbose() > 1 || $self->debug() );
+                    push @newres, $content;
+                }
+            }
+        } while ( scalar @{ $self->{TT}{doc_out} } );
+        @{ $self->{TT}{doc_out} } = @newres;
+    }
+    print STDERR wrap_mod( "po4a::transtractor::addendum", "Done with addendum %s", $filename )
+      if $self->debug();
+    return 1;
+}
+
+=back
+
+=head1 INTERNAL FUNCTIONS used to write derivative parsers
+
+=head2 Getting input, providing output
+
+Four functions are provided to get input and return output. They are very
+similar to shift/unshift and push/pop of Perl.
+
+ * Perl shift returns the first array item and drop it from the array.
+ * Perl unshift prepends an item to the array as the first array item.
+ * Perl pop returns the last array item and drop it from the array.
+ * Perl push appends an item to the array as the last array item.
+
+The first pair is about input, while the second is about output. Mnemonic: in
+input, you are interested in the first line, what shift gives, and in output
+you want to add your result at the end, like push does.
+
+=over 4
+
+=item shiftline()
+
+This function returns the first line to be parsed and its corresponding
+reference (packed as an array) from the array C<< @{$self->{TT}{doc_in}} >> and
+drop these first 2 array items.  Here, the reference is provided by a string
+C<< $filename:$linenum >>.
+
+=item unshiftline($$)
+
+Unshifts the last shifted line of the input document and its corresponding
+reference back to the head of C<< {$self->{TT}{doc_in}} >>.
+
+=item pushline($)
+
+Push a new line to the end of C<< {$self->{TT}{doc_out}} >>.
+
+=item popline()
+
+Pop the last pushed line from the end of C<< {$self->{TT}{doc_out}} >>.
+
+=back
+
+=cut
+
+sub shiftline {
+    my ( $line, $ref ) = ( shift @{ $_[0]->{TT}{doc_in} }, shift @{ $_[0]->{TT}{doc_in} } );
+    return ( $line, $ref );
+}
+
+sub unshiftline {
+    my $self = shift;
+    unshift @{ $self->{TT}{doc_in} }, @_;
+}
+
+sub pushline { push @{ $_[0]->{TT}{doc_out} }, $_[1] if defined $_[1]; }
+sub popline  { return pop @{ $_[0]->{TT}{doc_out} }; }
+
+=head2 Marking strings as translatable
+
+One function is provided to handle the text which should be translated.
+
+=over 4
+
+=item translate($$$)
+
+Mandatory arguments:
+
+=over 2
+
+=item -
+
+A string to translate
+
+=item -
+
+The reference of this string (i.e. position in inputfile)
+
+=item -
+
+The type of this string (i.e. the textual description of its structural role;
+used in Locale::Po4a::Po::gettextization(); see also L<po4a(7)|po4a.7>,
+section B<Gettextization: how does it work?>)
+
+=back
+
+This function can also take some extra arguments. They must be organized as
+a hash. For example:
+
+  $self->translate("string","ref","type",
+                   'wrap' => 1);
+
+=over
+
+=item B<wrap>
+
+boolean indicating whether we can consider that whitespaces in string are
+not important. If yes, the function canonizes the string before looking for
+a translation or extracting it, and wraps the translation.
+
+=item B<wrapcol>
+
+the column at which we should wrap (default: the value of B<wrapcol> specified
+during creation of the TransTractor or 76).
+
+The negative value will be substracted from the default.
+
+=item B<comment>
+
+an extra comment to add to the entry.
+
+=back
+
+Actions:
+
+=over 2
+
+=item -
+
+Pushes the string, reference and type to po_out.
+
+=item -
+
+Returns the translation of the string (as found in po_in) so that the
+parser can build the doc_out.
+
+=item -
+
+Handles the charsets to recode the strings before sending them to
+po_out and before returning the translations.
+
+=back
+
+=back
+
+=cut
+
+sub translate {
+    my $self = shift;
+    my ( $string, $ref, $type ) = ( shift, shift, shift );
+    my (%options) = @_;
+
+    return "" unless length($string);
+
+    # my %validoption;
+    # map { $validoption{$_}=1 } (qw(wrap wrapcoll));
+    # foreach (keys %options) {
+    #        Carp::confess "internal error: translate() called with unknown arg $_. Valid options: $validoption"
+    #            unless $validoption{$_};
+    # }
+
+    if ( !defined $options{'wrapcol'} ) {
+        $options{'wrapcol'} = $self->{TT}{wrapcol}
+    } elsif ( $options{'wrapcol'} < 0 ) {
+        $options{'wrapcol'} = $self->{TT}{wrapcol} + $options{'wrapcol'};
+    }
+    my $transstring = $self->{TT}{po_in}->gettext(
+        $string,
+        'wrap'    => $options{'wrap'} || 0,
+        'wrapcol' => $options{'wrapcol'}
+    );
+
+    # the comments provided by the modules are automatic comments from the PO point of view
+    $self->{TT}{po_out}->push(
+        'msgid'     => $string,
+        'reference' => $ref,
+        'type'      => $type,
+        'automatic' => $options{'comment'},
+        'flags'     => $options{'flags'},
+        'wrap'      => $options{'wrap'} || 0,
+    );
+
+    if ( $options{'wrap'} || 0 ) {
+        $transstring =~ s/( *)$//s;
+        my $trailing_spaces = $1 || "";
+        $transstring =~ s/(?<!\\) +$//gm;
+        $transstring .= $trailing_spaces;
+    }
+
+    return $transstring;
+}
+
+=head2 Misc functions
+
+=over 4
+
+=item verbose()
+
+Returns if the verbose option was passed during the creation of the
+TransTractor.
+
+=cut
+
+sub verbose {
+    if ( defined $_[1] ) {
+        $_[0]->{TT}{verbose} = $_[1];
+    } else {
+        return $_[0]->{TT}{verbose} || 0;    # undef and 0 have the same meaning, but one generates warnings
+    }
+}
+
+=item debug()
+
+Returns if the debug option was passed during the creation of the
+TransTractor.
+
+=cut
+
+sub debug {
+    return $_[0]->{TT}{debug};
+}
+
+=item get_in_charset()
+
+This function return the charset that was provided as master charset
+
+=cut
+
+sub get_in_charset() {
+    return $_[0]->{TT}{'file_in_charset'};
+}
+
+=item get_out_charset()
+
+This function will return the charset that should be used in the output
+document (usually useful to substitute the input document's detected charset
+where it has been found).
+
+It will use the output charset specified in the command line. If it wasn't
+specified, it will use the input PO's charset, and if the input PO has the
+default "CHARSET", it will return the input document's charset, so that no
+encoding is performed.
+
+=cut
+
+sub get_out_charset {
+    my $self = shift;
+
+    # Prefer the value specified on the command line
+    return $self->{TT}{'file_out_charset'}
+      if length( $self->{TT}{'file_out_charset'} );
+
+    return $self->{TT}{po_in}->get_charset if $self->{TT}{po_in}->get_charset ne 'CHARSET';
+
+    return $self->{TT}{'file_in_charset'} if length( $self->{TT}{'file_in_charset'} );
+
+    return 'UTF-8';
+}
+
+# Push the translation of a Yaml document or Yaml Front-Matter header, parsed by YAML::Tiny in any case
+# $is_yfm is a boolean indicating whether we are dealing with a Front Matter (true value) or whole document (false value)
+sub handle_yaml {
+    my ( $self, $is_yfm, $blockref, $yamlarray, $yfm_keys, $yfm_skip_array, $yfm_paths ) = @_;
+
+    die "Empty YAML " . ( $is_yfm ? "Front Matter" : "document" ) unless ( length($yamlarray) > 0 );
+
+    my ( $indent, $ctx ) = ( 0, "" );
+    foreach my $cursor (@$yamlarray) {
+
+        # An empty document
+        if ( !defined $cursor ) {
+            $self->pushline("---\n");
+
+            # Do nothing
+
+            # A scalar document
+        } elsif ( !ref $cursor ) {
+            $self->pushline("---\n");
+            $self->pushline(
+                format_scalar(
+                    $self->translate(
+                        $cursor, $blockref,
+                        "YAML " . ( $is_yfm ? "Front Matter " : "" ) . "(scalar)",
+                        "wrap" => 0
+                    )
+                )
+            );
+
+            # A list at the root
+        } elsif ( ref $cursor eq 'ARRAY' ) {
+            if (@$cursor) {
+                $self->pushline("---\n");
+                do_array( $self, $is_yfm, $blockref, $cursor, $indent, $ctx, $yfm_keys, $yfm_skip_array, $yfm_paths );
+            } else {
+                $self->pushline("---[]\n");
+            }
+
+            # A hash at the root
+        } elsif ( ref $cursor eq 'HASH' ) {
+            if (%$cursor) {
+                $self->pushline("---\n");
+                do_hash( $self, $is_yfm, $blockref, $cursor, $indent, $ctx, $yfm_keys, $yfm_skip_array, $yfm_paths );
+            } else {
+                $self->pushline("--- {}\n");
+            }
+
+        } else {
+            die( "Cannot serialize " . ref($cursor) );
+        }
+    }
+
+    # Escape the string to make it valid in YAML.
+    # This is very similar to YAML::Tiny::_dump_scalar but does not do the internal->UTF-8 decoding,
+    # as the translations that we feed into this function are already in UTF-8
+    sub format_scalar {
+        my $string = $_[0];
+        my $is_key = $_[1];
+
+        return '~'  unless defined $string;
+        return "''" unless length $string;
+        if ( Scalar::Util::looks_like_number($string) ) {
+
+            # keys and values that have been used as strings get quoted
+            if ($is_key) {
+                return qq['$string'];
+            } else {
+                return $string;
+            }
+        }
+        if ( $string =~ /[\\\'\n]/ ) {
+            $string =~ s/\\/\\\\/g;
+            $string =~ s/"/\\"/g;
+            $string =~ s/\n/\\n/g;
+            return qq|"$string"|;
+        }
+        if ( $string =~ /(?:^[~!@#%&*|>?:,'"`{}\[\]]|^-+$|\s|:\z)/ ) {
+            return "'$string'";
+        }
+        return $string;
+    }
+
+    sub do_array {
+        my ( $self, $is_yfm, $blockref, $array, $indent, $ctx, $yfm_keys, $yfm_skip_array, $yfm_paths ) = @_;
+        foreach my $el (@$array) {
+            my $header = ( '  ' x $indent ) . '- ';
+            my $type   = ref $el;
+            if ( !$type ) {
+                if ($yfm_skip_array) {
+                    $self->pushline( $header . YAML::Tiny::_dump_scalar( "dummy", $el, 0 ) . "\n" );
+                } else {
+                    $self->pushline(
+                        $header
+                          . format_scalar(
+                            $self->translate(
+                                $el,                                                            $blockref,
+                                ( $is_yfm ? "Yaml Front Matter " : "" ) . "Array Element:$ctx", "wrap" => 0
+                            )
+                          )
+                          . "\n"
+                    );
+                }
+
+            } elsif ( $type eq 'ARRAY' ) {
+                if (@$el) {
+                    $self->pushline( $header . "\n" );
+                    do_array( $self, $is_yfm, $blockref, $el, $indent + 1,
+                        $ctx, $yfm_keys, $yfm_skip_array, $yfm_paths );
+                } else {
+                    $self->pushline( $header . " []\n" );
+                }
+
+            } elsif ( $type eq 'HASH' ) {
+                if ( keys %$el ) {
+                    $self->pushline( $header . "\n" );
+                    do_hash( $self, $is_yfm, $blockref, $el, $indent + 1, $ctx, $yfm_keys, $yfm_skip_array,
+                        $yfm_paths );
+                } else {
+                    $self->pushline( $header . " {}\n" );
+                }
+
+            } else {
+                die "YAML $type references not supported";
+            }
+        }
+    }
+
+    sub do_hash {
+        my ( $self, $is_yfm, $blockref, $hash, $indent, $ctx, $yfm_keys, $yfm_skip_array, $yfm_paths ) = @_;
+
+        foreach my $name ( sort keys %$hash ) {
+            my $el     = $hash->{$name} // "";
+            my $header = ( '  ' x $indent ) . YAML::Tiny::_dump_scalar( "dummy", $name, 1 ) . ":";
+
+            unless ( length($el) > 0 ) {    # empty element, as in "tags: " with nothing after the column
+                $self->pushline( $header . "\n" );
+                next;
+            }
+
+            my $type = ref $el;
+            if ( !$type ) {
+                my %keys  = %{$yfm_keys};
+                my %paths = %{$yfm_paths};
+                my $path  = "$ctx $name" =~ s/^\s+|\s+$//gr;  # Need to trim the path, at least when there is no ctx yet
+
+                if ( ( $el eq 'false' ) or ( $el eq 'true' ) ) {    # Do not translate nor quote booleans
+                    $self->pushline("$header $el\n");
+                } elsif (
+                    ( scalar %keys > 0  && exists $keys{$name} )  or    # the key we need is provided
+                    ( scalar %paths > 0 && exists $paths{$path} ) or    # that path is provided
+                    ( scalar %keys == 0 && scalar %paths == 0 )         # no key and no path provided
+                  )
+                {
+                    my $translation = $self->translate(
+                        $el, $blockref,
+                        ( $is_yfm ? "Yaml Front Matter " : "" ) . "Hash Value:$ctx $name",
+                        "wrap" => 0
+                    );
+
+                    # add extra quotes to the parameter, as a protection to the extra chars that the translator could add
+                    $self->pushline( $header . ' ' . format_scalar($translation) . "\n" );
+                } else {
+
+                    # Work around a bug in YAML::Tiny that quotes numbers
+                    # See https://github.com/Perl-Toolchain-Gang/YAML-Tiny#additional-perl-specific-notes
+                    if ( Scalar::Util::looks_like_number($el) ) {
+                        $self->pushline("$header $el\n");
+                    } else {
+                        $self->pushline( $header . ' ' . YAML::Tiny::_dump_scalar( "dummy", $el ) . "\n" );
+                    }
+                }
+
+            } elsif ( $type eq 'ARRAY' ) {
+                if (@$el) {
+                    $self->pushline( $header . "\n" );
+                    do_array(
+                        $self,     $is_yfm,         $blockref, $el, $indent + 1, "$ctx $name",
+                        $yfm_keys, $yfm_skip_array, $yfm_paths
+                    );
+                } else {
+                    $self->pushline( $header . " []\n" );
+                }
+
+            } elsif ( $type eq 'HASH' ) {
+                if ( keys %$el ) {
+                    $self->pushline( $header . "\n" );
+                    do_hash(
+                        $self,     $is_yfm,         $blockref, $el, $indent + 1, "$ctx $name",
+                        $yfm_keys, $yfm_skip_array, $yfm_paths
+                    );
+                } else {
+                    $self->pushline( $header . " {}\n" );
+                }
+
+            } else {
+                die "YAML $type references not supported";
+            }
+        }
+    }
+}
+
+=back
+
+=head1 FUTURE DIRECTIONS
+
+One shortcoming of the current TransTractor is that it can't handle
+translated document containing all languages, like debconf templates, or
+.desktop files.
+
+To address this problem, the only interface changes needed are:
+
+=over 2
+
+=item -
+
+take a hash as po_in_name (a list per language)
+
+=item -
+
+add an argument to translate to indicate the target language
+
+=item -
+
+make a pushline_all function, which would make pushline of its content for
+all languages, using a map-like syntax:
+
+    $self->pushline_all({ "Description[".$langcode."]=".
+                          $self->translate($line,$ref,$langcode)
+                        });
+
+=back
+
+Will see if it's enough ;)
+
+=head1 AUTHORS
+
+ Denis Barbier <barbier@linuxfr.org>
+ Martin Quinson (mquinson#debian.org)
+ Jordi Vilalta <jvprat@gmail.com>
+
+=cut
+
+1;