summaryrefslogtreecommitdiffstats
path: root/lib/Locale/Po4a/Po.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Locale/Po4a/Po.pm')
-rw-r--r--lib/Locale/Po4a/Po.pm1618
1 files changed, 1618 insertions, 0 deletions
diff --git a/lib/Locale/Po4a/Po.pm b/lib/Locale/Po4a/Po.pm
new file mode 100644
index 0000000..d87a145
--- /dev/null
+++ b/lib/Locale/Po4a/Po.pm
@@ -0,0 +1,1618 @@
+# Locale::Po4a::Po -- manipulation of PO files
+#
+# This program is free software; you may redistribute it and/or modify it
+# under the terms of GPL v2.0 or later (see COPYING).
+
+############################################################################
+# Modules and declarations
+############################################################################
+
+=encoding UTF-8
+
+=head1 NAME
+
+Locale::Po4a::Po - PO file manipulation module
+
+=head1 SYNOPSIS
+
+ use Locale::Po4a::Po;
+ my $pofile=Locale::Po4a::Po->new();
+
+ # Read PO file
+ $pofile->read('file.po');
+
+ # Add an entry
+ $pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour',
+ 'flags' => "wrap", 'reference'=>'file.c:46');
+
+ # Extract a translation
+ $pofile->gettext("Hello"); # returns 'bonjour'
+
+ # Write back to a file
+ $pofile->write('otherfile.po');
+
+=head1 DESCRIPTION
+
+Locale::Po4a::Po is a module that allows you to manipulate message
+catalogs. You can load and write from/to a file (which extension is often
+I<po>), you can build new entries on the fly or request for the translation
+of a string.
+
+For a more complete description of message catalogs in the PO format and
+their use, please refer to the info documentation of the gettext program (node "`PO Files"').
+
+This module is part of the po4a project, which objective is to use PO files
+(designed at origin to ease the translation of program messages) to
+translate everything, including documentation (man page, info manual),
+package description, debconf templates, and everything which may benefit
+from this.
+
+=head1 OPTIONS ACCEPTED BY THIS MODULE
+
+=over 4
+
+=item B<--porefs> I<type>
+
+Specify the reference format. Argument I<type> can be one of B<never>
+to not produce any reference, B<file> to only specify the file
+without the line number, B<counter> to replace line number by an
+increasing counter, and B<full> to include complete references (default: full).
+
+=item B<--wrap-po> B<no>|B<newlines>|I<number> (default: 76)
+
+Specify how the po file should be wrapped. This gives the choice between either
+files that are nicely wrapped but could lead to git conflicts, or files that are
+easier to handle automatically, but harder to read for humans.
+
+Historically, the gettext suite has reformatted the po files at the 77th column
+for cosmetics. This option specifies the behavior of po4a. If set to a numerical
+value, po4a will wrap the po file after this column and after newlines in the
+content. If set to B<newlines>, po4a will only split the msgid and msgstr after
+newlines in the content. If set to B<no>, po4a will not wrap the po file at all.
+The reference comments are always wrapped by the gettext tools that we use internally.
+
+Note that this option has no impact on how the msgid and msgstr are wrapped, i.e.
+on how newlines are added to the content of these strings.
+
+=item B<--msgid-bugs-address> I<email@address>
+
+Set the report address for msgid bugs. By default, the created POT files
+have no Report-Msgid-Bugs-To fields.
+
+=item B<--copyright-holder> I<string>
+
+Set the copyright holder in the POT header. The default value is
+"Free Software Foundation, Inc."
+
+=item B<--package-name> I<string>
+
+Set the package name for the POT header. The default is "PACKAGE".
+
+=item B<--package-version> I<string>
+
+Set the package version for the POT header. The default is "VERSION".
+
+=back
+
+=cut
+
+use IO::File;
+
+require Exporter;
+
+package Locale::Po4a::Po;
+use DynaLoader;
+
+use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext);
+
+use subs qw(makespace);
+use vars qw(@ISA @EXPORT_OK);
+@ISA = qw(Exporter DynaLoader);
+@EXPORT = qw(%debug);
+@EXPORT_OK = qw(&move_po_if_needed);
+
+use Locale::Po4a::TransTractor;
+
+# Try to use a C extension if present.
+eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION");
+
+use 5.16.0;
+use strict;
+use warnings;
+
+use Carp qw(croak);
+use File::Basename;
+use File::Path; # mkdir before write
+use File::Copy; # move
+use POSIX qw(strftime floor);
+use Time::Local;
+
+use Encode;
+use Config;
+
+my @known_flags = qw(
+ wrap no-wrap fuzzy
+ c-format no-c-format
+ objc-format no-objc-format
+ sh-format no-sh-format
+ python-format no-python-format
+ python-brace-format no-python-brace-format
+ lisp-format no-lisp-format
+ elisp-format no-elisp-format
+ librep-format no-librep-format
+ scheme-format no-scheme-format
+ smalltalk-format no-smalltalk-format
+ java-format no-java-format
+ csharp-format no-csharp-format
+ awk-format no-awk-format
+ object-pascal-format no-object-pascal-format
+ ycp-format no-ycp-format
+ tcl-format no-tcl-format
+ perl-format no-perl-format
+ perl-brace-format no-perl-brace-format
+ php-format no-php-format
+ gcc-internal-format no-gcc-internal-format
+ gfc-internal-format no-gfc-internal-format
+ qt-format no-qt-format
+ qt-plural-format no-qt-plural-format
+ kde-format no-kde-format
+ boost-format no-boost-format
+ lua-format no-lua-format
+ javascript-format no-javascript-format
+);
+
+# Custom flags, used for example by weblate
+push @known_flags, 'markdown-text';
+
+our %debug = (
+ 'canonize' => 0,
+ 'quote' => 0,
+ 'escape' => 0,
+ 'encoding' => 0,
+ 'filter' => 0
+);
+
+=head1 Functions concerning entire message catalogs
+
+=over 4
+
+=item new()
+
+Creates a new message catalog. If an argument is provided, it's the name of
+a PO file we should load.
+
+=cut
+
+sub new {
+ my ( $this, $options ) = ( shift, shift );
+ my $class = ref($this) || $this;
+ my $self = {};
+ bless $self, $class;
+ $self->initialize($options);
+
+ my $filename = shift;
+ $self->read($filename) if length($filename);
+ return $self;
+}
+
+# Return the numerical timezone (e.g. +0200)
+# Neither the %z nor the %s formats of strftime are portable:
+# '%s' is not supported on Solaris and '%z' indicates
+# "2006-10-25 19:36E. Europe Standard Time" on MS Windows.
+sub timezone {
+ my ($time) = @_;
+ my @l = localtime($time);
+
+ my $diff = floor( timegm(@l) / 60 + 0.5 ) - floor( $time / 60 + 0.5 );
+ my $sign = ( $diff >= 0 ? 1 : -1 );
+ $diff = abs($diff);
+
+ my $h = $sign * floor( $diff / 60 );
+ my $m = $diff % 60;
+
+ return sprintf "%+03d%02d\n", $h, $m;
+}
+
+sub initialize {
+ my ( $self, $options ) = ( shift, shift );
+ my $time = time;
+ my $date = strftime( "%Y-%m-%d %H:%M", localtime($time) ) . timezone($time);
+ chomp $date;
+
+ $self->{options}{'porefs'} = 'full';
+ $self->{options}{'msgid-bugs-address'} = undef;
+ $self->{options}{'copyright-holder'} = "Free Software Foundation, Inc.";
+ $self->{options}{'package-name'} = "PACKAGE";
+ $self->{options}{'package-version'} = "VERSION";
+ $self->{options}{'wrap-po'} = 76;
+ $self->{options}{'pot-language'} = "";
+
+ foreach my $opt ( keys %$options ) {
+
+ # print STDERR "$opt: ".(defined($options->{$opt})?$options->{$opt}:"(undef)")."\n";
+ if ( $options->{$opt} ) {
+ die wrap_mod( "po4a::po", dgettext( "po4a", "Unknown option: %s" ), $opt )
+ unless exists $self->{options}{$opt};
+ $self->{options}{$opt} = $options->{$opt};
+ }
+ }
+ $self->{options}{'wrap-po'} =~ /^(no|newlines|\d+)$/
+ || die wrap_mod(
+ "po4a::po",
+ dgettext( "po4a", "Invalid value for option 'wrap-po' ('%s' is not 'no' nor 'newlines' nor a number)" ),
+ $self->{options}{'wrap-po'}
+ );
+
+ $self->{options}{'porefs'} =~ /^(full|counter|noline|file|none|never)?$/
+ || die wrap_mod(
+ "po4a::po",
+ dgettext(
+ "po4a",
+ "Invalid value for option 'porefs' ('%s' is "
+ . "not one of 'full', 'counter', 'noline', 'file' or 'never')"
+ ),
+ $self->{options}{'porefs'}
+ );
+ $self->{options}{'porefs'} =~ s/noline/file/; # backward compat. 'file' used to be called 'noline'.
+ $self->{options}{'porefs'} =~ s/none/never/; # backward compat. 'never' used to be called 'none'.
+ if ( $self->{options}{'porefs'} =~ m/^counter/ ) {
+ $self->{counter} = {};
+ }
+
+ $self->{po} = ();
+ $self->{count} = 0; # number of msgids in the PO
+ # count_doc: number of strings in the document
+ # (duplicate strings counted multiple times)
+ $self->{count_doc} = 0;
+ $self->{gettextize_types} = (); # Type of each msgid found in the doc, in order
+ # We cannot use {$msgid}{'type'} as a type because for duplicate entries, the type is overwritten.
+ # So we have to copy the same info to this separate array, which is accessed through type_doc()
+ $self->{header_comment} =
+ " SOME DESCRIPTIVE TITLE\n"
+ . " Copyright (C) YEAR "
+ . $self->{options}{'copyright-holder'} . "\n"
+ . " This file is distributed under the same license "
+ . "as the "
+ . $self->{options}{'package-name'}
+ . " package.\n"
+ . " FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n" . "\n"
+ . ", fuzzy";
+
+ # $self->header_tag="fuzzy";
+ $self->{header} = escape_text(
+ "Project-Id-Version: "
+ . $self->{options}{'package-name'} . " "
+ . $self->{options}{'package-version'} . "\n"
+ . (
+ ( defined $self->{options}{'msgid-bugs-address'} )
+ ? "Report-Msgid-Bugs-To: " . $self->{options}{'msgid-bugs-address'} . "\n"
+ : ""
+ )
+ . "POT-Creation-Date: $date\n"
+ . "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+ . "Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n"
+ . "Language-Team: LANGUAGE <LL\@li.org>\n"
+ . "Language: "
+ . $self->{options}{'pot-language'} . "\n"
+ . "MIME-Version: 1.0\n"
+ . "Content-Type: text/plain; charset=UTF-8\n"
+ . "Content-Transfer-Encoding: 8bit\n"
+ );
+
+ $self->{footer} = [];
+
+ # To make stats about gettext hits
+ $self->stats_clear();
+}
+
+=item read($)
+
+Reads a PO file (which name is given as argument). Previously existing
+entries in self are not removed, the new ones are added to the end of the
+catalog.
+
+=cut
+
+sub read {
+ my $self = shift;
+ my $filename = shift
+ or croak wrap_mod( "po4a::po", dgettext( "po4a", "Please provide a non-null filename" ) );
+
+ my $charset = shift // 'UTF-8';
+ $charset = 'UTF-8' if $charset eq "CHARSET";
+ warn "Read $filename with encoding: $charset" if $debug{'encoding'};
+
+ my $checkvalidity = shift // 1;
+
+ my $lang = basename($filename);
+ $lang =~ s/\.po$//;
+ $self->{lang} = $lang;
+
+ if ($checkvalidity) { # We sometimes need to read a file even if it may be invalid (eg to test whether it's empty)
+ my $cmd = "msgfmt" . $Config{_exe} . " --check-format --check-domain -o /dev/null \"" . $filename . '"';
+
+ my $locale = $ENV{'LC_ALL'};
+ $ENV{'LC_ALL'} = "C";
+ my $out = qx/$cmd 2>&1/;
+ $ENV{'LC_ALL'} = $locale;
+
+ die wrap_msg( dgettext( "po4a", "Invalid po file %s:\n%s" ), $filename, $out )
+ unless ( $? == 0 );
+ }
+
+ my $fh;
+ if ( $filename eq '-' ) {
+ $fh = *STDIN;
+ } else {
+ open( $fh, "<:encoding($charset)", $filename )
+ or croak wrap_mod( "po4a::po", dgettext( "po4a", "Cannot read from %s: %s" ), $filename, $! );
+ }
+
+ ## Read paragraphs line-by-line
+ my $pofile = "";
+ while ( defined( my $textline = <$fh> ) ) {
+ $pofile .= $textline;
+ }
+ $pofile =~ s/\r\n/\n/sg; # Reading a DOS-encoded file from Linux (native files are handled in all cases)
+
+ # If we did not get the charset right, reload the file with the right one
+ if ( $pofile =~ /charset=(.*?)[\s\\]/ ) {
+ my $detected_charset = $1;
+
+ if ( $detected_charset ne $charset && uc($detected_charset) ne $charset && uc($detected_charset) ne 'CHARSET' )
+ {
+ warn "Reloading the PO file, changing the charset from '$charset' to '$detected_charset'"
+ if $debug{'encoding'};
+ $self->read( $filename, $detected_charset, $checkvalidity );
+ return;
+ }
+ }
+
+ if ( $pofile =~ m/^\N{BOM}/ ) { # UTF-8 BOM detected
+ croak "BOM detected";
+ croak wrap_msg(
+ dgettext(
+ "po4a",
+ "The file %s starts with a BOM char indicating that its encoding is UTF-8, but you specified %s instead."
+ ),
+ $filename,
+ $charset
+ ) if ( uc($charset) ne 'UTF-8' );
+ $pofile =~ s/^\N{BOM}//;
+ }
+
+ if ( $filename ne '-' ) {
+ close $fh
+ or croak wrap_mod( "po4a::po", dgettext( "po4a", "Cannot close %s after reading: %s" ), $filename, $! );
+ }
+
+ my $linenum = 0;
+
+ foreach my $msg ( split( /\n\n/, $pofile ) ) {
+ my ( $msgid, $msgstr, $comment, $previous, $automatic, $reference, $flags, $buffer );
+ my ( $msgid_plural, $msgstr_plural );
+ if ( $msg =~ m/^#~/m ) {
+ push( @{ $self->{footer} }, $msg );
+ next;
+ }
+ foreach my $line ( split( /\n/, $msg ) ) {
+ $linenum++;
+ if ( $line =~ /^#\. ?(.*)$/ ) { # Automatic comment
+ $automatic .= ( defined($automatic) ? "\n" : "" ) . $1;
+
+ } elsif ( $line =~ /^#: ?(.*)$/ ) { # reference
+ $reference .= ( defined($reference) ? "\n" : "" ) . $1;
+
+ } elsif ( $line =~ /^#, ?(.*)$/ ) { # flags
+ $flags .= ( defined($flags) ? "\n" : "" ) . $1;
+
+ } elsif ( $line =~ /^#\| ?(.*)$/ ) { # previous translation
+ $previous .= ( defined($previous) ? "\n" : "" ) . ( $1 || "" );
+
+ } elsif ( $line =~ /^#(.*)$/ ) { # Translator comments
+ $comment .= ( defined($comment) ? "\n" : "" ) . ( $1 || "" );
+
+ } elsif ( $line =~ /^msgid (".*")$/ ) { # begin of msgid
+ $buffer = $1;
+
+ } elsif ( $line =~ /^msgid_plural (".*")$/ ) {
+
+ # begin of msgid_plural, end of msgid
+
+ $msgid = $buffer;
+ $buffer = $1;
+
+ } elsif ( $line =~ /^msgstr (".*")$/ ) {
+
+ # begin of msgstr, end of msgid
+
+ $msgid = $buffer;
+ $buffer = "$1";
+
+ } elsif ( $line =~ /^msgstr\[([0-9]+)\] (".*")$/ ) {
+
+ # begin of msgstr[x], end of msgid_plural or msgstr[x-1]
+
+ # Note: po4a cannot uses plural forms
+ # (no integer to use the plural form)
+ # * drop the msgstr[x] where x >= 2
+ # * use msgstr[0] as the translation of msgid
+ # * use msgstr[1] as the translation of msgid_plural
+
+ if ( $1 eq "0" ) {
+ $msgid_plural = $buffer;
+ $buffer = "$2";
+ } elsif ( $1 eq "1" ) {
+ $msgstr = $buffer;
+ $buffer = "$2";
+ } elsif ( $1 eq "2" ) {
+ $msgstr_plural = $buffer;
+ warn wrap_ref_mod( "$filename:$linenum", "po4a::po",
+ dgettext( "po4a", "Messages with more than 2 plural forms are not supported." ) );
+ }
+ } elsif ( $line =~ /^(".*")$/ ) {
+
+ # continuation of a line
+ $buffer .= "\n$1";
+
+ } else {
+ warn wrap_ref_mod( "$filename:$linenum", "po4a::po", dgettext( "po4a", "Parse error at: -->%s<--" ),
+ $line );
+ }
+ }
+ $linenum++;
+ if ( defined $msgid_plural ) {
+ $msgstr_plural = $buffer;
+
+ $msgid = unquote_text($msgid) if ( defined($msgid) );
+ $msgstr = unquote_text($msgstr) if ( defined($msgstr) );
+
+ $self->push_raw(
+ 'msgid' => $msgid,
+ 'msgstr' => $msgstr,
+ 'reference' => $reference,
+ 'flags' => $flags,
+ 'comment' => $comment,
+ 'previous' => $previous,
+ 'automatic' => $automatic,
+ 'plural' => 0
+ );
+
+ $msgid_plural = unquote_text($msgid_plural)
+ if ( defined($msgid_plural) );
+ $msgstr_plural = unquote_text($msgstr_plural)
+ if ( defined($msgstr_plural) );
+
+ $self->push_raw(
+ 'msgid' => $msgid_plural,
+ 'msgstr' => $msgstr_plural,
+ 'reference' => $reference,
+ 'flags' => $flags,
+ 'comment' => $comment,
+ 'previous' => $previous,
+ 'automatic' => $automatic,
+ 'plural' => 1
+ );
+ } else {
+ $msgstr = $buffer;
+
+ $msgid = unquote_text($msgid) if ( defined($msgid) );
+ $msgstr = unquote_text($msgstr) if ( defined($msgstr) );
+
+ $self->push_raw(
+ 'msgid' => $msgid,
+ 'msgstr' => $msgstr,
+ 'reference' => $reference,
+ 'flags' => $flags,
+ 'comment' => $comment,
+ 'previous' => $previous,
+ 'automatic' => $automatic
+ );
+ }
+ }
+}
+
+=item write($)
+
+Writes the current catalog to the given file.
+
+=cut
+
+sub write {
+ my $self = shift;
+ my $filename = shift
+ or croak dgettext( "po4a", "Cannot write to a file without filename" ) . "\n";
+
+ my $fh;
+ if ( $filename eq '-' ) {
+ $fh = \*STDOUT;
+ } else {
+
+ # make sure the directory in which we should write the localized
+ # file exists
+ my $dir = $filename;
+ if ( $dir =~ m|/| ) {
+ $dir =~ s|/[^/]*$||;
+
+ File::Path::mkpath( $dir, 0, 0755 ) # Croaks on error
+ if ( length($dir) && !-e $dir );
+ }
+ open( $fh, '>:encoding(UTF-8)', $filename )
+ or croak wrap_mod( "po4a::po", dgettext( "po4a", "Cannot write to %s: %s" ), $filename, $! );
+ }
+
+ print $fh "" . format_comment( $self->{header_comment}, "" )
+ if length( $self->{header_comment} );
+
+ # Force the encoding of PO files in UTF-8 on disk, because msgmerge can get messed up when mixing encodings
+ # See https://savannah.gnu.org/bugs/index.php?65104
+ my $header = $self->{header};
+ $header =~ /charset=([^\s\\]*)/i;
+ my $oldcharset = $1 // '';
+ warn sprintf(
+ dgettext(
+ "po4a",
+ "msgmerge suffers some bugs when PO files are not encoded in UTF-8; Recoding %s to UTF-8 (was %s) to circumvent the issue.\n"
+ ),
+ $filename,
+ $oldcharset
+ ) if $oldcharset ne 'UTF-8';
+ $header =~ s/charset=[^\s\\]*/charset=UTF-8/i;
+
+ print $fh "msgid \"\"\n";
+ print $fh "msgstr " . quote_text( $header, $self->{options}{'wrap-po'} ) . "\n\n";
+
+ my $buf_msgstr_plural; # Used to keep the first msgstr of plural forms
+ my $first = 1;
+ foreach my $msgid ( sort { ( $self->{po}{"$a"}{'pos'} ) <=> ( $self->{po}{"$b"}{'pos'} ) } keys %{ $self->{po} } ) {
+ my $output = "";
+
+ if ($first) {
+ $first = 0;
+ } else {
+ $output .= "\n";
+ }
+
+ $output .= format_comment( $self->{po}{$msgid}{'comment'}, "" )
+ if length( $self->{po}{$msgid}{'comment'} );
+ if ( length( $self->{po}{$msgid}{'automatic'} ) ) {
+ foreach my $comment ( split( /\\n/, $self->{po}{$msgid}{'automatic'} ) ) {
+ $output .= format_comment( $comment, ". " );
+ }
+ }
+ $output .= format_comment( $self->{po}{$msgid}{'type'}, ". type: " )
+ if length( $self->{po}{$msgid}{'type'} );
+
+ if ( length( $self->{po}{$msgid}{'reference'} ) ) {
+ my $output_ref = wrap( $self->{po}{$msgid}{'reference'} );
+ $output_ref =~ s/\s+$//mg;
+ $output .= format_comment( $output_ref, ": " );
+ }
+ $output .= "#, " . join( ", ", sort split( /\s+/, $self->{po}{$msgid}{'flags'} ) ) . "\n"
+ if length( $self->{po}{$msgid}{'flags'} );
+ $output .= format_comment( $self->{po}{$msgid}{'previous'}, "| " )
+ if length( $self->{po}{$msgid}{'previous'} );
+
+ if ( exists $self->{po}{$msgid}{'plural'} ) {
+ if ( $self->{po}{$msgid}{'plural'} == 0 ) {
+ $output .= "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n";
+ $buf_msgstr_plural =
+ "msgstr[0] " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n";
+ } elsif ( $self->{po}{$msgid}{'plural'} == 1 ) {
+
+ # TODO: there may be only one plural form
+ $output = "msgid_plural " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n";
+ $output .= $buf_msgstr_plural;
+ $output .=
+ "msgstr[1] " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n";
+ } else {
+ die wrap_msg( dgettext( "po4a", "Cannot write PO files with more than two plural forms." ) );
+ }
+ } else {
+ $output .= "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n";
+ $output .= "msgstr " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n";
+ }
+
+ print $fh $output;
+ }
+ print $fh join( "\n\n", @{ $self->{footer} } ) if scalar @{ $self->{footer} };
+
+ if ( $filename ne '-' ) {
+ close $fh
+ or croak wrap_mod( dgettext( "po4a", "Cannot close %s after writing: %s\n" ), $filename, $! );
+ }
+}
+
+=item write_if_needed($$)
+
+Like write, but if the PO or POT file already exists, the object will be
+written in a temporary file which will be compared with the existing file
+to check if the update is needed (this avoids to change a POT just to
+update a line reference or the POT-Creation-Date field).
+
+=cut
+
+sub move_po_if_needed {
+ my ( $new_po, $old_po, $backup ) = ( shift, shift, shift );
+ my $diff;
+
+ if ( -e $old_po ) {
+ $diff = qx(diff -q -I'^#:' -I'^\"POT-Creation-Date:' -I'^\"PO-Revision-Date:' $old_po $new_po);
+ if ( $diff eq "" ) {
+ unlink $new_po
+ or die wrap_msg( dgettext( "po4a", "Cannot unlink %s: %s." ), $new_po, $! );
+
+ # touch the old PO
+ my ( $atime, $mtime ) = ( time, time );
+ utime $atime, $mtime, $old_po;
+ } else {
+ move $new_po, $old_po
+ or die wrap_msg( dgettext( "po4a", "Cannot move %s to %s: %s." ), $new_po, $old_po, $! );
+ }
+ } else {
+ move $new_po, $old_po
+ or die wrap_msg( dgettext( "po4a", "Cannot move %s to %s: %s." ), $new_po, $old_po, $! );
+ }
+}
+
+sub write_if_needed {
+ my $self = shift;
+ my $filename = shift
+ or croak dgettext( "po4a", "Cannot write to a file without filename" ) . "\n";
+
+ if ( -e $filename ) {
+ my ($tmp_filename);
+ my $basename = basename($filename);
+ ( undef, $tmp_filename ) = File::Temp::tempfile(
+ $basename . "XXXX",
+ DIR => File::Spec->tmpdir(),
+ OPEN => 0,
+ UNLINK => 0
+ );
+ $self->write($tmp_filename);
+ move_po_if_needed( $tmp_filename, $filename );
+ } else {
+ $self->write($filename);
+ }
+}
+
+=item filter($)
+
+This function extracts a catalog from an existing one. Only the entries having
+a reference in the given file will be placed in the resulting catalog.
+
+This function parses its argument, converts it to a Perl function definition,
+evals this definition and filters the fields for which this function returns
+true.
+
+I love Perl sometimes ;)
+
+=cut
+
+sub filter {
+ my $self = shift;
+ our $filter = shift;
+
+ my $res;
+ $res = Locale::Po4a::Po->new();
+
+ # Parse the filter
+ our $code = "sub apply { return ";
+ our $pos = 0;
+ our $length = length $filter;
+
+ # explode chars to parts. How to subscript a string in Perl?
+ our @filter = split( //, $filter );
+
+ sub gloups {
+ my $fmt = shift;
+ my $space = "";
+ for ( 1 .. $pos ) {
+ $space .= ' ';
+ }
+ die wrap_msg("$fmt\n$filter\n$space^ HERE");
+ }
+
+ sub showmethecode {
+ return unless $debug{'filter'};
+ my $fmt = shift;
+ my $space = "";
+ for ( 1 .. $pos ) {
+ $space .= ' ';
+ }
+ print STDERR "$filter\n$space^ $fmt\n"; #"$code\n";
+ }
+
+ # I dream of a lex in perl :-/
+ sub parse_expression {
+ showmethecode("Begin expression")
+ if $debug{'filter'};
+
+ gloups( "Begin of expression expected, got '%s'", $filter[$pos] )
+ unless ( $filter[$pos] eq '(' );
+ $pos++; # pass the '('
+ if ( $filter[$pos] eq '&' ) {
+
+ # AND
+ $pos++;
+ showmethecode("Begin of AND")
+ if $debug{'filter'};
+ $code .= "(";
+ while (1) {
+ gloups("Unfinished AND statement.")
+ if ( $pos == $length );
+ parse_expression();
+ if ( $filter[$pos] eq '(' ) {
+ $code .= " && ";
+ } elsif ( $filter[$pos] eq ')' ) {
+ last; # do not eat that char
+ } else {
+ gloups( "End of AND or begin of sub-expression expected, got '%s'", $filter[$pos] );
+ }
+ }
+ $code .= ")";
+ } elsif ( $filter[$pos] eq '|' ) {
+
+ # OR
+ $pos++;
+ $code .= "(";
+ while (1) {
+ gloups("Unfinished OR statement.")
+ if ( $pos == $length );
+ parse_expression();
+ if ( $filter[$pos] eq '(' ) {
+ $code .= " || ";
+ } elsif ( $filter[$pos] eq ')' ) {
+ last; # do not eat that char
+ } else {
+ gloups( "End of OR or begin of sub-expression expected, got '%s'", $filter[$pos] );
+ }
+ }
+ $code .= ")";
+ } elsif ( $filter[$pos] eq '!' ) {
+
+ # NOT
+ $pos++;
+ $code .= "(!";
+ gloups("Missing sub-expression in NOT statement.")
+ if ( $pos == $length );
+ parse_expression();
+ $code .= ")";
+ } else {
+
+ # must be an equal. Let's get field and argument
+ my ( $field, $arg, $done );
+ $field = substr( $filter, $pos );
+ gloups("EQ statement contains no '=' or invalid field name")
+ unless ( $field =~ /([a-z]*)=/i );
+ $field = lc($1);
+ $pos += ( length $field ) + 1;
+
+ # check that we've got a valid field name,
+ # and the number it referes to
+ # DO NOT CHANGE THE ORDER
+ my @names = qw(msgid msgstr reference flags comment previous automatic);
+ my $fieldpos;
+ for ( $fieldpos = 0 ; $fieldpos < scalar @names && $field ne $names[$fieldpos] ; $fieldpos++ ) { }
+ gloups( "Invalid field name: %s", $field )
+ if $fieldpos == scalar @names; # not found
+
+ # Now, get the argument value. It has to be between quotes,
+ # which can be escaped
+ # We point right on the first char of the argument
+ # (first quote already eaten)
+ my $escaped = 0;
+ my $quoted = 0;
+ if ( $filter[$pos] eq '"' ) {
+ $pos++;
+ $quoted = 1;
+ }
+ showmethecode( ( $quoted ? "Quoted" : "Unquoted" ) . " argument of field '$field'" )
+ if $debug{'filter'};
+
+ while ( !$done ) {
+ gloups("Unfinished EQ argument.")
+ if ( $pos == $length );
+
+ if ($quoted) {
+ if ( $filter[$pos] eq '\\' ) {
+ if ($escaped) {
+ $arg .= '\\';
+ $escaped = 0;
+ } else {
+ $escaped = 1;
+ }
+ } elsif ($escaped) {
+ if ( $filter[$pos] eq '"' ) {
+ $arg .= '"';
+ $escaped = 0;
+ } else {
+ gloups( "Invalid escape sequence in argument: '\\%s'", $filter[$pos] );
+ }
+ } else {
+ if ( $filter[$pos] eq '"' ) {
+ $done = 1;
+ } else {
+ $arg .= $filter[$pos];
+ }
+ }
+ } else {
+ if ( $filter[$pos] eq ')' ) {
+
+ # counter the next ++ since we don't want to eat
+ # this char
+ $pos--;
+ $done = 1;
+ } else {
+ $arg .= $filter[$pos];
+ }
+ }
+ $pos++;
+ }
+
+ # and now, add the code to check this equality
+ $code .= "(\$_[$fieldpos] =~ m{$arg})";
+
+ }
+ showmethecode("End of expression")
+ if $debug{'filter'};
+ gloups("Unfinished statement.")
+ if ( $pos == $length );
+ gloups( "End of expression expected, got '%s'", $filter[$pos] )
+ unless ( $filter[$pos] eq ')' );
+ $pos++;
+ }
+
+ # And now, launch the beast, finish the function and use eval
+ # to construct this function.
+ # Ok, the lack of lexer is a fair price for the eval ;)
+ parse_expression();
+ gloups("Garbage at the end of the expression")
+ if ( $pos != $length );
+ $code .= "; }";
+ print STDERR "CODE = $code\n"
+ if $debug{'filter'};
+ eval $code;
+ die wrap_mod( "po4a::po", dgettext( "po4a", "Evaluating the provided filter failed: %s" ), $@ )
+ if $@;
+
+ for ( my $cpt = (0) ; $cpt < $self->count_entries() ; $cpt++ ) {
+
+ my ( $msgid, $ref, $msgstr, $flags, $type, $comment, $previous, $automatic );
+
+ $msgid = $self->msgid($cpt);
+ $ref = $self->{po}{$msgid}{'reference'};
+
+ $msgstr = $self->{po}{$msgid}{'msgstr'};
+ $flags = $self->{po}{$msgid}{'flags'};
+ $type = $self->{po}{$msgid}{'type'};
+ $comment = $self->{po}{$msgid}{'comment'};
+ $previous = $self->{po}{$msgid}{'previous'};
+ $automatic = $self->{po}{$msgid}{'automatic'};
+
+ # DO NOT CHANGE THE ORDER
+ $res->push_raw(
+ 'msgid' => $msgid,
+ 'msgstr' => $msgstr,
+ 'flags' => $flags,
+ 'type' => $type,
+ 'reference' => $ref,
+ 'comment' => $comment,
+ 'previous' => $previous,
+ 'automatic' => $automatic
+ ) if ( apply( $msgid, $msgstr, $ref, $flags, $comment, $previous, $automatic ) );
+ }
+
+ # delete the apply subroutine
+ # otherwise it will be redefined.
+ undef &apply;
+ return $res;
+}
+
+=back
+
+=head1 Functions to use a message catalog for translations
+
+=over 4
+
+=item gettext($%)
+
+Request the translation of the string given as argument in the current catalog.
+The function returns the original (untranslated) string if the string was not
+found.
+
+After the string to translate, you can pass a hash of extra
+arguments. Here are the valid entries:
+
+=over
+
+=item B<wrap>
+
+boolean indicating whether we can consider that whitespaces in string are
+not important. If yes, the function canonizes the string before looking for
+a translation, and wraps the result.
+
+=item B<wrapcol>
+
+the column at which we should wrap (default: 76).
+
+=back
+
+=cut
+
+sub gettext {
+ my $self = shift;
+ my $text = shift;
+ my (%opt) = @_;
+ my $res;
+
+ return "" unless length($text); # Avoid returning the header.
+ my $validoption = "reference wrap wrapcol";
+ my %validoption;
+
+ map { $validoption{$_} = 1 } ( split( / /, $validoption ) );
+ foreach ( keys %opt ) {
+ Carp::confess "internal error: unknown arg $_.\n" . "Here are the valid options: $validoption.\n"
+ unless $validoption{$_};
+ }
+
+ $text = canonize($text)
+ if ( $opt{'wrap'} );
+
+ my $esc_text = escape_text($text);
+
+ $self->{gettextqueries}++;
+
+ if (
+ defined $self->{po}{$esc_text}
+ and defined $self->{po}{$esc_text}{'msgstr'}
+ and length $self->{po}{$esc_text}{'msgstr'}
+ and ( not defined $self->{po}{$esc_text}{'flags'}
+ or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/ )
+ )
+ {
+
+ $self->{gettexthits}++;
+ $res = unescape_text( $self->{po}{$esc_text}{'msgstr'} );
+ if ( defined $self->{po}{$esc_text}{'plural'} ) {
+ if ( $self->{po}{$esc_text}{'plural'} eq "0" ) {
+ warn wrap_mod(
+ "po4a gettextize",
+ dgettext(
+ "po4a",
+ "'%s' is the singular form of a message, " . "po4a will use the msgstr[0] translation (%s)."
+ ),
+ $esc_text,
+ $res
+ );
+ } else {
+ warn wrap_mod(
+ "po4a gettextize",
+ dgettext(
+ "po4a",
+ "'%s' is the plural form of a message, " . "po4a will use the msgstr[1] translation (%s)."
+ ),
+ $esc_text,
+ $res
+ );
+ }
+ }
+ } else {
+ $res = $text;
+ }
+
+ if ( $opt{'wrap'} ) {
+ $res = wrap( $res, $opt{'wrapcol'} || 76, 0 );
+ }
+
+ # print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n";
+ return $res;
+}
+
+=item stats_get()
+
+Returns statistics about the hit ratio of gettext since the last time that
+stats_clear() was called. Please note that it's not the same
+statistics than the one printed by msgfmt --statistic. Here, it's statistics
+about recent usage of the PO file, while msgfmt reports the status of the
+file. Example of use:
+
+ [some use of the PO file to translate stuff]
+
+ ($percent,$hit,$queries) = $pofile->stats_get();
+ print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n";
+
+=cut
+
+sub stats_get() {
+ my $self = shift;
+ my ( $h, $q ) = ( $self->{gettexthits}, $self->{gettextqueries} );
+ my $p = ( $q == 0 ? 100 : int( $h / $q * 10000 ) / 100 );
+
+ # $p =~ s/\.00//;
+ # $p =~ s/(\..)0/$1/;
+
+ return ( $p, $h, $q );
+}
+
+=item stats_clear()
+
+Clears the statistics about gettext hits.
+
+=cut
+
+sub stats_clear {
+ my $self = shift;
+ $self->{gettextqueries} = 0;
+ $self->{gettexthits} = 0;
+}
+
+=back
+
+=head1 Functions to build a message catalog
+
+=over 4
+
+=item push(%)
+
+Push a new entry at the end of the current catalog. The arguments should
+form a hash table. The valid keys are:
+
+=over 4
+
+=item B<msgid>
+
+the string in original language.
+
+=item B<msgstr>
+
+the translation.
+
+=item B<reference>
+
+an indication of where this string was found. Example: file.c:46 (meaning
+in 'file.c' at line 46). It can be a space-separated list in case of
+multiple occurrences.
+
+=item B<comment>
+
+a comment added here manually (by the translators). The format here is free.
+
+=item B<automatic>
+
+a comment which was automatically added by the string extraction
+program. See the B<--add-comments> option of the B<xgettext> program for
+more information.
+
+=item B<flags>
+
+space-separated list of all defined flags for this entry.
+
+Valid flags are: B<c-text>, B<python-text>, B<lisp-text>, B<elisp-text>, B<librep-text>,
+B<smalltalk-text>, B<java-text>, B<awk-text>, B<object-pascal-text>, B<ycp-text>,
+B<tcl-text>, B<wrap>, B<no-wrap> and B<fuzzy>.
+
+See the gettext documentation for their meaning.
+
+=item B<type>
+
+this is mostly an internal argument: it is used while gettextizing
+documents. The idea here is to parse both the original and the translation
+into a PO object, and merge them, using one's msgid as msgid and the
+other's msgid as msgstr. To make sure that things get ok, each msgid in PO
+objects are given a type, based on their structure (like "chapt", "sect1",
+"p" and so on in DocBook). If the types of strings are not the same, that
+means that both files do not share the same structure, and the process
+reports an error.
+
+This information is written as automatic comment in the PO file since this
+gives to translators some context about the strings to translate.
+
+=item B<wrap>
+
+boolean indicating whether whitespaces can be mangled in cosmetic
+reformattings. If true, the string is canonized before use.
+
+This information is written to the PO file using the B<wrap> or B<no-wrap> flag.
+
+=item B<wrapcol>
+
+ignored; the key is kept for backward computability.
+
+=back
+
+=cut
+
+sub push {
+ my $self = shift;
+ my %entry = @_;
+
+ my $validoption = "wrap wrapcol type msgid msgstr automatic previous flags reference";
+ my %validoption;
+
+ map { $validoption{$_} = 1 } ( split( / /, $validoption ) );
+ foreach ( keys %entry ) {
+ Carp::confess "internal error: unknown arg $_.\n" . "Here are the valid options: $validoption.\n"
+ unless $validoption{$_};
+ }
+
+ unless ( $entry{'wrap'} ) {
+ $entry{'flags'} .= " no-wrap";
+ }
+ if ( defined( $entry{'msgid'} ) ) {
+ $entry{'msgid'} = canonize( $entry{'msgid'} )
+ if ( $entry{'wrap'} );
+
+ $entry{'msgid'} = escape_text( $entry{'msgid'} );
+ }
+ if ( defined( $entry{'msgstr'} ) ) {
+ $entry{'msgstr'} = canonize( $entry{'msgstr'} )
+ if ( $entry{'wrap'} );
+
+ $entry{'msgstr'} = escape_text( $entry{'msgstr'} );
+ }
+
+ $self->push_raw(%entry);
+}
+
+# The same as push(), but assuming that msgid and msgstr are already escaped
+sub push_raw {
+ my $self = shift;
+ my %entry = @_;
+ my ( $msgid, $msgstr, $reference, $comment, $automatic, $previous, $flags, $type, $transref ) = (
+ $entry{'msgid'}, $entry{'msgstr'}, $entry{'reference'}, $entry{'comment'}, $entry{'automatic'},
+ $entry{'previous'}, $entry{'flags'}, $entry{'type'}, $entry{'transref'}
+ );
+ my $keep_conflict = $entry{'conflict'};
+
+ # print STDERR "Push_raw\n";
+ # print STDERR " msgid=>>>$msgid<<<\n" if $msgid;
+ # print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr;
+ # Carp::cluck " flags=$flags\n" if $flags;
+
+ return unless defined( $entry{'msgid'} );
+
+ # no msgid => header definition
+ unless ( length( $entry{'msgid'} ) ) {
+
+ # if (defined($self->{header}) && $self->{header} =~ /\S/) {
+ # warn dgettext("po4a","Redefinition of the header. ".
+ # "The old one will be discarded\n");
+ # } FIXME: do that iff the header isn't the default one.
+ $self->{header} = $msgstr;
+ $self->{header_comment} = $comment;
+ return;
+ }
+
+ if ( $self->{options}{'porefs'} =~ m/^never/ ) {
+ $reference = "";
+ } elsif ( $self->{options}{'porefs'} =~ m/^counter/ ) {
+ if ( $reference =~ m/^(.+?)(?=\S+:\d+)/g ) {
+ my $new_ref = $1;
+ 1 while $reference =~ s{ # x modifier is added to add formatting and improve readability
+ \G(\s*)(\S+):\d+ # \G is the last match in m//g (see also the (?=) syntax above)
+ # $2 is the file name
+ }{
+ $self->{counter}{$2} ||= 0, # each file has its own counter
+ ++$self->{counter}{$2}, # increment it
+ $new_ref .= "$1$2:".$self->{counter}{$2} # replace line number by this counter
+ }gex && pos($reference);
+ $reference = $new_ref;
+ }
+ } elsif ( $self->{options}{'porefs'} =~ m/^file/ ) {
+ $reference =~ s/:\d+//g;
+ }
+
+ if ( defined( $self->{po}{$msgid} ) ) {
+ warn wrap_mod( "po4a::po", dgettext( "po4a", "msgid defined twice: %s" ), $msgid )
+ if (0); # FIXME: put a verbose stuff
+ if ( defined $msgstr
+ and defined $self->{po}{$msgid}{'msgstr'}
+ and $self->{po}{$msgid}{'msgstr'} ne $msgstr )
+ {
+ my $txt = quote_text( $msgid, $self->{options}{'wrap-po'} );
+ my ( $first, $second ) = (
+ format_comment( ". ", $self->{po}{$msgid}{'reference'} )
+ . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ),
+
+ format_comment( ". ", $reference ) . quote_text($msgstr), $self->{options}{'wrap-po'}
+ );
+
+ if ($keep_conflict) {
+ if ( $self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s ) {
+ $msgstr =
+ $self->{po}{$msgid}{'msgstr'} . "\\n#-#-#-#-# $transref (type: $type) #-#-#-#-#\\n" . $msgstr;
+ } else {
+ $msgstr =
+ "#-#-#-#-# "
+ . $self->{po}{$msgid}{'transref'}
+ . " (type "
+ . $self->{po}{$msgid}{'type'}
+ . ") #-#-#-#-#\\n"
+ . $self->{po}{$msgid}{'msgstr'} . "\\n"
+ . "#-#-#-#-# $transref (type: $type) #-#-#-#-#\\n"
+ . $msgstr;
+ }
+
+ # Every msgid will have the same list of references.
+ # Only keep the last list.
+ $self->{po}{$msgid}{'reference'} = "";
+ } else {
+ warn wrap_msg(
+ dgettext(
+ "po4a",
+ "Translations don't match for:\n" . "%s\n"
+ . "-->First translation:\n" . "%s\n"
+ . " Second translation:\n" . "%s\n"
+ . " Old translation discarded."
+ ),
+ $txt, $first, $second
+ );
+ }
+ }
+ }
+ if ( defined $transref ) {
+ $self->{po}{$msgid}{'transref'} = $transref;
+ }
+ if ( length($reference) ) {
+ if ( defined $self->{po}{$msgid}{'reference'} ) {
+
+ # Only add the new reference if it's not already included in the existing string
+ # It'd be much easier if $self->{po}{$msgid}{'reference'} were an array instead of a joined string...
+ my $oldref = $self->{po}{$msgid}{'reference'};
+ $self->{po}{$msgid}{'reference'} .= " " . $reference
+ unless ( ( $oldref =~ m/ $reference / )
+ || ( $oldref =~ m/ $reference$/ )
+ || ( $oldref =~ m/^$reference$/ )
+ || ( $oldref =~ m/^$reference / ) );
+ } else {
+ $self->{po}{$msgid}{'reference'} = $reference;
+ }
+ }
+ $self->{po}{$msgid}{'msgstr'} = $msgstr;
+ $self->{po}{$msgid}{'comment'} = $comment;
+ $self->{po}{$msgid}{'automatic'} = $automatic;
+ $self->{po}{$msgid}{'previous'} = $previous;
+
+ $self->{po}{$msgid}{pos_doc} = () unless ( defined( $self->{po}{$msgid}{pos_doc} ) );
+ CORE::push( @{ $self->{po}{$msgid}{pos_doc} }, $self->{count_doc}++ );
+ CORE::push( @{ $self->{gettextize_types} }, $type );
+
+ unless ( defined( $self->{po}{$msgid}{'pos'} ) ) {
+ $self->{po}{$msgid}{'pos'} = $self->{count}++;
+ }
+ $self->{po}{$msgid}{'type'} = $type;
+ $self->{po}{$msgid}{'plural'} = $entry{'plural'}
+ if defined $entry{'plural'};
+
+ if ( defined($flags) ) {
+ $flags = " $flags ";
+ $flags =~ s/,/ /g;
+ foreach my $flag (@known_flags) {
+ if ( index( $flags, " $flag " ) != -1 ) { # if flag to be set
+ unless ( defined( $self->{po}{$msgid}{'flags'} )
+ && $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/ )
+ {
+ # flag not already set
+ if ( defined $self->{po}{$msgid}{'flags'} ) {
+ $self->{po}{$msgid}{'flags'} .= " " . $flag;
+ } else {
+ $self->{po}{$msgid}{'flags'} = $flag;
+ }
+ }
+ }
+ }
+ }
+
+ # print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n";
+
+}
+
+=back
+
+=head1 Miscellaneous functions
+
+=over 4
+
+=item count_entries()
+
+Returns the number of entries in the catalog (without the header).
+
+=cut
+
+sub count_entries($) {
+ my $self = shift;
+ return $self->{count};
+}
+
+=item count_entries_doc()
+
+Returns the number of entries in document. If a string appears multiple times
+in the document, it will be counted multiple times.
+
+=cut
+
+sub count_entries_doc($) {
+ my $self = shift;
+ return $self->{count_doc};
+}
+
+=item msgid($)
+
+Returns the msgid of the given number.
+
+=cut
+
+sub msgid($$) {
+ my $self = shift;
+ my $num = shift;
+
+ foreach my $msgid ( keys %{ $self->{po} } ) {
+ return $msgid if ( $self->{po}{$msgid}{'pos'} eq $num );
+ }
+ return undef;
+}
+
+=item msgid_doc($)
+
+Returns the msgid with the given position in the document.
+
+=cut
+
+sub msgid_doc($$) {
+ my $self = shift;
+ my $num = shift;
+
+ foreach my $msgid ( keys %{ $self->{po} } ) {
+ foreach my $pos ( @{ $self->{po}{$msgid}{'pos_doc'} } ) {
+ return $msgid if ( $pos eq $num );
+ }
+ }
+ return undef;
+}
+
+=item type_doc($)
+
+Returns the type of the msgid with the given position in the document. This is
+probably only useful to gettextization, and it's stored separately from
+{$msgid}{'type'} because the later location may be overwritten by another type
+when the $msgid is duplicated in the master document.
+
+=cut
+
+sub type_doc($$) {
+ my $self = shift;
+ my $num = shift;
+
+ return ${ $self->{gettextize_types} }[$num];
+}
+
+=item get_charset()
+
+Returns the character set specified in the PO header. If it hasn't been
+set, it will return "UTF-8".
+
+=cut
+
+sub get_charset() {
+ my $self = shift;
+
+ $self->{header} =~ /charset=(.*?)[\s\\]/;
+
+ if ( defined $1 ) {
+ return $1;
+ } else {
+ return "UTF-8";
+ }
+}
+
+#----[ helper functions ]---------------------------------------------------
+
+# transforme the string from its PO file representation to the form which
+# should be used to print it
+sub unescape_text {
+ my $text = shift;
+
+ print STDERR "\nunescape [$text]====" if $debug{'escape'};
+ $text = join( "", split( /\n/, $text ) );
+ $text =~ s/\\"/"/g;
+
+ # unescape newlines
+ # NOTE on \G:
+ # The following regular expression introduce newlines.
+ # Thus, ^ doesn't match all beginnings of lines.
+ # \G is a zero-width assertion that matches the position
+ # of the previous substitution with s///g. As every
+ # substitution ends by a newline, it always matches a
+ # position just after a newline.
+ $text =~ s/( # $1:
+ (\G|[^\\]) # beginning of the line or any char
+ # different from '\'
+ (\\\\)* # followed by any even number of '\'
+ )\\n # and followed by an escaped newline
+ /$1\n/sgx; # single string, match globally, allow comments
+ # unescape carriage returns
+ $text =~ s/( # $1:
+ (\G|[^\\]) # beginning of the line or any char
+ # different from '\'
+ (\\\\)* # followed by any even number of '\'
+ )\\r # and followed by an escaped carriage return
+ /$1\r/sgx; # single string, match globally, allow comments
+ # unescape tabulations
+ $text =~ s/( # $1:
+ (\G|[^\\])# beginning of the line or any char
+ # different from '\'
+ (\\\\)* # followed by any even number of '\'
+ )\\t # and followed by an escaped tabulation
+ /$1\t/mgx; # multilines string, match globally, allow comments
+ # and unescape the escape character
+ $text =~ s/\\\\/\\/g;
+ print STDERR ">$text<\n" if $debug{'escape'};
+
+ return $text;
+}
+
+# transform the string to its representation as it should be written in PO
+# files
+sub escape_text {
+ my $text = shift;
+
+ print STDERR "\nescape [$text]====" if $debug{'escape'};
+ $text =~ s/\\/\\\\/g;
+ $text =~ s/"/\\"/g;
+ $text =~ s/\n/\\n/g;
+ $text =~ s/\r/\\r/g;
+ $text =~ s/\t/\\t/g;
+ print STDERR ">$text<\n" if $debug{'escape'};
+
+ return $text;
+}
+
+# put quotes around the string on each lines (without escaping it)
+# It does also normalize the text (ie, make sure its representation is wrapped
+# on the 80th char, but without changing the meaning of the string)
+sub quote_text {
+ my $string = shift;
+ my $do_wrap = shift // 'no'; # either 'no' or 'newlines', or column at which we should wrap
+
+ return '""' unless length($string);
+
+ return "\"$string\"" if ( $do_wrap eq 'no' );
+
+ print STDERR "\nquote $do_wrap [$string]====" if $debug{'quote'};
+
+ # break lines on newlines, if any
+ # see unescape_text for an explanation on \G
+ $string =~ s/( # $1:
+ (\G|[^\\]) # beginning of the line or any char
+ # different from '\'
+ (\\\\)* # followed by any even number of '\'
+ \\n) # and followed by an escaped newline
+ /$1\n/sgx; # single string, match globally, allow comments
+
+ $string = wrap( $string, $do_wrap ) if ( $do_wrap ne 'newlines' );
+ my @string = split( /\n/, $string );
+ $string = join( "\"\n\"", @string );
+ $string = "\"$string\"";
+ if ( scalar @string > 1 && $string[0] ne '' ) {
+ $string = "\"\"\n" . $string;
+ }
+
+ print STDERR ">$string<\n" if $debug{'quote'};
+ return $string;
+}
+
+# undo the work of the quote_text function
+sub unquote_text {
+ my $string = shift;
+ print STDERR "\nunquote [$string]====" if $debug{'quote'};
+ $string =~ s/^""\\n//s;
+ $string =~ s/^"(.*)"$/$1/s;
+ $string =~ s/"\n"//gm;
+
+ # Note: an even number of '\' could precede \\n, but I could not build a
+ # document to test this
+ $string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm;
+ $string =~ s|!!DUMMYPOPM!!|\\n|gm;
+ print STDERR ">$string<\n" if $debug{'quote'};
+ return $string;
+}
+
+# canonize the string: write it on only one line, changing consecutive
+# whitespace to only one space.
+# Warning, it changes the string and should only be called if the string is
+# plain text
+sub canonize {
+ my $text = shift;
+ print STDERR "\ncanonize [$text]====" if $debug{'canonize'};
+ $text =~ s/^ *//s;
+ $text =~ s/^[ \t]+/ /gm;
+
+ # if ($text eq "\n"), it messed up the first string (header)
+ $text =~ s/\n/ /gm if ( $text ne "\n" );
+ $text =~ s/([.)]) +/$1 /gm;
+ $text =~ s/([^.)]) */$1 /gm;
+ $text =~ s/ *$//s;
+ print STDERR ">$text<\n" if $debug{'canonize'};
+ return $text;
+}
+
+
+# Wraps the string. We don't use Text::Wrap since it mangles whitespace at the
+# end of the split line.
+#
+# Mandatory arguments:
+# - A string to wrap. May content line breaks, in such case each line will be
+# wrapped separately.
+# Optional arguments:
+# - A column to wrap on. Default: 76.
+# - The extra length allowed for the first line. Default: -10 (which means it
+# will be wrapped 10 characters shorter).
+sub wrap {
+ my $text = shift;
+ return "0" if ( $text eq '0' );
+ my $col = shift || 76;
+ my $first_shift = shift || -10;
+ my @lines = split( /\n/, "$text" );
+ my $res = "";
+
+ while ( defined( my $line = shift @lines ) ) {
+ if ( $first_shift != 0 && length($line) > $col + $first_shift ) {
+ unshift @lines, $line;
+ $first_shift = 0;
+ next;
+ }
+ if ( length($line) > $col ) {
+ my $pos = rindex( $line, " ", $col );
+ while ( substr( $line, $pos - 1, 1 ) eq '.' && $pos != -1 ) {
+ $pos = rindex( $line, " ", $pos - 1 );
+ }
+ if ( $pos == -1 ) {
+
+ # There are no spaces in the first $col chars, pick-up the
+ # first space
+ $pos = index( $line, " " );
+ }
+ if ( $pos != -1 ) {
+ my $end = substr( $line, $pos + 1 );
+ $line = substr( $line, 0, $pos + 1 );
+ if ( $end =~ s/^( +)// ) {
+ $line .= $1;
+ }
+ unshift @lines, $end;
+ }
+ }
+ $first_shift = 0;
+ $res .= "$line\n";
+ }
+
+ # Restore the original trailing spaces
+ $res =~ s/\s+$//s;
+ if ( $text =~ m/(\s+)$/s ) {
+ $res .= $1;
+ }
+ return $res;
+}
+
+# outputs properly a '# ... ' line to be put in the PO file
+sub format_comment {
+ my $comment = shift;
+ my $char = shift;
+ my $result = "#" . $char . $comment;
+ $result =~ s/\n/\n#$char/gs;
+ $result =~ s/^#$char$/#/gm;
+ $result .= "\n";
+ return $result;
+}
+
+1;
+__END__
+
+=back
+
+=head1 AUTHORS
+
+ Denis Barbier <barbier@linuxfr.org>
+ Martin Quinson (mquinson#debian.org)
+
+=cut