summary | refs | log | tree | commit | diff | stats
path: root/lib/Locale
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Locale')
-rw-r--r-- lib/Locale/Po4a/Common.pm 4
-rw-r--r-- lib/Locale/Po4a/Po.pm 6
-rw-r--r-- lib/Locale/Po4a/Pod.pm 30
-rw-r--r-- lib/Locale/Po4a/TransTractor.pm 71
-rw-r--r-- lib/Locale/Po4a/Xml.pm 37
5 files changed, 106 insertions, 42 deletions
diff --git a/lib/Locale/Po4a/Common.pm b/lib/Locale/Po4a/Common.pm
index 1b09adc..9a9f069 100644
--- a/lib/Locale/Po4a/Common.pm
+++ b/lib/Locale/Po4a/Common.pm
@@ -224,7 +224,9 @@ BEGIN {
if ( eval { require Locale::gettext } ) {
import Locale::gettext;
require POSIX;
- POSIX::setlocale( &POSIX::LC_MESSAGES, '' );
+
+ # This cannot be done on Windows
+ POSIX::setlocale( &POSIX::LC_MESSAGES, '' ) unless $^O eq 'MSWin32';
} else {
eval '
sub bindtextdomain($$) { }
diff --git a/lib/Locale/Po4a/Po.pm b/lib/Locale/Po4a/Po.pm
index d87a145..e5d0253 100644
--- a/lib/Locale/Po4a/Po.pm
+++ b/lib/Locale/Po4a/Po.pm
@@ -541,6 +541,9 @@ sub write {
or croak wrap_mod( "po4a::po", dgettext( "po4a", "Cannot write to %s: %s" ), $filename, $! );
}
+ # Some old perl versions complain when the encoding is only set to utf8. We need to first reset it to raw before setting utf8 again. Not sure why this is so.
+ binmode( $fh, ':raw' );
+ binmode( $fh, ':utf8' );
print $fh "" . format_comment( $self->{header_comment}, "" )
if length( $self->{header_comment} );
@@ -1537,7 +1540,6 @@ sub canonize {
return $text;
}
-
# Wraps the string. We don't use Text::Wrap since it mangles whitespace at the
# end of the split line.
#
@@ -1549,7 +1551,7 @@ sub canonize {
# - The extra length allowed for the first line. Default: -10 (which means it
# will be wrapped 10 characters shorter).
sub wrap {
- my $text = shift;
+ my $text = shift;
return "0" if ( $text eq '0' );
my $col = shift || 76;
my $first_shift = shift || -10;
diff --git a/lib/Locale/Po4a/Pod.pm b/lib/Locale/Po4a/Pod.pm
index 4b3dc04..1933ae3 100644
--- a/lib/Locale/Po4a/Pod.pm
+++ b/lib/Locale/Po4a/Pod.pm
@@ -27,6 +27,8 @@ use vars qw(@ISA);
use Carp qw(croak confess);
+use Locale::Po4a::Common;
+
sub initialize { }
sub translate {
@@ -84,16 +86,24 @@ sub command {
$charset =~ s/^\s*(.*?)\s*$/$1/s;
my $master_charset = $self->get_in_charset;
- croak wrap_mod(
- "po4a::pod",
- dgettext(
- "po4a",
- "The file %s declares %s as encoding, but you provided %s as master charset. Please change either setting."
- ),
- $self->{DOCPOD}{refname},
- $charset,
- $master_charset,
- ) if ( length( $master_charset // '' ) > 0 && uc($charset) ne uc($master_charset) );
+
+ # In POD at least, there is no difference between utf8 and UTF-8: the major POD parsers handle both spellings in exactly the same way,
+ # despite https://perldoc.perl.org/Encode#UTF-8-vs.-utf8-vs.-UTF8. The parentheses below matter: =~ binds tighter than //, so without them any defined, true master charset would be rewritten to UTF-8.
+ $master_charset = 'UTF-8' if ( $master_charset // '' ) =~ /utf-?8/i;
+ $charset = 'UTF-8' if $charset =~ /utf-?8/i;
+
+ if ( length( $master_charset // '' ) > 0 && uc($charset) ne uc($master_charset) ) {
+ croak wrap_mod(
+ "po4a::pod",
+ dgettext(
+ "po4a",
+ "The file %s declares %s as encoding, but you provided %s as master charset. Please change either setting."
+ ),
+ $self->{DOCPOD}{refname},
+ $charset,
+ $master_charset,
+ );
+ }
# The =encoding line will be added by docheader
} else {
diff --git a/lib/Locale/Po4a/TransTractor.pm b/lib/Locale/Po4a/TransTractor.pm
index 0bc5b8f..f943ba1 100644
--- a/lib/Locale/Po4a/TransTractor.pm
+++ b/lib/Locale/Po4a/TransTractor.pm
@@ -11,7 +11,7 @@ use warnings;
use subs qw(makespace);
use vars qw($VERSION @ISA @EXPORT);
-$VERSION = "0.72";
+$VERSION = "0.73";
@ISA = qw(DynaLoader);
@EXPORT = qw(new process translate
read write readpo writepo
@@ -403,7 +403,7 @@ sub new {
$self->{TT}{debug} = $options{'debug'};
}
if ( defined $options{'wrapcol'} ) {
- if ( $options{'wrapcol'} < 0) {
+ if ( $options{'wrapcol'} < 0 ) {
$self->{TT}{wrapcol} = 'Inf';
} else {
$self->{TT}{wrapcol} = $options{'wrapcol'};
@@ -421,11 +421,14 @@ sub new {
=over 4
-=item read($$)
+=item read($$$)
-Add another input document data at the end of the existing array
-C<< @{$self->{TT}{doc_in}} >>. The argument is the filename to read. If a second
-argument is provided, it is the filename to use in the references.
+Append the data of another input document to the end of the existing array C<< @{$self->{TT}{doc_in}} >>.
+
+This function takes two mandatory arguments and an optional one:
+ * The name of the file to read from disk;
+ * The name to use as the filename when building the references in the PO file;
+ * The charset to use when reading that file (UTF-8 by default).
This array C<< @{$self->{TT}{doc_in}} >> holds this input document data as an
array of strings with alternating meanings.
@@ -440,8 +443,8 @@ function when you're done with packing input files into the document.
sub read() {
my $self = shift;
- my $filename = shift or confess "Cannot write to a file without filename";
- my $refname = shift or confess "Cannot write to a file without refname";
+ my $filename = shift or confess "Cannot read from a file without filename";
+ my $refname = shift or confess "Cannot read from a file without refname";
my $charset = shift || 'UTF-8';
my $linenum = 0;
@@ -474,8 +477,14 @@ sub read() {
my $error = $@;
if ( length($error) ) {
chomp $error;
- die wrap_msg( dgettext( "po4a", "Malformed encoding while reading from file %s with charset %s: %s" ),
- $filename, $charset, $error );
+ die wrap_msg(
+ dgettext(
+ "po4a",
+ "Malformed encoding while reading from file %s with charset %s: %s\nIf %s is not the expected charset, you need to configure the right one with --master-charset or other similar flags."
+ ),
+ $filename,
+ $charset, $error, $charset
+ );
}
# Croak if we need to
@@ -545,11 +554,23 @@ sub write {
binmode STDERR, ':encoding(UTF-8)';
my $char = chr( hex($1) );
die wrap_msg(
- dgettext( "po4a", "Malformed encoding while writing char '%s' to file %s with charset %s: %s" ),
- $char, $filename, $charset, $error );
+ dgettext(
+ "po4a",
+ "Malformed encoding while writing char '%s' to file %s with charset %s: %s\nIf %s is not the expected charset, you need to configure the right one with --localized-charset or other similar flags."
+ ),
+ $char,
+ $filename,
+ $charset, $error, $charset
+ );
} else {
- die wrap_msg( dgettext( "po4a", "Malformed encoding while writing to file %s with charset %s: %s" ),
- $filename, $charset, $error );
+ die wrap_msg(
+ dgettext(
+ "po4a",
+ "Malformed encoding while writing to file %s with charset %s: %s\nIf %s is not the expected charset, you need to configure the right one with --localized-charset or other similar flags."
+ ),
+ $filename,
+ $charset, $error, $charset
+ );
}
};
@@ -648,8 +669,14 @@ sub addendum_parse {
} or do {
my $error = $@ || 'Unknown failure';
chomp $error;
- die wrap_msg( dgettext( "po4a", "Malformed encoding while reading from file %s with charset %s: %s" ),
- $filename, $charset, $error );
+ die wrap_msg(
+ dgettext(
+ "po4a",
+ "Malformed encoding while reading from file %s with charset %s: %s\nIf %s is not the expected charset, you need to configure the right one with --master-charset or other similar flags."
+ ),
+ $filename,
+ $charset, $error, $charset
+ );
};
unless ( $header =~ s/PO4A-HEADER://i ) {
@@ -720,8 +747,14 @@ sub addendum_parse {
my $error = $@;
if ( length($error) ) {
chomp $error;
- die wrap_msg( dgettext( "po4a", "Malformed encoding while reading from file %s with charset %s: %s" ),
- $filename, $charset, $error );
+ die wrap_msg(
+ dgettext(
+ "po4a",
+ "Malformed encoding while reading from file %s with charset %s: %s\nIf %s is not the expected charset, you need to configure the right one with --master-charset or other similar flags."
+ ),
+ $filename,
+ $charset, $error, $charset
+ );
}
close INS;
@@ -995,7 +1028,7 @@ sub translate {
# }
if ( !defined $options{'wrapcol'} ) {
- $options{'wrapcol'} = $self->{TT}{wrapcol}
+ $options{'wrapcol'} = $self->{TT}{wrapcol};
} elsif ( $options{'wrapcol'} < 0 ) {
$options{'wrapcol'} = $self->{TT}{wrapcol} + $options{'wrapcol'};
}
diff --git a/lib/Locale/Po4a/Xml.pm b/lib/Locale/Po4a/Xml.pm
index 3666ac7..1c67bc7 100644
--- a/lib/Locale/Po4a/Xml.pm
+++ b/lib/Locale/Po4a/Xml.pm
@@ -899,16 +899,33 @@ sub tag_trans_xmlhead {
my $out_charset = $self->get_out_charset;
if ( defined $in_charset ) {
- croak wrap_mod(
- "po4a::xml",
- dgettext(
- "po4a",
- "The file %s declares %s as encoding, but you provided %s as master charset. Please change either setting."
- ),
- $self->{'current_file'},
- $in_charset,
- $input_charset
- ) if ( length( $input_charset // '' ) > 0 && uc($input_charset) ne uc($in_charset) );
+ if ( length( $input_charset // '' ) > 0 && uc($in_charset) ne uc($input_charset) ) {    # declared XML encoding and configured master charset disagree
+ if ( ( $in_charset eq 'UTF-8' || lc($in_charset) eq 'utf8' )
+ && ( $input_charset eq 'UTF-8' || lc($input_charset) eq 'utf8' ) )
+ {    # both are UTF-8 spellings, but Perl treats UTF-8 and utf8 as distinct encodings
+ croak wrap_mod(
+ "po4a::xml",
+ dgettext(
+ "po4a",
+ "The file %s declares %s as encoding, but you provided %s as master charset. Please change either setting because they really are different encoding in Perl. See https://perldoc.perl.org/Encode#UTF-8-vs.-utf8-vs.-UTF8"
+ ),
+ $self->{'current_file'},    # this is the XML module: report the file being parsed, not the POD refname
+ $in_charset,
+ $input_charset,
+ );
+ } else {
+ croak wrap_mod(
+ "po4a::xml",
+ dgettext(
+ "po4a",
+ "The file %s declares %s as encoding, but you provided %s as master charset. Please change either setting."
+ ),
+ $self->{'current_file'},    # this is the XML module: report the file being parsed, not the POD refname
+ $in_charset,
+ $input_charset,
+ );
+ }
+ }
$tag =~ s/$in_charset/$out_charset/;
} else {