From 2607e6bf533b6bff1a2230feb10df3ee7882d218 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 26 Jun 2024 09:52:47 +0200 Subject: Merging upstream version 0.73. Signed-off-by: Daniel Baumann --- lib/Locale/Po4a/Common.pm | 4 ++- lib/Locale/Po4a/Po.pm | 6 ++-- lib/Locale/Po4a/Pod.pm | 30 +++++++++++------ lib/Locale/Po4a/TransTractor.pm | 71 ++++++++++++++++++++++++++++++----------- lib/Locale/Po4a/Xml.pm | 37 +++++++++++++++------ 5 files changed, 106 insertions(+), 42 deletions(-) (limited to 'lib') diff --git a/lib/Locale/Po4a/Common.pm b/lib/Locale/Po4a/Common.pm index 1b09adc..9a9f069 100644 --- a/lib/Locale/Po4a/Common.pm +++ b/lib/Locale/Po4a/Common.pm @@ -224,7 +224,9 @@ BEGIN { if ( eval { require Locale::gettext } ) { import Locale::gettext; require POSIX; - POSIX::setlocale( &POSIX::LC_MESSAGES, '' ); + + # This cannot be done on Windows + POSIX::setlocale( &POSIX::LC_MESSAGES, '' ) unless $^O eq 'MSWin32'; } else { eval ' sub bindtextdomain($$) { } diff --git a/lib/Locale/Po4a/Po.pm b/lib/Locale/Po4a/Po.pm index d87a145..e5d0253 100644 --- a/lib/Locale/Po4a/Po.pm +++ b/lib/Locale/Po4a/Po.pm @@ -541,6 +541,9 @@ sub write { or croak wrap_mod( "po4a::po", dgettext( "po4a", "Cannot write to %s: %s" ), $filename, $! ); } + # Some old perl versions qwak when the encoding is only set to utf. We need to first reset it to raw before setting utf8 again. Not sure why it's so. + binmode( $fh, ':raw' ); + binmode( $fh, ':utf8' ); print $fh "" . format_comment( $self->{header_comment}, "" ) if length( $self->{header_comment} ); @@ -1537,7 +1540,6 @@ sub canonize { return $text; } - # Wraps the string. We don't use Text::Wrap since it mangles whitespace at the # end of the split line. # @@ -1549,7 +1551,7 @@ sub canonize { # - The extra length allowed for the first line. Default: -10 (which means it # will be wrapped 10 characters shorter). 
sub wrap { - my $text = shift; + my $text = shift; return "0" if ( $text eq '0' ); my $col = shift || 76; my $first_shift = shift || -10; diff --git a/lib/Locale/Po4a/Pod.pm b/lib/Locale/Po4a/Pod.pm index 4b3dc04..1933ae3 100644 --- a/lib/Locale/Po4a/Pod.pm +++ b/lib/Locale/Po4a/Pod.pm @@ -27,6 +27,8 @@ use vars qw(@ISA); use Carp qw(croak confess); +use Locale::Po4a::Common; + sub initialize { } sub translate { @@ -84,16 +86,24 @@ sub command { $charset =~ s/^\s*(.*?)\s*$/$1/s; my $master_charset = $self->get_in_charset; - croak wrap_mod( - "po4a::pod", - dgettext( - "po4a", - "The file %s declares %s as encoding, but you provided %s as master charset. Please change either setting." - ), - $self->{DOCPOD}{refname}, - $charset, - $master_charset, - ) if ( length( $master_charset // '' ) > 0 && uc($charset) ne uc($master_charset) ); + + # in POD at least, there is no difference between utf8 and UTF-8. The major POD parsers handle "both encodings" in the exact same way. + # Despite https://perldoc.perl.org/Encode#UTF-8-vs.-utf8-vs.-UTF8 (the parentheses around the defined-or are required: =~ binds tighter than //, so without them every non-empty master charset would be rewritten to UTF-8) + $master_charset = 'UTF-8' if ( $master_charset // '' ) =~ /utf-?8/i; + $charset = 'UTF-8' if $charset =~ /utf-?8/i; + + if ( length( $master_charset // '' ) > 0 && uc($charset) ne uc($master_charset) ) { + croak wrap_mod( + "po4a::pod", + dgettext( + "po4a", + "The file %s declares %s as encoding, but you provided %s as master charset. Please change either setting." 
+ ), + $self->{DOCPOD}{refname}, + $charset, + $master_charset, + ); + } # The =encoding line will be added by docheader } else { diff --git a/lib/Locale/Po4a/TransTractor.pm b/lib/Locale/Po4a/TransTractor.pm index 0bc5b8f..f943ba1 100644 --- a/lib/Locale/Po4a/TransTractor.pm +++ b/lib/Locale/Po4a/TransTractor.pm @@ -11,7 +11,7 @@ use warnings; use subs qw(makespace); use vars qw($VERSION @ISA @EXPORT); -$VERSION = "0.72"; +$VERSION = "0.73"; @ISA = qw(DynaLoader); @EXPORT = qw(new process translate read write readpo writepo @@ -403,7 +403,7 @@ sub new { $self->{TT}{debug} = $options{'debug'}; } if ( defined $options{'wrapcol'} ) { - if ( $options{'wrapcol'} < 0) { + if ( $options{'wrapcol'} < 0 ) { $self->{TT}{wrapcol} = 'Inf'; } else { $self->{TT}{wrapcol} = $options{'wrapcol'}; @@ -421,11 +421,14 @@ sub new { =over 4 -=item read($$) +=item read($$$) -Add another input document data at the end of the existing array -C<< @{$self->{TT}{doc_in}} >>. The argument is the filename to read. If a second -argument is provided, it is the filename to use in the references. +Add another input document data at the end of the existing array C<< @{$self->{TT}{doc_in}} >>. + +This function takes two mandatory arguments and an optional one. + * The filename to read on disk; + * The name to use as filename when building the reference in the PO file; + * The charset to use to read that file (UTF-8 by default) This array C<< @{$self->{TT}{doc_in}} >> holds this input document data as an array of strings with alternating meanings. @@ -440,8 +443,8 @@ function when you're done with packing input files into the document. 
sub read() { my $self = shift; - my $filename = shift or confess "Cannot write to a file without filename"; - my $refname = shift or confess "Cannot write to a file without refname"; + my $filename = shift or confess "Cannot read from a file without filename"; + my $refname = shift or confess "Cannot read from a file without refname"; my $charset = shift || 'UTF-8'; my $linenum = 0; @@ -474,8 +477,14 @@ sub read() { my $error = $@; if ( length($error) ) { chomp $error; - die wrap_msg( dgettext( "po4a", "Malformed encoding while reading from file %s with charset %s: %s" ), - $filename, $charset, $error ); + die wrap_msg( + dgettext( + "po4a", + "Malformed encoding while reading from file %s with charset %s: %s\nIf %s is not the expected charset, you need to configure the right one with --master-charset or other similar flags." + ), + $filename, + $charset, $error, $charset + ); } # Croak if we need to @@ -545,11 +554,23 @@ sub write { binmode STDERR, ':encoding(UTF-8)'; my $char = chr( hex($1) ); die wrap_msg( - dgettext( "po4a", "Malformed encoding while writing char '%s' to file %s with charset %s: %s" ), - $char, $filename, $charset, $error ); + dgettext( + "po4a", + "Malformed encoding while writing char '%s' to file %s with charset %s: %s\nIf %s is not the expected charset, you need to configure the right one with --localized-charset or other similar flags." + ), + $char, + $filename, + $charset, $error, $charset + ); } else { - die wrap_msg( dgettext( "po4a", "Malformed encoding while writing to file %s with charset %s: %s" ), - $filename, $charset, $error ); + die wrap_msg( + dgettext( + "po4a", + "Malformed encoding while writing to file %s with charset %s: %s\nIf %s is not the expected charset, you need to configure the right one with --localized-charset or other similar flags." 
+ ), + $filename, + $charset, $error, $charset + ); } }; @@ -648,8 +669,14 @@ sub addendum_parse { } or do { my $error = $@ || 'Unknown failure'; chomp $error; - die wrap_msg( dgettext( "po4a", "Malformed encoding while reading from file %s with charset %s: %s" ), - $filename, $charset, $error ); + die wrap_msg( + dgettext( + "po4a", + "Malformed encoding while reading from file %s with charset %s: %s\nIf %s is not the expected charset, you need to configure the right one with --master-charset or other similar flags." + ), + $filename, + $charset, $error, $charset + ); }; unless ( $header =~ s/PO4A-HEADER://i ) { @@ -720,8 +747,14 @@ sub addendum_parse { my $error = $@; if ( length($error) ) { chomp $error; - die wrap_msg( dgettext( "po4a", "Malformed encoding while reading from file %s with charset %s: %s" ), - $filename, $charset, $error ); + die wrap_msg( + dgettext( + "po4a", + "Malformed encoding while reading from file %s with charset %s: %s\nIf %s is not the expected charset, you need to configure the right one with --master-charset or other similar flags." + ), + $filename, + $charset, $error, $charset + ); } close INS; @@ -995,7 +1028,7 @@ sub translate { # } if ( !defined $options{'wrapcol'} ) { - $options{'wrapcol'} = $self->{TT}{wrapcol} + $options{'wrapcol'} = $self->{TT}{wrapcol}; } elsif ( $options{'wrapcol'} < 0 ) { $options{'wrapcol'} = $self->{TT}{wrapcol} + $options{'wrapcol'}; } diff --git a/lib/Locale/Po4a/Xml.pm b/lib/Locale/Po4a/Xml.pm index 3666ac7..1c67bc7 100644 --- a/lib/Locale/Po4a/Xml.pm +++ b/lib/Locale/Po4a/Xml.pm @@ -899,16 +899,33 @@ sub tag_trans_xmlhead { my $out_charset = $self->get_out_charset; if ( defined $in_charset ) { - croak wrap_mod( - "po4a::xml", - dgettext( - "po4a", - "The file %s declares %s as encoding, but you provided %s as master charset. Please change either setting." 
- ), - $self->{'current_file'}, - $in_charset, - $input_charset - ) if ( length( $input_charset // '' ) > 0 && uc($input_charset) ne uc($in_charset) ); + if ( length( $input_charset // '' ) > 0 && uc($in_charset) ne uc($input_charset) ) { + if ( ( $in_charset eq 'UTF-8' || lc($in_charset) eq 'utf8' ) + && ( $input_charset eq 'UTF-8' || lc($input_charset) eq 'utf8' ) ) + { + croak wrap_mod( + "po4a::xml", + dgettext( + "po4a", + "The file %s declares %s as encoding, but you provided %s as master charset. Please change either setting because they really are different encoding in Perl. See https://perldoc.perl.org/Encode#UTF-8-vs.-utf8-vs.-UTF8" + ), + $self->{'current_file'}, + $in_charset, + $input_charset, + ); + } else { + croak wrap_mod( + "po4a::xml", + dgettext( + "po4a", + "The file %s declares %s as encoding, but you provided %s as master charset. Please change either setting." + ), + $self->{'current_file'}, + $in_charset, + $input_charset, + ); + } + } $tag =~ s/$in_charset/$out_charset/; } else { -- cgit v1.2.3