summaryrefslogtreecommitdiffstats
path: root/lib/Locale/Po4a/Pod.pm
blob: 4b3dc042707a945672c514651f8d6dbd514c4042 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
# Locale::Po4a::Pod -- Convert POD data to PO file, for translation.
#
# This program is free software; you may redistribute it and/or modify it
# under the terms of GPL v2.0 or later (see COPYING file).
#
# This module converts POD to PO file, so that it becomes possible to
# translate POD formatted documentation. See gettext documentation for
# more info about PO files.

############################################################################
# Modules and declarations
############################################################################

use Pod::Parser;
use Locale::Po4a::TransTractor qw(process new get_in_charset get_out_charset);

package Locale::Po4a::Pod;

use 5.16.0;
use strict;
use warnings;

require Exporter;

use vars qw(@ISA);
@ISA = qw(Locale::Po4a::TransTractor Pod::Parser);

use Carp qw(croak confess);

# Initialization hook. This module has nothing to set up, so the hook is
# deliberately a no-op.
# NOTE(review): presumably invoked by an inherited constructor -- confirm
# against the parent classes.
sub initialize {
    return;
}

# Translate one string, wrapping the parent class implementation with this
# module's pre_trans()/post_trans() hooks.
#
# Arguments:
#   $str     - text to translate
#   $ref     - reference of the text (callers in this file pass "name:line")
#   $type    - type tag of the text (e.g. "=head1", "verbatim", "textblock")
#   %options - optional key/value pairs (e.g. wrap => 1), forwarded unchanged
#              to the parent translate()
#
# Returns the translated string after post_trans() has been applied to it.
sub translate {

    # The options start *after* the four positional arguments. Building the
    # hash from the whole of @_ would turn $self/$str and $ref/$type into
    # bogus option pairs that get forwarded to the parent class.
    my ( $self, $str, $ref, $type, %options ) = @_;

    $str = $self->pre_trans( $str, $ref, $type );
    $str = $self->SUPER::translate( $str, $ref, $type, %options );
    $str = $self->post_trans( $str, $ref, $type );

    return $str;
}

# Hook applied to a string before it is handed over for translation.
# Nothing has to be adjusted here: the text is returned unchanged.
# $ref and $type are accepted but not used.
sub pre_trans {
    my ( $self, $text, $ref, $type ) = @_;
    return $text;
}

# Hook applied to a string after translation: every pair of words joined by
# a non-breaking space is rewritten to the POD form S<word1 word2>.
#
# The non-breaking space searched for is the latin1 octet 0xA0 converted to
# the output charset with Encode::from_to(). When that conversion yields
# undef, the string is returned untouched.
# NOTE(review): $nbs_out holds encoded octets after the conversion; whether
# $str holds octets or decoded characters at this point cannot be told from
# this file -- confirm.
# $ref and $type are accepted but not used.
sub post_trans {
    my ( $self, $str, $ref, $type ) = @_;

    my $nbs_out = "\xA0";
    my $octets  = Encode::from_to( $nbs_out, "latin1", $self->get_out_charset );
    return $str unless defined $octets;

    # Each pass rewrites one occurrence; loop until none is left.
    while ( $str =~ m/(^|.*\s)(\S+?)\Q$nbs_out\E(\S+?)(\s.*$|$)/s ) {
        my ( $head, $left, $right, $tail ) = ( $1, $2, $3, $4 );

        # Inside S<...> an ordinary space is enough, so any further
        # non-breaking space in the right-hand word is downgraded.
        $right =~ s/\Q$nbs_out\E/ /g;

        $str = join '', ( $head // "" ), "S<$left $right>", ( $tail // "" );
    }

    return $str;
}

# Handle one POD command paragraph ("=$command $paragraph").
#
#   back, cut, pod - re-emitted bare, without the paragraph text.
#   over           - re-emitted with its paragraph as-is (not translated).
#   encoding       - nothing is emitted here (docheader() writes the
#                    =encoding line of the output); the declared charset is
#                    only compared, case-insensitively, with the master
#                    charset, and a mismatch is fatal.
#   anything else  - the paragraph is translated (wrapping allowed) and
#                    re-emitted after the command name.
#
# NOTE(review): wrap_mod() and dgettext() are not imported in the visible
# part of this file -- confirm they are in scope in this package.
sub command {
    my ( $self, $command, $paragraph, $line_num ) = @_;

    my %is_bare = map { $_ => 1 } qw(back cut pod);

    if ( $is_bare{$command} ) {
        $self->pushline("=$command\n\n");
    } elsif ( $command eq 'over' ) {

        # An empty paragraph brings no line ending of its own.
        my $terminator = length($paragraph) ? "" : "\n\n";
        $self->pushline( "=$command $paragraph" . $terminator );
    } elsif ( $command eq 'encoding' ) {
        my $charset = $paragraph;
        $charset =~ s/^\s*(.*?)\s*$/$1/s;    # trim surrounding whitespace

        my $master_charset = $self->get_in_charset;
        my $have_master    = length( $master_charset // '' ) > 0;
        if ( $have_master && uc($charset) ne uc($master_charset) ) {
            croak wrap_mod(
                "po4a::pod",
                dgettext(
                    "po4a",
                    "The file %s declares %s as encoding, but you provided %s as master charset. Please change either setting."
                ),
                $self->{DOCPOD}{refname},
                $charset,
                $master_charset,
            );
        }

        # The =encoding line will be added by docheader
    } else {
        my $ref = $self->{DOCPOD}{refname} . ":$line_num";
        $paragraph = $self->translate( $paragraph, $ref, "=$command", "wrap" => 1 );
        $self->pushline("=$command $paragraph\n\n");
    }
}

# Handle one verbatim paragraph.
#
# A paragraph consisting of a single newline is re-emitted without being
# translated. Any other paragraph is translated with the type "verbatim"
# (no wrapping requested); one newline is then stripped from the result
# because a newline is appended again when the text is pushed.
sub verbatim {
    my ( $self, $paragraph, $line_num ) = @_;

    if ( $paragraph ne "\n" ) {
        my $ref = $self->{DOCPOD}{refname} . ":$line_num";
        $paragraph = $self->translate( $paragraph, $ref, "verbatim" );
        $paragraph =~ s/\n$//m;
        return $self->pushline("$paragraph\n");
    }

    $self->pushline("$paragraph\n");
    return;
}

# Handle one ordinary text paragraph.
#
#   - A paragraph with at least one line starting with a space or a tab is
#     handed to verbatim() instead.
#   - A paragraph consisting of a single newline is re-emitted untranslated.
#   - Anything else is translated with the type 'textblock' (wrapping
#     allowed), unwrapped onto a single line and pushed.
sub textblock {
    my ( $self, $paragraph, $line_num ) = @_;

    # Workaround for a nasty bug: podlators do not explicitly wrap the
    # text, and groff does not seem to wrap a line beginning with a space.
    # So not only paragraphs whose first line is indented, but every
    # paragraph containing an indented line has to be treated as verbatim.
    # This declares more paragraphs verbatim than strictly needed, which is
    # harmless (only less comfortable for translators).
    if ( $paragraph =~ m/^[ \t]/m ) {
        $self->verbatim( $paragraph, $line_num );
        return;
    }

    if ( $paragraph eq "\n" ) {
        $self->pushline("$paragraph\n");
        return;
    }

    my $ref = $self->{DOCPOD}{refname} . ":$line_num";
    $paragraph = $self->translate( $paragraph, $ref, 'textblock', "wrap" => 1 );

    # Unwrap the content, so that C<> markup is never split over several
    # lines.
    $paragraph =~ s/ *\n/ /gm;
    $self->pushline("$paragraph\n\n");
}

# End-of-document hook; deliberately a no-op in this module.
# NOTE(review): presumably overrides the Pod::Parser end_pod() callback --
# confirm against the Pod::Parser documentation.
sub end_pod {
    return;
}

# Register an input file for parse() and let the TransTractor base class
# read it as well.
#
# Arguments:
#   $filename - path of the POD file to open
#   $refname  - name used in the references of the extracted strings
#   $charset  - encoding of the file; defaults to "UTF-8" when false
#
# The opened handle and $refname are appended, as a pair, to
# $self->{DOCPOD}{infile}, which parse() consumes.
#
# Returns whatever Locale::Po4a::TransTractor::read() returns.
# Croaks when the file cannot be opened, so that parse() is never handed an
# unusable filehandle.
sub read {
    my ( $self, $filename, $refname, $charset ) = @_;
    $charset ||= "UTF-8";

    open my $fh, "<:encoding($charset)", $filename
      or croak "Cannot open $filename for reading: $!";

    push @{ $self->{DOCPOD}{infile} }, ( $fh, $refname );
    $self->Locale::Po4a::TransTractor::read( $filename, $refname, $charset );
}

# Parse every input registered by read().
#
# $self->{DOCPOD}{infile} is a flat list of (filehandle, refname) pairs. It
# is walked on a copy, so the stored list itself is left untouched. For each
# pair, the refname is published in $self->{DOCPOD}{refname} (the other
# callbacks of this module use it to build references), the handle is given
# to parse_from_filehandle() and then closed.
sub parse {
    my $self = shift;

    my @pending = @{ $self->{DOCPOD}{infile} };
    while ( my ( $fh, $refname ) = splice @pending, 0, 2 ) {
        $self->{DOCPOD}{refname} = $refname;
        $self->parse_from_filehandle($fh);
        close $fh;
    }
}

# Build the banner placed at the top of a generated document.
#
# The banner warns that the file is generated. When the output charset is
# defined, non-empty and different from "ascii", an "=encoding <charset>"
# line is appended to it.
#
# Returns the banner as a single string.
sub docheader {
    my $self = shift;

    my $charset     = $self->get_out_charset();
    my $has_charset = defined($charset) && length($charset) && $charset ne "ascii";
    my $encoding    = $has_charset ? "\n=encoding $charset\n" : "";

    return <<EOT;

        *****************************************************
        *           GENERATED FILE, DO NOT EDIT             *
        * THIS IS NO SOURCE FILE, BUT RESULT OF COMPILATION *
        *****************************************************

This file was generated by po4a(7). Do not store it (in VCS, for example),
but store the PO file used as source file by po4a-translate.

In fact, consider this as a binary, and the PO file as a regular .c file:
If the PO get lost, keeping this translation up-to-date will be harder.
$encoding
EOT
}
1;

##############################################################################
# Module return value and documentation
##############################################################################

1;
__END__

=encoding UTF-8

=head1 NAME

Locale::Po4a::Pod - convert POD data from/to PO files

=head1 SYNOPSIS

    use Locale::Po4a::Pod;
    my $parser = Locale::Po4a::Pod->new();

    # Read POD from STDIN and write to STDOUT.
    $parser->parse_from_filehandle;

    # Read POD from file.pod and write to file.txt.
    $parser->parse_from_file ('file.pod', 'file.txt');

=head1 DESCRIPTION

Locale::Po4a::Pod is a module to help the translation of documentation in
the POD format (the preferred language for documenting Perl) into other
[human] languages.

=head1 STATUS OF THIS MODULE

I think that this module is rock stable, and there is only one known bug
with F</usr/lib/perl5/Tk/MainWindow.pod> (and some other
pages, see below) which contains:

  C<" #n">

Unfortunately, in the po4a version, this gets split at the space by the
wrapping. As a result, the man page from the original version contains:

 " #n"

and mine contains:

 "" #n""

which is logical since CE<lt>foobarE<gt> is rewritten "foobar".

Complete list of pages having this problem on my box (from 564 pages; note
that it depends on the chosen wrapping column):

 /usr/lib/perl5/Tk/MainWindow.pod
 /usr/share/perl/5.8.0/overload.pod
 /usr/share/perl/5.8.0/pod/perlapi.pod
 /usr/share/perl/5.8.0/pod/perldelta.pod
 /usr/share/perl/5.8.0/pod/perlfaq5.pod
 /usr/share/perl/5.8.0/pod/perlpod.pod
 /usr/share/perl/5.8.0/pod/perlre.pod
 /usr/share/perl/5.8.0/pod/perlretut.pod



=head1 INTERNALS

As a derived class from Pod::Parser, Locale::Po4a::Pod supports the same
methods and interfaces.  See L<Pod::Parser> for all the details; briefly,
one creates a new parser with C<< Locale::Po4a::Pod->new() >> and then
calls either parse_from_filehandle() or parse_from_file().

=head1 SEE ALSO

L<Pod::Parser>,
L<Locale::Po4a::Man(3pm)>,
L<Locale::Po4a::TransTractor(3pm)>,
L<po4a(7)|po4a.7>

=head1 AUTHORS

 Denis Barbier <barbier@linuxfr.org>
 Martin Quinson (mquinson#debian.org)

=head1 COPYRIGHT AND LICENSE

Copyright © 2002 SPI, Inc.

This program is free software; you may redistribute it and/or modify it
under the terms of GPL v2.0 or later (see the COPYING file).

=cut