summaryrefslogtreecommitdiffstats
path: root/po4a-normalize
blob: 5ca7cf96d276c73a27aef769de8666b1f37bf37d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
#! /usr/bin/env perl
eval 'exec perl -S $0 ${1+"$@"}'
  if $running_under_some_shell;

# po4a-normalize -- normalize documentation files
#
# Copyright 2002-2023 by SPI, inc.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of GPL v2.0 or later (see COPYING).

=encoding UTF-8

=head1 NAME

po4a-normalize - normalize a documentation file by parsing it in po4a, and writing it back

=head1 SYNOPSIS

B<po4a-normalize> B<-f> I<fmt> I<master.doc>

=head1 DESCRIPTION

The po4a (PO for anything) project goal is to ease translations (and more
interestingly, the maintenance of translations) using gettext tools on
areas where they were not expected like documentation.

The B<po4a-normalize> script is a debugging tool used to make sure that
po4a doesn't change the document when it's not supposed to. Only use it if
you're developing a new module, or if you doubt the sanity of the tools.

The generated document will be written to F<po4a-normalize.output> while the
generated POT file will be written to F<po4a-normalize.po> by default, but 
you can use the B<--localized> and B<--pot> options to change that.

=head1 OPTIONS

=over 4

=item B<-o>, B<--option>

Extra option(s) to pass to the format plugin. See the documentation of each
plugin for more information about the valid options and their meanings. For 
example, you could pass '-o tablecells' to the AsciiDoc parser, while the 
text parser would accept '-o tabs=split'.

=item B<-w>, B<--width>

Column at which we should wrap the resulting file if the format supports it
(default: 76).

=item B<-b>, B<--blank>

Create a blank translated document.
The translated document is generated as if every message were translated
to blank content: a newline when the original message ends with one, a
single space otherwise.

This is useful to check what parts of the document cannot be translated.

The B<--blank> and B<--capitalize> options cannot be specified at the same time.

=item B<-C>, B<--capitalize>

Create a translated document and corresponding po-file with original strings
translated as their capitalized versions.

This is useful to check what parts of the document cannot be translated and
generate test data for po4a.

The B<--blank> and B<--capitalize> options cannot be specified at the same time.

=item B<-h>, B<--help>

Show a short help message.

=item B<--help-format>

List the documentation formats understood by po4a.

=item B<-f>, B<--format>

Format of the documentation you want to handle. Use the B<--help-format>
option to see the list of available formats.

=item B<-M>, B<--master-charset>

Charset of the file containing the document to translate.

=item B<-l>, B<--localized>

Name of the normalized file to produce (I<po4a-normalize.output> by default). 

=item B<-p>, B<--pot>

POT file to produce (I<po4a-normalize.po> by default).

=item B<-V>, B<--version>

Display the version of the script and exit.

=back

=head1 SEE ALSO

L<po4a-gettextize(1)>,
L<po4a-translate(1)>,
L<po4a-updatepo(1)>,
L<po4a(7)>

=head1 AUTHORS

 Denis Barbier <barbier@linuxfr.org>
 Nicolas François <nicolas.francois@centraliens.net>
 Martin Quinson (mquinson#debian.org)

=head1 COPYRIGHT AND LICENSE

Copyright 2002-2023 by SPI, inc.

This program is free software; you may redistribute it and/or modify it
under the terms of GPL v2.0 or later (see the COPYING file).

=cut

use 5.16.0;
use strict;
use warnings;

use Locale::Po4a::Chooser;
use Locale::Po4a::TransTractor;
use Locale::Po4a::Common;

use Getopt::Long qw(GetOptions);

use Pod::Usage qw(pod2usage);

Locale::Po4a::Common::textdomain('po4a');

# Handler for the version option: delegates to
# Locale::Po4a::Common::show_version with this script's name, then ends the
# program with exit status 0. Any arguments passed by the caller are ignored.
sub show_version {
    my $script_name = "po4a-normalize";
    Locale::Po4a::Common::show_version($script_name);
    exit(0);
}

# ---------------------------------------------------------------------------
# Main program: parse the command line, run the selected format parser over
# the master document, then write the normalized document and the PO file.
# ---------------------------------------------------------------------------

# Command-line state. Only the two output names and the wrap column have
# defaults; every other variable stays undefined unless its option is given.
my ( $help_fmt, $help, $type, $debug, $verbose, $quiet, $wrappo, @options, $no_deprecation );
my ( $blank, $capitalize );
my ($mastchar);
my ( $localized, $potfile, $width ) = ( 'po4a-normalize.output', 'po4a-normalize.po', 76 );

# Option names are case-sensitive (-v and -V are different options) and may
# not be abbreviated.
Getopt::Long::Configure( 'no_auto_abbrev', 'no_ignore_case' );
GetOptions(
    'help|h'        => \$help,
    'help-format'   => \$help_fmt,
    'format|f=s'    => \$type,
    'localized|l=s' => \$localized,
    'pot|p=s'       => \$potfile,

    'blank|b'      => \$blank,
    'capitalize|C' => \$capitalize,

    'master-charset|M=s' => \$mastchar,

    # undocumented option to silence the warning about po4a-normalize not being intended for users
    'no-deprecation' => \$no_deprecation,

    'option|o=s' => \@options,
    'wrap-po=s'  => \$wrappo,

    # The wrap column is a number: let Getopt::Long reject anything else
    # instead of silently forwarding an arbitrary string to the parser.
    'width|w=i' => \$width,
    'verbose|v' => \$verbose,
    'quiet|q'   => \$quiet,
    'debug|d'   => \$debug,
    'version|V' => \&show_version
) or pod2usage();

$help     && pod2usage( -verbose => 1, -exitval => 0 );
# NOTE(review): Chooser::list() is expected to terminate the program; if it
# returns, the argument-count check below runs next -- confirm.
$help_fmt && Locale::Po4a::Chooser::list(0);

# Exactly one positional argument (the master document) is required.
pod2usage() unless scalar @ARGV == 1;

# Reject mutually exclusive options up front, before anything is printed or
# any parser is constructed.
if ( $blank and $capitalize ) {
    die wrap_msg( gettext("Both B<--blank> and B<--capitalize> can't be specified at the same time."));
}
die wrap_msg( gettext("Options %s and %s cannot be provided together"), "quiet", "verbose" ) if $verbose && $quiet;

# Options handed to the format parser: the built-in ones first, then every
# -o value, given either as "name=value" or as a bare "name" (stored as 1).
# A -o entry can therefore override one of the built-in keys.
my %options = (
    "verbose" => $verbose,
    "debug"   => $debug,
    "wrapcol" => $width
);
foreach my $extra (@options) {
    if ( $extra =~ m/^([^=]*)=(.*)$/ ) {
        $options{$1} = "$2";
    } else {
        $options{$extra} = 1;
    }
}

unless ($no_deprecation) {
    print wrap_msg(
        gettext(
                "po4a-normalize is mostly useful to debug po4a itself. "
              . "Users are advised to use the po4a(1) program instead."
        )
    );
}
my $parser = Locale::Po4a::Chooser::new( $type, %options );

# Take the master document from @ARGV. The value is tested with
# defined/length rather than for truth, so that a file literally named "0"
# is accepted; an empty name still triggers the usage message.
my $filename = shift;
pod2usage(1) unless defined $filename && length $filename;

# "-" is let through without an existence check; any other name must exist.
$filename eq '-' || -e $filename || die wrap_msg( gettext("File %s does not exist."), $filename );

# First pass: read and parse the master document, collecting the extracted
# messages in a freshly created PO object.
$parser->read( $filename, $filename, $mastchar // '' );
my %pot_options = ( 'wrap-po' => $wrappo );
$parser->setpoout( Locale::Po4a::Po->new( \%pot_options ) );
$parser->parse();

if ( $blank or $capitalize ) {

    # Give every extracted message a synthetic translation.
    my $modified_po = $parser->{TT}{po_out};
    my $entries     = $modified_po->{po};
    foreach my $msgid ( keys %{$entries} ) {
        my $entry = $entries->{$msgid};
        if ($blank) {

            # Blank translation: a newline when the msgid ends with one,
            # a single space otherwise.
            $entry->{'msgstr'} = ( $msgid =~ m/\n$/s ) ? "\n" : " ";
        } else {    # $capitalize

            # Upper-case every character that is not directly preceded by a
            # backslash.
            ( $entry->{'msgstr'} = $msgid ) =~ s/(?<!\\)(.)/\U$1/g;
        }
    }

    # Second pass: a new parser re-reads the master document with the
    # modified PO installed as its input translations (po_in).
    $parser = Locale::Po4a::Chooser::new( $type, %options );
    $parser->{TT}{po_in} = $modified_po;
    $parser->read($filename, $filename, $mastchar // '' );
    $parser->{TT}{file_in_charset} = $mastchar;
    $parser->parse();

    # Point po_out back at the modified PO -- presumably so that writepo()
    # below emits the synthetic translations; verify against TransTractor.
    $parser->{TT}{po_out} = $modified_po;
}

print wrap_msg( dgettext( "po4a", "Write the normalized document to %s." ), $localized )
  unless $quiet;
$parser->write( $localized, '' );
print wrap_msg( dgettext( "po4a", "Write the normalized PO file to %s." ), $potfile )
  unless $quiet;
$parser->writepo($potfile);

__END__