From d318611dd6f23fcfedd50e9b9e24620b102ba96a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 15 Apr 2024 21:44:05 +0200 Subject: Adding upstream version 1.23.0. Signed-off-by: Daniel Baumann --- contrib/gpinyin/ChangeLog | 200 ++++++++++++ contrib/gpinyin/gpinyin.1.man | 378 ++++++++++++++++++++++ contrib/gpinyin/gpinyin.am | 56 ++++ contrib/gpinyin/gpinyin.pl | 725 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 1359 insertions(+) create mode 100644 contrib/gpinyin/ChangeLog create mode 100644 contrib/gpinyin/gpinyin.1.man create mode 100644 contrib/gpinyin/gpinyin.am create mode 100755 contrib/gpinyin/gpinyin.pl (limited to 'contrib/gpinyin') diff --git a/contrib/gpinyin/ChangeLog b/contrib/gpinyin/ChangeLog new file mode 100644 index 0000000..53cc228 --- /dev/null +++ b/contrib/gpinyin/ChangeLog @@ -0,0 +1,200 @@ +2022-10-09 G. Branden Robinson + + * gpinyin.pl: Report groff version number along with this + program's own when not run from source tree. Drop dead code. + Bump micro version number to reflect recent restructuring. + +2022-10-09 G. Branden Robinson + + Make gpinyin script stand alone. + + * subs.pl: Delete, moving its content into... + * gpinyin.pl: ...here. + (vowel_t): Use explicit list with `my`. + (vowel_n, vowel_t): Declare local scalar `$vowel_with_tone` + using `my`. + * gpinyin.am (dist_gpinyin_DATA): Delete. + +2022-05-03 G. Branden Robinson + + * gpinyin.am (gpinyin): Fix missing dependency on + `$(SH_DEPS_SED_SCRIPT)`. + +2021-05-11 G. Branden Robinson + + * gpinyin.1.man: Render the tone mark table only if the output + device might be capable. + + Fixes . + +2021-05-10 G. Branden Robinson + + * gpinyin.1.man: Work around inability of grops and gropdf to + construct some Unicode composite characters. Use groff + composite special characters for "a" with acute and grave + accents, and define strings for "a" with macron (overline) and + with háček/caron accents. 
Use different string definitions for + nroff and troff modes so we don't regress UTF-8 terminal output. + +2021-05-10 G. Branden Robinson + + * subs.pl (vowel_t): Fix incorrect rendering of base glyph 'U', + which was being forced to lowercase when a dieresis and tone + mark were both being applied. This caused the tone mark to + overlap the dieresis, so decrease the font size of the base + glyph even more to compensate. The result is ugly but + comprehensible. + + See , partially mitigated + but not completely resolved. + +2021-05-09 G. Branden Robinson + + * subs.pl (%tones1_Unicode): Fix copy and paste error. Emit + U+01D5 (Latin capital letter u with dieresis and macron) for Ü + with tone 1, instead of U+016A (Latin capital letter u with + macron). + + Fixes . + +2021-05-09 G. Branden Robinson + + * subs.pl (handle_word): Emit \[cq] instead of \[aq] when + interpolating an apostrophe before a vowel. + + Fixes . + +2021-05-09 G. Branden Robinson + + * subs.pl (%tones_glyphs, %tones4_glyphs): Fix hash keys to use + the groff dotless i special character into which a lowercase "i" + has already been transformed instead of 'i' itself. + (vowel_n, vowel_t): Rename variable so that tone-transformed + vowel is stored separately. Add "or warn" to test the result + and cheaply assert that we got a string back from our hash + lookup on the vowel. + + Fixes . + +2021-01-06 Colin Watson + + * gpinyin.pl: Avoid Perl's unsafe "<>" operator. + + The "<>" operator is implemented using the two-argument form of + "open", which interprets magic such as pipe characters, allowing + execution of arbitrary commands which is unlikely to be + expected. Perl >= 5.22 has a "<<>>" operator which avoids this, + but also forbids the use of "-" to mean the standard input, + which is a facility that the affected groff programs document. 
+ + ARGV::readonly would probably also fix this, but I fundamentally + dislike the approach of escaping data in preparation for a + language facility to unescape it, especially when the required + escaping is as non-obvious as it is here. (For the same reason, + I prefer to use subprocess invocation facilities that allow + passing the argument list as a list rather than as a string to + be interpreted by the shell.) So I've abandoned this dubious + convenience and changed the affected programs to iterate over + command-line arguments manually using the three-argument form of + open. + + Fixes . + +2020-04-22 G. Branden Robinson + + * gpinyin.1.man: Delete references to groffer. + +2018-02-28 Werner LEMBERG + + * gpinyin.am (gpinyin): Use $(AM_V_GEN) to silence file generation. + +2015-08-22 Bernd Warken + + * gpinyin.1.man: Rename `gpinyin.man'. + + * gpinyin.am: include renaming. + +2015-08-05 Bernd Warken + + * gpinyin.am: Add `Last update'. Setup Emacs mode. + +2015-04-03 Werner LEMBERG + + * gpinyin.man: Make it work in compatibility mode. + (EL): Fix typo. + +2014-10-11 Werner LEMBERG + + * Makefile.sub (gpinyin): Handle `gpinyin_dir'. + +2014-10-11 Bernd Warken + + * gpinyin.pl: Version 1.0.4 + Remove `use IPC::System::Simple'. + +2014-10-10 Bernd Warken + + * gpinyin.pl: Version 1.0.3 + Remove beginning empty line for `pinyin' parts. + +2014-09-25 Bernd Warken + + * gpinyin.pl: Version 1.0.2 + + * Makefile.sub: Add .PHONY. Restructure install and uninstall. + +2014-09-03 Bernd Warken + + Version 1.0.1 + + * all `gpinyin' files: Copying and Emacs settings. + +2014-08-27 Bernd Warken + + Version 1.0.0 + + * gpinyin.pl, subs.pl, gpinyin.man: Make `gpinyin' runnable. + +2014-08-08 Bernd Warken + + * gpinyin.pl: Version 0.9.2 + + * subs.pl: Rename `sub.pl'. + + * Makefile.sub: Change `sub.pl' to `subs.pl'. + +2014-08-08 Bernd Warken + + * gpinyin.pl: Version 0.9.1 + + * sub.pl: New file for storing subs later on. 
+ + * Makefile.sub: Add new gpinyin path for sub.pl. + +2014-08-01 Bernd Warken + + * gpinyin.pl, gpinyin.man, ChangeLog, Makefile.sub: + First version 0.9.0 of gpinyin + +2014-08-01 Bernd Warken +________________________________________________________________________ +License + +Copyright (C) 2014-2020 Free Software Foundation, Inc. +Written by Bernd Warken . + +Copying and distribution of this file, with or without +modification, are permitted provided the copyright notice and this +notice are preserved. + +This file is part of `gpinyin', which is part of the `groff' +project. + +##### Editor settings + +Local Variables: +fill-column: 72 +mode: change-log +version-control: never +End: +vim:set autoindent textwidth=72: diff --git a/contrib/gpinyin/gpinyin.1.man b/contrib/gpinyin/gpinyin.1.man new file mode 100644 index 0000000..3c3884e --- /dev/null +++ b/contrib/gpinyin/gpinyin.1.man @@ -0,0 +1,378 @@ +'\" t +.TH gpinyin @MAN1EXT@ "@MDATE@" "groff @VERSION@" +.SH Name +gpinyin \- use Hanyu Pinyin Chinese in +.I groff +documents +. +. +.\" ==================================================================== +.\" Legal Terms +.\" ==================================================================== +.\" +.\" Copyright (C) 2014-2020 Free Software Foundation, Inc. +.\" +.\" This file is part of gpinyin, which is part of groff, a free +.\" software project. +.\" +.\" You can redistribute it and/or modify it under the terms of the GNU +.\" General Public License version 2 as published by the Free Software +.\" Foundation. +.\" +.\" The license text is available in the internet at +.\" . +. +. +.\" Save and disable compatibility mode (for, e.g., Solaris 10/11). +.do nr *groff_gpinyin_1_man_C \n[.cp] +.cp 0 +. +.\" Define fallback for groff 1.23's MR macro if the system lacks it. +.nr do-fallback 0 +.if !\n(.f .nr do-fallback 1 \" mandoc +.if \n(.g .if !d MR .nr do-fallback 1 \" older groff +.if !\n(.g .nr do-fallback 1 \" non-groff *roff +.if \n[do-fallback] \{\ +. 
de MR +. ie \\n(.$=1 \ +. I \%\\$1 +. el \ +. IR \%\\$1 (\\$2)\\$3 +. . +.\} +.rr do-fallback +. +. +.\" ==================================================================== +.\" Local definitions +.\" ==================================================================== +. +.\" Define a string for the TeX logo. +.ie t .ds TeX T\h'-.1667m'\v'.224m'E\v'-.224m'\h'-.125m'X +.el .ds TeX TeX +. +. +.\" ==================================================================== +.SH Synopsis +.\" ==================================================================== +. +.SY gpinyin +.RI [ file\~ .\|.\|.] +.YS +. +. +.SY gpinyin +.B \-h +. +.SY gpinyin +.B \-\-help +.YS +. +.SY gpinyin +.B \-v +. +.SY gpinyin +.B \-\-version +.YS +. +. +.\" ==================================================================== +.SH Description +.\" ==================================================================== +. +.I gpinyin +is a preprocessor for +.MR groff @MAN1EXT@ +that facilitates use of Hanyu Pinyin in +.MR groff @MAN7EXT@ +files. +. +Pinyin is a method for writing the Mandarin Chinese language with the +Latin alphabet. +. +Mandarin consists of more than four hundred base syllables, +each spoken with one of five different tones. +. +Changing the tone applied to the syllable generally alters the meaning +of the word it forms. +. +In Pinyin, +a syllable is written in the Latin alphabet and a numeric tone indicator +can be appended to each syllable. +. +. +.P +Each +.I input-file +is a file name or the character +.RB \[lq] \- \[rq] +to indicate that the standard input stream should be read. +. +As usual, +the argument +.RB \[lq] \-\- \[rq] +can be used in order to force interpretation of all remaining arguments +as file names, +even if an +.I input-file +argument begins with a +.RB \[lq] \- \[rq]. +. +.B \-h +and +.B \-\-help +display a usage message, +while +.B \-v +and +.B \-\-version +show version information; +all exit afterward. +. +. 
+.\" ==================================================================== +.SS "Pinyin sections" +.\" ==================================================================== +. +Pinyin sections in +.I groff +files are enclosed by two +.B .pinyin +requests with different arguments. +. +The starting request is +.RS +.EX +\&.pinyin start +.EE +.RE +or +.RS +.EX +\&.pinyin begin +.EE +.RE +and the ending request is +.RS +.EX +\&.pinyin stop +.EE +.RE +or +.RS +.EX +\&.pinyin end +.EE +.RE +\&. +. +. +.\" ==================================================================== +.SS Syllables +.\" ==================================================================== +. +In Pinyin, +each syllable is represented by one to six letters drawn from the +fifty-two upper- and lowercase letters of the Unicode basic Latin +character set, +plus the letter \[lq]U\[rq] with dieresis (umlaut) in both cases\[em]in +other words, +the members of the set \[lq][a\[en]zA\[en]Z\[:u]\[:U]]\[rq]. +. +. +.P +In +.I groff +input, +all basic Latin letters are written as themselves. +. +The \[lq]u with dieresis\[rq] can be written as +\[lq]\e[:u]\[rq] +in lowercase or +\[lq]\e[:U]\[rq] +in uppercase. +. +Within +.B .pinyin +sections, +.I gpinyin +supports the form +\[lq]ue\[rq] +for lowercase and the forms +\[lq]Ue\[rq] +and +\[lq]UE\[rq] +for uppercase. +. +. +.\" ==================================================================== +.SS Tones +.\" ==================================================================== +. +Each syllable has exactly one of five +.IR tones . +. +The fifth tone is not explicitly written at all, +but each of the first through fourth tones is indicated with a diacritic +above a specific vowel within the syllable. +. +. +.P +In a +.I gpinyin +source file, +these tones are written by adding a numeral in the range 0 to 5 after +the syllable. +. +The tone numbers 1 to 4 are transformed into accents above vowels in the +output. +. +The tone numbers 0 and 5 are synonymous. +. +. 
+.P +.nr gpinyin*do-table 0 +.if t .nr gpinyin*do-table 1 +.if n .if '\*[.T]'utf8' .nr gpinyin*do-table 1 +.\" XXX: One hack necessitates another; since our table is conditional, +.\" we need to save the input line counter. +.nr gpinyin*.c \n[.c] +.ie \n[gpinyin*do-table] \{\ +The tones are written as follows. +. +. +.P +.\" XXX: This is so gross. Why can't grops and gropdf figure this out? +.if t .ds a- \za\[a-] +.if n .ds a- \[a a-] +.if t .ds a< \za\[ah] +.if n .ds a< \[a ah] +.if t .ne 8 \" Try to keep the table on one page for printed output. +.TS +l l l l l. +Tone Description Diacritic Example Input Example Output +_ +first flat \[a-] ma1 m\*[a-] +second rising \[aa] ma2 m\[a aa] +third falling-rising \[ah] ma3 m\*[a<] +fourth falling \[ga] ma4 m\[a ga] +fifth neutral (none) ma0 ma +\^ \^ \^ ma5 \^ +.TE +.\} +.lf (\n[gpinyin*.c] + 25) \" XXX part 2: Restore input line counter. +.el \{\ +[The tone mark table is omitted from this rendering of the man page +because the selected output device \[lq]\*[.T]\[rq] lacks the character +repertoire to display it. +. +Try another output device.] +.\} +.rm a- +.rm a< +.rr gpinyin*do-table +. +. +.P +The neutral tone number can be omitted from a word-final syllable, +but not otherwise. +. +. +.\" ==================================================================== +.SH Authors +.\" ==================================================================== +. +.I gpinyin +was written by +.MT groff\-bernd\:.warken\-72@\:web\:.de +Bernd Warken +.ME . +. +. +.\" ==================================================================== +.SH "See also" +.\" ==================================================================== +. +Useful documents on the World Wide Web related to Pinyin include +.RS 4n +.UR http://\:www\:.foolsworkshop\:.com/\:ptou/\:index\:.html +.I Pinyin to Unicode +.UE , +. +.br +.UR http://\:www\:.mandarintools\:.com/ +.I On-line Chinese Tools \" sic: On-line +.UE , +. 
+.br +.\" XXX: Turning off adjustment like this is ugly; thanks to meter-long +.\" URLs we need an escape sequence that selectively disables adjustment +.\" at the end of a word. +.na +.UR http://\:www\:.pinyin\:.info/\:index\:.html +.I Pinyin.info: a guide to the writing of Mandarin Chinese in \ +romanization +.UE , +.ad \*[AD] +. +.br +.UR http://\:www\:.pinyin\:.info/\:rules/\:where.html +\[lq]Where do the tone marks go?\[rq] +.UE , +. +.br +.UR http://\:git\:.savannah\:.gnu\:.org/\:gitweb/\:\ +?p=cjk\:.git\:;a=blob_plain\:;f=doc/\:pinyin\:.txt\:;hb=HEAD +.I pinyin.txt +from the CJK macro package for \*[TeX] +.UE , +.br +.RS -4n +and +.RE +. +.br +.\" XXX: Same ugliness as before. +.na +.UR http://\:git\:.savannah\:.gnu\:.org/\:gitweb/\:\ +?p=cjk\:.git\:;a=blob_plain\:;f=texinput/\:pinyin\:.sty\:;hb=HEAD +.I pinyin.sty +from the CJK macro package for \*[TeX] +.UE . +.ad \*[AD] +. +.RE +. +. +.P +.MR groff @MAN1EXT@ +and +.MR grog @MAN1EXT@ +explain how to view +.I roff +documents. +. +. +.P +.MR groff @MAN7EXT@ +and +.MR groff_char @MAN7EXT@ +are comprehensive references covering the language elements of GNU +.I troff \" GNU +and the available glyph repertoire, +respectively. +. +. +.\" Clean up. +.rm TeX +. +.\" Restore compatibility mode (for, e.g., Solaris 10/11). +.cp \n[*groff_gpinyin_1_man_C] +.do rr *groff_gpinyin_1_man_C +. +. +.\" Local Variables: +.\" fill-column: 72 +.\" mode: nroff +.\" End: +.\" vim: set filetype=groff textwidth=72: diff --git a/contrib/gpinyin/gpinyin.am b/contrib/gpinyin/gpinyin.am new file mode 100644 index 0000000..78cc35a --- /dev/null +++ b/contrib/gpinyin/gpinyin.am @@ -0,0 +1,56 @@ +# Automake rules for 'gpinyin' (preprocessor for added Perl parts) + +# Copyright (C) 2014-2020 Free Software Foundation, Inc. +# Written by Bernd Warken . +# Moved to automake by Bertrand Garrigues + +# This file is part of 'gpinyin' which is part of 'groff'. 
+
+# 'groff' is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+
+# 'groff' is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see
+# .
+
+########################################################################
+
+gpinyin_srcdir = $(top_srcdir)/contrib/gpinyin
+bin_SCRIPTS += gpinyin
+gpinyindir = $(gpinyin_dir)
+man1_MANS += contrib/gpinyin/gpinyin.1
+EXTRA_DIST += \
+  contrib/gpinyin/ChangeLog \
+  contrib/gpinyin/gpinyin.1.man \
+  contrib/gpinyin/gpinyin.pl
+
+# Generate the executable 'gpinyin' script from gpinyin.pl.  sed applies
+# $(SH_DEPS_SED_SCRIPT) and $(SH_SCRIPT_SED_CMD) and fills in the
+# bracketed placeholders (g, BINDIR, gpinyin_dir, VERSION); the
+# placeholders are spelled "[@]name[@]" so that they are not themselves
+# substituted when this file is processed.  Both directory placeholders
+# are prefixed with $(DESTDIR).
+gpinyin: contrib/gpinyin/gpinyin.pl $(SH_DEPS_SED_SCRIPT)
+	$(AM_V_GEN)sed -f "$(SH_DEPS_SED_SCRIPT)" \
+	    -e "s|[@]g[@]|$(g)|g" \
+	    -e "s|[@]BINDIR[@]|$(DESTDIR)$(bindir)|g" \
+	    -e "s|[@]gpinyin_dir[@]|$(DESTDIR)$(gpinyin_dir)|" \
+	    -e "s|[@]VERSION[@]|$(VERSION)|g" \
+	    -e "$(SH_SCRIPT_SED_CMD)" \
+	    $(gpinyin_srcdir)/gpinyin.pl \
+	    >$@ \
+	    && chmod +x $@
+
+# On uninstall, remove the gpinyin directory if it exists (rmdir only
+# succeeds on an empty directory).
+uninstall_groffdirs: uninstall-gpinyin-hook
+uninstall-gpinyin-hook:
+	if test -d $(DESTDIR)$(gpinyindir); then \
+	  rmdir $(DESTDIR)$(gpinyindir); \
+	fi
+
+
+# Local Variables:
+# mode: makefile-automake
+# fill-column: 72
+# End:
+# vim: set autoindent filetype=automake textwidth=72:
diff --git a/contrib/gpinyin/gpinyin.pl b/contrib/gpinyin/gpinyin.pl
new file mode 100755
index 0000000..73b3034
--- /dev/null
+++ b/contrib/gpinyin/gpinyin.pl
@@ -0,0 +1,725 @@
+#! /usr/bin/env perl
+
+# gpinyin - European-like Chinese writing 'pinyin' into 'groff'
+
+# Copyright (C) 2014-2020 Free Software Foundation, Inc.
+
+# Written by Bernd Warken .
+ +my $version = '1.0.5'; +my $groff_version = '(groff @VERSION@) '; # with trailing space + +# This file is part of 'gpinyin', which is part of 'groff'. + +# 'groff' is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. + +# 'groff' is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +# You can find a copy of the GNU General Public License in the internet +# at . + +######################################################################## + +use strict; +use warnings; +#use diagnostics; + +# temporary dir and files +use File::Temp qw/ tempfile tempdir /; + +# needed for temporary dir +use File::Spec; + +# for 'copy' and 'move' +use File::Copy; + +# for fileparse, dirname and basename +use File::Basename; + +# current working directory +use Cwd; + +# $Bin is the directory where this script is located +use FindBin; + + +######################################################################## +# system variables and exported variables +######################################################################## + +$\ = "\n"; # final part for print command + +{ + my $at = '@'; + $groff_version = '' if '@VERSION@' eq "${at}VERSION${at}"; +} + +######################################################################## +# All Pinyin syllables from wikipedia +######################################################################## + +my %syllables = + ( + 'a' => 1, 'ai' => 1, 'an' => 1, 'ang' => 1, 'ao' => 1, + 'ba' => 1, 'bai' => 1, 'ban' => 1, 'bang' => 1, 'bao' => 1, + 'bei' => 1, 'ben' => 1, 'beng' => 1, + 'bi' => 1, 'bian' => 1, 'biao' => 1, 'bie' => 1, 'bin' => 1, + 'bing' => 1, 'bo' => 1, 'bu' => 1, + 'ca' => 1, 'cai' 
=> 1, 'can' => 1, 'cang' => 1, 'cao' => 1, + 'ce' => 1, 'cen' => 1, 'ceng' => 1, + 'cha' => 1, 'chai' => 1, 'chan' => 1, 'chang' => 1, 'chao' => 1, + 'che' => 1, 'chen' => 1, 'cheng' => 1, 'chi' => 1, + 'chong' => 1, 'chou' => 1, 'chu' => 1, + 'chua' => 1, 'chuai' => 1, 'chuan' => 1, 'chuang' => 1, + 'chui' => 1, 'chun' => 1, 'chuo' => 1, + 'ci' => 1, 'cong' => 1, 'cou' => 1, + 'cu' => 1, 'cuan' => 1, 'cui' => 1, 'cun' => 1, 'cuo' => 1, + 'da' => 1, 'dai' => 1, 'dan' => 1, 'dang' => 1, 'dao' => 1, + 'de' => 1, 'dei' => 1, 'den' => 1, 'deng' => 1, + 'di' => 1, 'dian' => 1, 'diao' => 1, 'die' => 1, + 'ding' => 1, 'diu' => 1, 'dong' => 1, 'dou' => 1, + 'du' => 1, 'duan' => 1, 'dui' => 1, 'dun' => 1, 'duo' => 1, + 'e' => 1, 'ei' => 1, 'en' => 1, 'eng' => 1, 'er' => 1, + 'fa' => 1, 'fan' => 1, 'fang' => 1, + 'fei' => 1, 'fen' => 1, 'feng' => 1, 'fiao' => 1, + 'fo' => 1, 'fou' => 1, 'fu' => 1, + 'ga' => 1, 'gai' => 1, 'gan' => 1, 'gang' => 1, 'gao' => 1, + 'ge' => 1, 'gei' => 1, 'gen' => 1, 'geng' => 1, + 'gong' => 1, 'gou' => 1, 'gu' => 1, + 'gua' => 1, 'guai' => 1, 'guan' => 1, 'guang' => 1, 'gui' => 1, + 'gun' => 1, 'guo' => 1, + 'ha' => 1, 'hai' => 1, 'han' => 1, 'hang' => 1, 'hao' => 1, + 'he' => 1, 'hei' => 1, 'hen' => 1, 'heng' => 1, + 'hong' => 1, 'hou' => 1, + 'hu' => 1, 'hua' => 1, 'huai' => 1, 'huan' => 1, 'huang' => 1, + 'hui' => 1, 'hun' => 1, 'huo' => 1, + 'ji' => 1, 'jia' => 1, 'jian' => 1, 'jiang' => 1, 'jiao' => 1, + 'jie' => 1, 'jin' => 1, 'jing' => 1, 'jiong' => 1, 'jiu' => 1, + 'ju' => 1, 'juan' => 1, 'jue' => 1, 'jun' => 1, + 'ka' => 1, 'kai' => 1, 'kan' => 1, 'kang' => 1, 'kao' => 1, + 'ke' => 1, 'kei' => 1, 'ken' => 1, 'keng' => 1, + 'kong' => 1, 'kou' => 1, 'ku' => 1, 'kua' => 1, 'kuai' => 1, + 'kuan' => 1, 'kuang' => 1, 'kui' => 1, 'kun' => 1, 'kuo' => 1, + 'la' => 1, 'lai' => 1, 'lan' => 1, 'lang' => 1, 'lao' => 1, + 'le' => 1, 'lei' => 1, 'leng' => 1, + 'li' => 1, 'lia' => 1, 'lian' => 1, 'liang' => 1, 'liao' => 1, + 'lie' => 1, 'lin' => 1, 
'ling' => 1, 'liu' => 1, + 'lo' => 1, 'long' => 1, 'lou' => 1, + 'lu' => 1, 'luan' => 1, 'lun' => 1, 'luo' => 1, + 'lü' => 1, 'lüe' => 1, + 'ma' => 1, 'mai' => 1, 'man' => 1, 'mang' => 1, 'mao' => 1, + 'me' => 1, 'mei' => 1, 'men' => 1, 'meng' => 1, + 'mi' => 1, 'mian' => 1, 'miao' => 1, 'mie' => 1, + 'min' => 1, 'ming' => 1, 'miu' => 1, + 'mo' => 1, 'mou' => 1, 'mu' => 1, + 'na' => 1, 'nai' => 1, 'nan' => 1, 'nang' => 1, 'nao' => 1, + 'ne' => 1, 'nei' => 1, 'nen' => 1, 'neng' => 1, + 'ni' => 1, 'nian' => 1, 'niang' => 1, 'niao' => 1, 'nie' => 1, + 'nin' => 1, 'ning' => 1, 'niu' => 1, 'nong' => 1, 'nou' => 1, + 'nu' => 1, 'nuan' => 1, 'nun' => 1, 'nuo' => 1, + 'nü' => 1, 'nüe' => 1, + 'o' => 1, 'ong' => 1, 'ou' => 1, + 'pa' => 1, 'pai' => 1, 'pan' => 1, 'pang' => 1, 'pao' => 1, + 'pei' => 1, 'pen' => 1, 'peng' => 1, + 'pi' => 1, 'pian' => 1, 'piao' => 1, 'pie' => 1, 'pin' => 1, + 'ping' => 1, 'po' => 1, 'pou' => 1, 'pu' => 1, + 'qi' => 1, 'qia' => 1, 'qian' => 1, 'qiang' => 1, 'qiao' => 1, 'qie' => 1, + 'qin' => 1, 'qing' => 1, 'qiong' => 1, 'qiu' => 1, + 'qu' => 1, 'quan' => 1, 'que' => 1, 'qun' => 1, + 'ran' => 1, 'rang' => 1, 'rao' => 1, 're' => 1, 'ren' => 1, + 'ri' => 1, 'rong' => 1, 'rou' => 1, + 'ru' => 1, 'ruan' => 1, 'rui' => 1, 'run' => 1, 'ruo' => 1, + 'sa' => 1, 'sai' => 1, 'san' => 1, 'sang' => 1, 'sao' => 1, + 'se' => 1, 'sen' => 1, 'seng' => 1, + 'sha' => 1, 'shai' => 1, 'shan' => 1, 'shang' => 1, 'shao' => 1, + 'she' => 1, 'shei' => 1, 'shen' => 1, 'sheng' => 1, 'shi' => 1, + 'shou' => 1, 'shu' => 1, 'shua' => 1, 'shuai' => 1, 'shuan' => 1, + 'shuang' => 1, 'shui' => 1, 'shun' => 1, 'shuo' => 1, + 'si' => 1, 'song' => 1, 'sou' => 1, 'su' => 1, 'suan' => 1, 'sui' => 1, + 'sun' => 1, 'suo' => 1, + 'ta' => 1, 'tai' => 1, 'tan' => 1, 'tang' => 1, 'tao' => 1, + 'te' => 1, 'teng' => 1, + 'ti' => 1, 'tian' => 1, 'tiao' => 1, 'tie' => 1, 'ting' => 1, + 'tong' => 1, 'tou' => 1, + 'tu' => 1, 'tuan' => 1, 'tui' => 1, 'tun' => 1, 'tuo' => 1, + 'wa' => 1, 'wai' 
=> 1, 'wan' => 1, 'wang' => 1, + 'wei' => 1, 'wen' => 1, 'weng' => 1, 'wo' => 1, 'wu' => 1, + 'xi' => 1, 'xia' => 1, 'xian' => 1, 'xiang' => 1, 'xiao' => 1, + 'xie' => 1, 'xin' => 1, 'xing' => 1, 'xiong' => 1, 'xiu' => 1, + 'xu' => 1, 'xuan' => 1, 'xue' => 1, 'xun' => 1, + 'ya' => 1, 'yai' => 1, 'yan' => 1, 'yang' => 1, 'yao' => 1, + 'ye' => 1, 'yi' => 1, 'yin' => 1, 'ying' => 1, + 'yo' => 1, 'yong' => 1, 'you' => 1, + 'yu' => 1, 'yuan' => 1, 'yue' => 1, 'yun' => 1, + 'za' => 1, 'zai' => 1, 'zan' => 1, 'zang' => 1, 'zao' => 1, + 'ze' => 1, 'zei' => 1, 'zen' => 1, 'zeng' => 1, + 'zha' => 1, 'zhai' => 1, 'zhan' => 1, 'zhang' => 1, 'zhao' => 1, + 'zhe' => 1, 'zhei' => 1, 'zhen' => 1, 'zheng' => 1, 'zhi' => 1, + 'zhong' => 1, 'zhou' => 1, 'zhu' => 1, 'zhua' => 1, 'zhuai' => 1, + 'zhuan' => 1, 'zhuang' => 1, 'zhui' => 1, 'zhun' => 1, 'zhuo' => 1, + 'zi' => 1, 'zong' => 1, 'zou' => 1, 'zu' => 1, 'zuan' => 1, + 'zui' => 1, 'zun' => 1, 'zuo' => 1, + ); + +######################################################################## +# Unicode variables for utf8 tty (nroff) +######################################################################## + +my %tones1_Unicode = + ( + 'A' => q(\\[u0100]), + 'E' => q(\\[u0112]), + 'I' => q(\\[u012A]), + 'O' => q(\\[u014C]), + 'U' => q(\\[u016A]), + 'Ü' => q(\\[u01D5]), + 'a' => q(\\[u0101]), + 'e' => q(\\[u0113]), + 'i' => q(\\[u012B]), + 'o' => q(\\[u014D]), + 'u' => q(\\[u016B]), + 'ü' => q(\\[u01D6]), + ); + +my %tones2_Unicode = + ( + 'A' => q(\\[u00C1]), + 'E' => q(\\[u00C9]), + 'I' => q(\\[u00CD]), + 'O' => q(\\[u00D3]), + 'U' => q(\\[u00DA]), + 'Ü' => q(\\[u01D7]), + 'a' => q(\\[u00E1]), + 'e' => q(\\[u00E9]), + 'i' => q(\\[u00ED]), + 'o' => q(\\[u00F3]), + 'u' => q(\\[u00FA]), + 'ü' => q(\\[u01D8]), + ); + +my %tones3_Unicode = + ( + 'A' => q(\\[u01CD]), + 'E' => q(\\[u011A]), + 'I' => q(\\[u01CF]), + 'O' => q(\\[u01D1]), + 'U' => q(\\[u01D3]), + 'Ü' => q(\\[u01D9]), + 'a' => q(\\[u01CE]), + 'e' => q(\\[u011B]), + 'i' => 
q(\\[u01D0]),
+   'o' => q(\\[u01D2]),
+   'u' => q(\\[u01D4]),
+   'ü' => q(\\[u01DA]),
+  );
+
+my %tones4_Unicode =
+  (
+   'A' => q(\\[u00C0]),
+   'E' => q(\\[u00C8]),
+   'I' => q(\\[u00CC]),
+   'O' => q(\\[u00D2]),
+   'U' => q(\\[u00D9]),
+   'Ü' => q(\\[u01DB]),
+   'a' => q(\\[u00E0]),
+   'e' => q(\\[u00E8]),
+   'i' => q(\\[u00EC]),
+   'o' => q(\\[u00F2]),
+   'u' => q(\\[u00F9]),
+   'ü' => q(\\[u01DC]),
+  );
+
+
+########################################################################
+# glyph variables for troff
+########################################################################
+
+#my $tone1_macron = '\\[a-]';
+#my $tone2_acute = '\\[aa]';
+#my $tone3_caron = '\\[ah]';
+#my $tone4_grave = '\\[ga]';
+my @accents = ( '', '\\[a-]', '\\[aa]', '\\[ah]', '\\[ga]', );
+
+my %tones2_glyphs =
+  (
+   'A' => q(\\['A]),
+   'E' => q(\\['E]),
+   'I' => q(\\['I]),
+   'O' => q(\\['O]),
+   'U' => q(\\['U]),
+   'a' => q(\\['a]),
+   'e' => q(\\['e]),
+   '\\[.i]' => q(\\['i]),
+   'o' => q(\\['o]),
+   'u' => q(\\['u]),
+  );
+
+my %tones4_glyphs =
+  (
+   'A' => q(\\[`A]),
+   'E' => q(\\[`E]),
+   'I' => q(\\[`I]),
+   'O' => q(\\[`O]),
+   'U' => q(\\[`U]),
+   'a' => q(\\[`a]),
+   'e' => q(\\[`e]),
+   '\\[.i]' => q(\\[`i]),
+   'o' => q(\\[`o]),
+   'u' => q(\\[`u]),
+  );
+
+
+
+########################################################################
+# subs
+########################################################################
+
+# Pinyin consists of syllables with a final number to be translated
+# into an accent.  Such numbered syllables are combined into words.
+# Such words can have a final punctuation.  A line is a collection of
+# such words.
+
+# Keys of every per-device result hash: 'n' is the nroff rendering
+# (Unicode special characters), 't' the troff rendering (named glyphs
+# and overstrikes).
+my @roffs = ( 'n',
+              't',
+            );
+
+########################################################################
+# err($message)
+#
+# Print $message on the standard error stream; 'print' appends the
+# output record separator that the script sets near its top.  Always
+# returns 1.
+sub err {
+  my $s = shift;
+  print STDERR $s;
+  1;
+} # err()
+
+
+########################################################################
+# handle_line($starting_blanks, $line)
+#
+# Convert one line of Pinyin text.  $starting_blanks is the text both
+# renderings start with (judging by its name, the indentation of the
+# input line); $line is the text to convert.  Returns a hash, as a
+# list, with the keys 'n' and 't' (see @roffs).  Words that
+# handle_word() rejects are left out of the result.
+sub handle_line {
+  my $starting_blanks = shift;
+  my $line = shift;
+
+  my %outline = ( 'n' => $starting_blanks, 't' => $starting_blanks, );
+
+  # Represent u with dieresis by a literal character while inside Perl:
+  # accept the groff escapes \(:U, \[:U], \(:u, and \[:u] as well as
+  # the ASCII spellings Ue, UE, and ue.
+  $line =~ s/\\(?:\(:U|\[:U\])/Ü/g;
+  $line =~ s/\\(?:\(:u|\[:u\])/ü/g;
+  $line =~ s/U[eE]/Ü/g;
+  $line =~ s/u[eE]/ü/g;
+
+  $line =~ s/\\\(aq/'/g;      # \(aq is an apostrophe
+  $line =~ s/\\\[aq\]/'/g;    # \[aq] is an apostrophe
+  $line =~ s/^[']//;          # remove leading apostrophe
+  $line =~ s/[']$//;          # remove final apostrophe
+  $line =~ s/['][']+/'/g;     # combine apostrophe groups
+  $line =~ s/([0-4])'/$1/;
+  $line =~ s/([^0-4])'/${1}0/;
+
+  my @words = split /\s+/, $line;
+
+  for my $word ( @words ) {
+    next unless ( $word );
+
+    # This is a word, maybe composed of several syllables.  Strip any
+    # trailing punctuation; it is appended again after conversion.
+    my $punctuation = '';
+    $punctuation = $1 if ( $word =~ s/([,.?!:;]*)$// );
+
+    my %outword = handle_word($word);
+    next unless ( %outword );
+
+    for my $roff ( @roffs ) {
+      # combine words to line
+      next unless ( $outword{$roff} );
+
+      # non-initial space
+      $outline{$roff} .= ' ' if ( $outline{$roff} );
+
+      $outline{$roff} .= $outword{$roff};
+      $outline{$roff} .= $punctuation;
+    }
+  }
+  return %outline;
+} # handle_line()
+
+
+########################################################################
+# handle_word($word)
+#
+# Convert one word, i.e. a sequence of syllables that each end in a
+# tone number, with punctuation already removed.  Returns a hash, as a
+# list, with the keys 'n' and 't' (see @roffs), or the empty list if
+# the word contains an invalid tone number.  Syllables rejected by
+# handle_syll() are skipped.
+sub handle_word {
+  my $word = shift;
+
+  $word =~ s/5/0/g;              # transform 5 to 0
+  $word =~ s/([^0-4])$/${1}0/;   # add lacking final no-tone
+
+  # remove apostrophes with tone
+  $word =~ s/([0-4])[']/$1/g;
+  # replace apostrophes without tone by 0
+  $word =~ s/([^0-4])[']/${1}0/g;
+
+  # Detect wrong tone numbers.  Tone 5 was mapped to 0 above, so only
+  # 6 to 9 can be left over.  Match without modifying the word so that
+  # the message can show the offending digit.
+  if ( $word =~ /([6-9])/ ) {
+    err('word ' . $word . ': wrong tone number ' . $1);
+    return;
+  }
+
+  $word =~ s/[']//g;             # remove apostrophes
+
+  # remove starting apostrophe or number
+  $word =~ s/^(['0-4])+//;
+
+  # add 0 for final no-tone
+  $word .= '0' if ( $word =~ /[^0-4]$/ );
+
+  my %outword = ( 'n' => '', 't' => '', );
+  my $is_first_syllable = 1;
+
+  # split word into syllables
+  while ( $word =~ /^[a-zA-ZüÜ']/ ) {
+    # Stop if the leading letters are not followed by a tone number
+    # (e.g. in "foo-bar"); the word would never get shorter and the
+    # loop would not end.
+    unless ( $word =~ s/^([a-zA-ZüÜ']+)([0-4])// ) {
+      err('word ' . $word . ': cannot be split into syllables');
+      last;
+    }
+    my $syll = $1;
+    my $tone = $2;
+
+    my %outsyll = handle_syll( $syll, $tone );
+    next unless ( %outsyll );
+
+    # A syllable starting with a, e, or o that follows another
+    # syllable is set off with an apostrophe.  Decide on the input
+    # syllable: the rendered form of an accented vowel starts with a
+    # backslash.
+    my $apostrophe = '';
+    $apostrophe = '\\[cq]'
+      if ( !$is_first_syllable && $syll =~ /^[aeoAEO]/ );
+    $is_first_syllable = 0;
+
+    for my $roff ( @roffs ) {
+      $outword{$roff} .= $apostrophe . $outsyll{$roff};
+    }
+  }
+  return %outword;
+} # handle_word()
+
+
+########################################################################
+# handle_syll($syll, $tone)
+#
+# Convert a single syllable.  $syll is the syllable without its tone
+# number, $tone the number 0 to 4.  Returns a hash, as a list, with
+# the keys 'n' and 't' (see @roffs), or the empty list if $syll is not
+# a known syllable or the vowel to accent cannot be determined.
+#
+# The accent is put on the first a or e if there is one, otherwise on
+# an o, otherwise on the second vowel.
+sub handle_syll {
+  my $syll = shift;
+  my $tone = shift;
+
+  my $lower_case = lc($syll);
+  $lower_case =~ s/Ü/ü/g;
+  unless ( exists($syllables{$lower_case}) ) {
+    err('The syllable ' . $syll . ' is not a Chinese syllable.');
+    return;
+  }
+
+  my %outsyll = ( 'n' => '', 't' => '', );
+
+  if ( $tone == 0 ) { # no accent
+    # use u umlaut without accent
+    $syll =~ s/Ü/\\[:U]/g;
+    $syll =~ s/ü/\\[:u]/g;
+
+    for my $roff ( @roffs ) {
+      $outsyll{$roff} = $syll;
+    }
+    return %outsyll;
+  } # end of tone 0
+
+  # Split the syllable into initial consonants, vowel group, and final
+  # consonants.  The initial must be matched non-greedily: a greedy
+  # match would also take every vowel except the last one, because
+  # vowels are letters, too.
+  my ( $initial, $vowels, $final ) =
+    $syll =~ /^
+              ([a-zA-Z]*?)
+              ([aeiouAEIOUüÜ]+)
+              ([a-zA-Z]*)
+              $/x;
+  unless ( defined $vowels ) {
+    err( 'Syllable ' . $syll . ' does not have vowels' );
+    return;
+  }
+
+  # split vowels
+  my $vowels_before = '';
+  my $vowel = '';
+  my $vowels_after = '';
+
+  # find vowel for accent
+  if ( $vowels =~ /^[aeiouAEIOU]$/ ) { # only 1 vowel
+    $vowel = $vowels;
+  } elsif ( $vowels eq 'ü' ) {
+    $vowel = $vowels;
+  } elsif ( $vowels eq 'Ü' ) {
+    $vowel = $vowels;
+  } elsif ( $vowels =~ /^([^aeAE]*)([aeAE])(.*)$/ ) { # a, A, e or E
+    $vowels_before = $1;
+    $vowel = $2;
+    $vowels_after = $3;
+  } elsif ( $vowels =~ /^([^oO]*)([oO])(.*)$/ ) { # o or O
+    $vowels_before = $1;
+    $vowel = $2;
+    $vowels_after = $3;
+  } elsif ( $vowels =~ /^(\w)(\w)(.*)$/ ) { # take 2nd vowel
+    $vowels_before = $1;
+    $vowel = $2;
+    $vowels_after = $3;
+  } else {
+    err( 'Unknown vowels: ' . $vowels . ' in syllable: ' . $syll );
+    return;
+  }
+
+  # As for tone 0, write an unaccented u umlaut next to the accented
+  # vowel (as in "lüe") as a groff special character.
+  for my $part ( $vowels_before, $vowels_after ) {
+    $part =~ s/Ü/\\[:U]/g;
+    $part =~ s/ü/\\[:u]/g;
+  }
+
+  $outsyll{'n'} = vowel_n($vowel, $tone);
+  $outsyll{'t'} = vowel_t($vowel, $tone);
+
+  for my $roff ( @roffs ) {
+    $outsyll{$roff} = $initial . $vowels_before .
+      $outsyll{$roff} . $vowels_after . $final;
+  }
+
+  return %outsyll;
+} # handle_syll()
+
+
+########################################################################
+# vowel_n($vowel, $tone)
+#
+# Return the groff Unicode special character for $vowel carrying tone
+# $tone (1 to 4), looked up in %tones1_Unicode to %tones4_Unicode.
+# Returns '' for an empty vowel.  If the lookup yields nothing, a
+# warning is issued and the (undefined) lookup result is returned.
+sub vowel_n { # Unicode for nroff
+  my $vowel = shift;
+  my $tone = shift;
+
+  return '' unless ( $vowel );
+
+  my $vowel_with_tone;
+  if ( $tone == 1 ) { # macron
+    $vowel_with_tone = $tones1_Unicode{$vowel};
+  } elsif ( $tone == 2 ) { # acute
+    $vowel_with_tone = $tones2_Unicode{$vowel};
+  } elsif ( $tone == 3 ) { # caron
+    $vowel_with_tone = $tones3_Unicode{$vowel};
+  } elsif ( $tone == 4 ) { # grave
+    $vowel_with_tone = $tones4_Unicode{$vowel};
+  }
+  $vowel_with_tone or warn "failed to apply tone $tone to vowel $vowel";
+  return $vowel_with_tone;
+} # vowel_n()
+
+
+########################################################################
+# vowel_t($vowel, $tone)
+#
+# Return groff input that renders $vowel with tone $tone (1 to 4)
+# using named glyphs.  Returns '' for an empty vowel.
+#
+# A u umlaut is overstruck with the accent; its point size is reduced
+# (by 2 for lowercase, by 4 for uppercase) before the accent is added.
+# A lowercase i is replaced by the dotless i glyph first.  Tones 1 and
+# 3 are built by overstriking vowel and accent; tones 2 and 4 are
+# looked up in %tones2_glyphs and %tones4_glyphs.  If nothing results,
+# a warning is issued and the (undefined) result is returned.
+sub vowel_t { # named glyphs for troff
+  my $vowel = shift;
+  my $tone = shift;
+
+  return '' unless ( $vowel );
+
+  # \o'\s-2\[:u]\s0\[a-]'
+  if ( $vowel =~ /[üÜ]/ ) {
+    my ($ue, $smaller);
+    if ($vowel eq 'ü') {
+      $ue = q(\\[:u]);
+      $smaller = 2;
+    } else {
+      $ue = q(\\[:U]);
+      $smaller = 4;
+    }
+    $vowel = q(\\o'\\s-) . $smaller . $ue . q(\\s0) .
+      $accents[$tone] . q(');
+    return $vowel;
+  }
+
+  $vowel = q(\\[.i]) if ( $vowel eq 'i' );
+
+  my $vowel_with_tone;
+  if ( $tone == 1 ) { # macron
+    $vowel_with_tone = q(\\o') . $vowel . $accents[$tone] . q(');
+  } elsif ( $tone == 2 ) { # acute
+    $vowel_with_tone = $tones2_glyphs{$vowel};
+  } elsif ( $tone == 3 ) { # caron
+    $vowel_with_tone = q(\\o') . $vowel . $accents[$tone] . q(');
+  } elsif ( $tone == 4 ) { # grave
+    $vowel_with_tone = $tones4_glyphs{$vowel};
+  }
+  $vowel_with_tone or warn "failed to apply tone $tone to vowel $vowel";
+  return $vowel_with_tone;
+} # vowel_t()
+
+
+########################################################################
+# finish_pinyin_mode($n, $t)
+#
+# $n and $t are references to the arrays of collected nroff and troff
+# output lines.  Append the closing '\}' to each array (the caller's
+# arrays are modified), then print all nroff lines followed by all
+# troff lines.  Always returns 1.
+sub finish_pinyin_mode {
+  my $n = shift;
+  my $t = shift;
+  push @$n, '\\}';
+  push @$t, '\\}';
+
+  for ( @$n ) { # Unicode for nroff
+    print;
+  }
+
+  for ( @$t ) { # glyphs for troff
+    print;
+  }
+
+  1;
+} # finish_pinyin_mode()
+
+
+########################################################################
+# options
+########################################################################
+
+foreach (@ARGV) {
+  if ( /^(-h|--h|--he|--hel|--help)$/ ) {
+    print q(Usage for the 'gpinyin' program:);
+    print 'gpinyin [-] [--] [filespec...] normal file name arguments';
+    print 'gpinyin [-h|--help] gives usage information';
+    print 'gpinyin [-v|--version] displays the version number';
+    print q(This program is a 'groff' preprocessor that handles ) .
+      q(pinyin parts in 'roff' files.);
+    exit;
+  } elsif (/^(-v|--v|--ve|--ver|--vers|--versi|--versio|--version)$/) {
+    print "gpinyin ${groff_version}version $version";
+    exit;
+  }
+}
+
+
+########################################################################
+# input
+########################################################################
+
+my $pinyin_mode = 0; # not in Pinyin mode
+
+my @output_n = # nroff
+  (
+   '.ie n \\{\\',
+  );
+
+my @output_t = # troff
+  (
+   '.el \\{\\',
+  );
+
+unshift @ARGV, '-' unless @ARGV;
+foreach my $filename (@ARGV) {
+  my $input;
+  if ($filename eq '-') {
+    $input = \*STDIN;
+  } elsif (not open $input, '<', $filename) {
+    warn $!;
+    next;
+  }
+  while (<$input>) {
+    chomp;
+    s/\s+$//; # remove final spaces
+# &err('gpinyin: ' . 
$_); + + my $line = $_; # with starting blanks + + # .pinyin start or begin line + if ( $line =~ /^[.']\s*pinyin\s+(start|begin)$/ ) { + if ( $pinyin_mode ) { + # '.pinyin' was started twice, ignore + &err( q['.pinyin' starter was run several times] ); + } else { # new pinyin start + $pinyin_mode = 1; + } + next; + } + + # .pinyin stop or end line + if ( $line =~ /^[.']\s*pinyin\s+(stop|end)$/ ) { + if ( $pinyin_mode ) { # normal stop + $pinyin_mode = 0; + &finish_pinyin_mode( \@output_n, \@output_t ); + } else { # ignore + &err( 'gpinyin: there was a .pinyin stop without ' . + 'being in pinyin mode' ); + } + next; + } + + # now not a .pinyin line + + + if ( $pinyin_mode ) { # within Pinyin + my $starting_blanks = ''; + $starting_blanks = $1 if ( s/^(s+)// ); # handle starting spaces + + my %outline = &handle_line($starting_blanks, $line); +# &err('gpinyin outline n: ' . $outline{'n'} ); +# &err('gpinyin outline t: ' . $outline{'t'} ); + push @output_n, $outline{'n'}; + push @output_t, $outline{'t'}; + } else { # normal roff line, not within Pinyin + print $line; + } + next; + } # end of input line +} + + +######################################################################## +# end of file without stopping 'pinyin' mode +if ( $pinyin_mode ) { + &finish_pinyin_mode( \@output_n, \@output_t ); +} + +######################################################################## + + +1; +# Local Variables: +# fill-column: 72 +# mode: CPerl +# End: +# vim: set autoindent textwidth=72: -- cgit v1.2.3