diff options
Diffstat (limited to 'src/utils/grog/grog.pl')
-rw-r--r-- | src/utils/grog/grog.pl | 721 |
1 files changed, 721 insertions, 0 deletions
diff --git a/src/utils/grog/grog.pl b/src/utils/grog/grog.pl new file mode 100644 index 0000000..28973c5 --- /dev/null +++ b/src/utils/grog/grog.pl @@ -0,0 +1,721 @@ +#!@PERL@ +# grog - guess options for groff command +# Inspired by doctype script in Kernighan & Pike, Unix Programming +# Environment, pp 306-8. + +# Copyright (C) 1993-2021 Free Software Foundation, Inc. +# Written by James Clark. +# Rewritten in Perl by Bernd Warken <groff-bernd.warken-72@web.de>. +# Hacked up by G. Branden Robinson, 2021. + +# This file is part of 'grog', which is part of 'groff'. + +# 'groff' is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. + +# 'groff' is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# <http://www.gnu.org/licenses/gpl-2.0.html>. + +use warnings; +use strict; + +use File::Spec; + +my $groff_version = 'DEVELOPMENT'; + +my @command = (); # the constructed groff command +my @requested_package = (); # arguments to '-m' grog options +my @inferred_preprocessor = (); # preprocessors the document uses +my @inferred_main_package = (); # full-service package(s) detected +my $main_package; # full-service package we go with +my $do_run = 0; # run generated 'groff' command +my $use_compatibility_mode = 0; # is -C being passed to groff? + +my %preprocessor_for_macro = ( + 'EQ', 'eqn', + 'G1', 'grap', + 'GS', 'grn', + 'PS', 'pic', + '[', 'refer', + #'so', 'soelim', # Can't be inferred this way; see grog man page. + 'TS', 'tbl', + 'cstart', 'chem', + 'lilypond', 'glilypond', + 'Perl', 'gperl', + 'pinyin', 'gpinyin', +); + +my $program_name = $0; +{ + my ($v, $d, $f) = File::Spec->splitpath($program_name); + $program_name = $f; +} + +my %user_macro; +my %score = (); + +my @input_file; + +# .TH is both a man(7) macro and often used with tbl(1). We expect to +# find .TH in ms(7) documents only between .TS and .TE calls, and in +# man(7) documents only as the first macro call. +my $have_seen_first_macro_call = 0; +# man(7) and ms(7) use many of the same macro names; do extra checking. +my $man_score = 0; +my $ms_score = 0; + +my $had_inference_problem = 0; +my $had_processing_problem = 0; +my $have_any_valid_arguments = 0; + + +sub fail { + my $text = shift; + print STDERR "$program_name: error: $text\n"; + $had_processing_problem = 1; +} + + +sub warn { + my $text = shift; + print STDERR "$program_name: warning: $text\n"; +} + + +sub process_arguments { + my $no_more_options = 0; + my $delayed_option = ''; + my $was_minus = 0; + my $optarg = 0; + my $pdf_with_ligatures = 0; + + foreach my $arg (@ARGV) { + if ( $optarg ) { + push @command, $arg; + $optarg = 0; + next; + } + + if ($no_more_options) { + push @input_file, $arg; + next; + } + + if ($delayed_option) { + if ($delayed_option eq '-m') { + push @requested_package, $arg; + $arg = ''; + } else { + push @command, $delayed_option; + } + + push @command, $arg if $arg; + $delayed_option = ''; + next; + } + + unless ( $arg =~ /^-/ ) { # file name, no opt, no optarg + push @input_file, $arg; + next; + } + + # now $arg starts with '-' + + if ($arg eq '-') { + unless ($was_minus) { + push @input_file, $arg; + $was_minus = 1; + } + next; + } + + if ($arg eq '--') { + $no_more_options = 1; + next; + } + + # Handle options that cause an early exit. + &version() if ($arg eq '-v' || $arg eq '--version'); + &usage(0) if ($arg eq '-h' || $arg eq '--help'); + + if ($arg =~ '^--.') { + if ($arg =~ '^--(run|with-ligatures)$') { + $do_run = 1 if ($arg eq '--run'); + $pdf_with_ligatures = 1 if ($arg eq '--with-ligatures'); + } else { + &fail("unrecognized grog option '$arg'; ignored"); + &usage(1); + } + next; + } + + # Handle groff options that take an argument. + + # Handle the option argument being separated by whitespace. + if ($arg =~ /^-[dfFIKLmMnoPrTwW]$/) { + $delayed_option = $arg; + next; + } + + # Handle '-m' option without subsequent whitespace. + if ($arg =~ /^-m/) { + my $package = $arg; + $package =~ s/-m//; + push @requested_package, $package; + next; + } + + # Treat anything else as (possibly clustered) groff options that + # take no arguments. + + # Our do_line() needs to know if it should do compatibility parsing. + $use_compatibility_mode = 1 if ($arg =~ /C/); + + push @command, $arg; + } + + if ($pdf_with_ligatures) { + push @command, '-P-y'; + push @command, '-PU'; + } + + @input_file = ('-') unless (@input_file); +} # process_arguments() + + +sub process_input { + foreach my $file (@input_file) { + unless ( open(FILE, $file eq "-" ? $file : "< $file") ) { + &fail("cannot open '$file': $!"); + next; + } + + $have_any_valid_arguments = 1; + + while (my $line = <FILE>) { + chomp $line; + &do_line($line); + } + + close(FILE); + } # end foreach +} # process_input() + + +# Push item onto inferred full-service list only if not already present. +sub push_main_package { + my $pkg = shift; + if (!grep(/^$pkg/, @inferred_main_package)) { + push @inferred_main_package, $pkg; + } +} # push_main_package() + + +sub do_line { + my $command; # request or macro name + my $args; # request or macro arguments + + my $line = shift; + + # Check for a Perl Pod::Man comment. + # + # An alternative to this kludge is noted below: if a "standard" macro + # is redefined, we could delete it from the relevant lists and + # hashes. + if ($line =~ /\\\" Automatically generated by Pod::Man/) { + $man_score += 100; + } + + # Strip comments. + $line =~ s/\\".*//; + $line =~ s/\\#.*// unless $use_compatibility_mode; + + return unless ($line =~ /^[.']/); # Ignore text lines. + + # Perform preprocessor checks; they scan their inputs using a rump + # interpretation of roff(7) syntax that requires the default control + # character and no space between it and the macro name. In AT&T + # compatibility mode, no space (or newline!) is required after the + # macro name, either. We mimic the preprocessors themselves; eqn(1), + # for instance, does not recognize '.EN' if '.EQ' has not been seen. + my $boundary = '\\b'; + $boundary = '' if ($use_compatibility_mode); + + if ($line =~ /^\.(\S\S)$boundary/ || $line =~ /^\.(\[)/) { + my $macro = $1; + # groff identifiers can have extremely weird characters in them. + # The ones we care about are conventionally named, but me(7) + # documents can call macros like '+c', so quote carefully. + if (grep(/^\Q$macro\E$/, keys %preprocessor_for_macro)) { + my $preproc = $preprocessor_for_macro{$macro}; + if (!grep(/$preproc/, @inferred_preprocessor)) { + push @inferred_preprocessor, $preproc; + } + } + } + + # Normalize control lines; convert no-break control character to the + # regular one and remove unnecessary whitespace. + $line =~ s/^['.]\s*/./; + $line =~ s/\s+$//; + + return if ($line =~ /^\.$/); # Ignore empty request. + return if ($line =~ /^\.\\?\.$/); # Ignore macro definition ends. + + # Split control line into a request or macro call and its arguments. + + # Handle single-letter macro names. + if ($line =~ /^\.(\S)(\s+(.*))?$/) { + $command = $1; + $args = $2; + # Handle two-letter macro/request names in compatibility mode. + } elsif ($use_compatibility_mode) { + $line =~ /^\.(\S\S)\s*(.*)$/; + $command = $1; + $args = $2; + # Handle multi-letter macro/request names in groff mode. + } else { + $line =~ /^\.(\S+)(\s+(.*))?$/; + $command = $1; + $args = $3; + } + + $command = '' unless ($command); + $args = '' unless ($args); + + ###################################################################### + # user-defined macros + + # If the line calls a user-defined macro, skip it. + return if (exists $user_macro{$command}); + + # These are all requests supported by groff 1.23.0. + my @request = ('ab', 'ad', 'af', 'aln', 'als', 'am', 'am1', 'ami', + 'ami1', 'as', 'as1', 'asciify', 'backtrace', 'bd', + 'blm', 'box', 'boxa', 'bp', 'br', 'brp', 'break', 'c2', + 'cc', 'ce', 'cf', 'cflags', 'ch', 'char', 'chop', + 'class', 'close', 'color', 'composite', 'continue', + 'cp', 'cs', 'cu', 'da', 'de', 'de1', 'defcolor', 'dei', + 'dei1', 'device', 'devicem', 'di', 'do', 'ds', 'ds1', + 'dt', 'ec', 'ecr', 'ecs', 'el', 'em', 'eo', 'ev', + 'evc', 'ex', 'fam', 'fc', 'fchar', 'fcolor', 'fi', + 'fp', 'fschar', 'fspecial', 'ft', 'ftr', 'fzoom', + 'gcolor', 'hc', 'hcode', 'hla', 'hlm', 'hpf', 'hpfa', + 'hpfcode', 'hw', 'hy', 'hym', 'hys', 'ie', 'if', 'ig', + 'in', 'it', 'itc', 'kern', 'lc', 'length', 'linetabs', + 'lf', 'lg', 'll', 'lsm', 'ls', 'lt', 'mc', 'mk', 'mso', + 'msoquiet', 'na', 'ne', 'nf', 'nh', 'nm', 'nn', 'nop', + 'nr', 'nroff', 'ns', 'nx', 'open', 'opena', 'os', + 'output', 'pc', 'pev', 'pi', 'pl', 'pm', 'pn', 'pnr', + 'po', 'ps', 'psbb', 'pso', 'ptr', 'pvs', 'rchar', 'rd', + 'return', 'rfschar', 'rj', 'rm', 'rn', 'rnn', 'rr', + 'rs', 'rt', 'schar', 'shc', 'shift', 'sizes', 'so', + 'soquiet', 'sp', 'special', 'spreadwarn', 'ss', + 'stringdown', 'stringup', 'sty', 'substring', 'sv', + 'sy', 'ta', 'tc', 'ti', 'tkf', 'tl', 'tm', 'tm1', + 'tmc', 'tr', 'trf', 'trin', 'trnt', 'troff', 'uf', + 'ul', 'unformat', 'vpt', 'vs', 'warn', 'warnscale', + 'wh', 'while', 'write', 'writec', 'writem'); + + # Add user-defined macro names to %user_macro. + # + # Macros can also be defined with .dei{,1}, ami{,1}, but supporting + # that would be a heavy lift for the benefit of users that probably + # don't require grog's help. --GBR + if ($command =~ /^(de|am)1?$/) { + my $name = $args; + # Strip off any end macro. + $name =~ s/\s+.*$//; + # Handle special cases of macros starting with '[' or ']'. + if ($name =~ /^[][]/) { + delete $preprocessor_for_macro{'['}; + } + # XXX: If the macro name shadows a standard macro name, maybe we + # should delete the latter from our lists and hashes. This might + # depend on whether the document is trying to remain compatible + # with an existing interface, or simply colliding with names they + # don't care about (consider a raw roff document that defines 'PP'). + # --GBR + $user_macro{$name} = 0 unless (exists $user_macro{$name}); + return; + } + + # XXX: Handle .rm as well? + + # Ignore all other requests. Again, macro names can contain Perl + # regex metacharacters, so be careful. + return if (grep(/^\Q$command\E$/, @request)); + # What remains must be a macro name. + my $macro = $command; + + $have_seen_first_macro_call = 1; + $score{$macro}++; + + + ###################################################################### + # macro package (tmac) + ###################################################################### + + # man and ms share too many macro names for the following approach to + # be fruitful for many documents; see &infer_man_or_ms_package. + # + # We can put one thumb on the scale, however. + if ((!$have_seen_first_macro_call) && ($macro eq 'TH')) { + # TH as the first call in a document screams man(7). + $man_score += 100; + } + + ########## + # mdoc + if ($macro =~ /^Dd$/) { + &push_main_package('doc'); + return; + } + + ########## + # old mdoc + if ($macro =~ /^(Tp|Dp|De|Cx|Cl)$/) { + &push_main_package('doc-old'); + return; + } + + ########## + # me + + if ($macro =~ /^( + [ilnp]p| + n[12]| + sh + )$/x) { + &push_main_package('e'); + return; + } + + + ############# + # mm and mmse + + if ($macro =~ /^( + H| + MULB| + LO| + LT| + NCOL| + PH| + SA + )$/x) { + if ($macro =~ /^LO$/) { + if ( $args =~ /^(DNAMN|MDAT|BIL|KOMP|DBET|BET|SIDOR)/ ) { + &push_main_package('mse'); + return; + } + } elsif ($macro =~ /^LT$/) { + if ( $args =~ /^(SVV|SVH)/ ) { + &push_main_package('mse'); + return; + } + } + &push_main_package('m'); + return; + } + + ########## + # mom + + if ($macro =~ /^( + ALD| + AUTHOR| + CHAPTER_TITLE| + CHAPTER| + COLLATE| + DOCHEADER| + DOCTITLE| + DOCTYPE| + DOC_COVER| + FAMILY| + FAM| + FT| + LEFT| + LL| + LS| + NEWPAGE| + NO_TOC_ENTRY| + PAGENUMBER| + PAGE| + PAGINATION| + PAPER| + PRINTSTYLE| + PT_SIZE| + START| + TITLE| + TOC_AFTER_HERE + TOC| + T_MARGIN| + )$/x) { + &push_main_package('om'); + return; + } +} # do_line() + +my @preprocessor = (); + + +sub infer_preprocessors { + my %option_for_preprocessor = ( + 'eqn', '-e', + 'grap', '-G', + 'grn', '-g', + 'pic', '-p', + 'refer', '-R', + #'soelim', '-s', # Can't be inferred this way; see grog man page. + 'tbl', '-t', + 'chem', '-j' + ); + + # Use a temporary list we can sort later. We want the options to show + # up in a stable order for testing purposes instead of the order their + # macros turn up in the input. groff doesn't care about the order. + my @opt = (); + + foreach my $preproc (@inferred_preprocessor) { + my $preproc_option = $option_for_preprocessor{$preproc}; + + if ($preproc_option) { + push @opt, $preproc_option; + } else { + push @preprocessor, $preproc; + } + } + push @command, sort @opt; +} # infer_preprocessors() + + +# Return true (1) if either the man or ms package is inferred. +sub infer_man_or_ms_package { + my @macro_ms = ('RP', 'TL', 'AU', 'AI', 'DA', 'ND', 'AB', 'AE', + 'QP', 'QS', 'QE', 'XP', + 'NH', + 'R', + 'CW', + 'BX', 'UL', 'LG', 'NL', + 'KS', 'KF', 'KE', 'B1', 'B2', + 'DS', 'DE', 'LD', 'ID', 'BD', 'CD', 'RD', + 'FS', 'FE', + 'OH', 'OF', 'EH', 'EF', 'P1', + 'TA', '1C', '2C', 'MC', + 'XS', 'XE', 'XA', 'TC', 'PX', + 'IX', 'SG'); + + my @macro_man = ('BR', 'IB', 'IR', 'RB', 'RI', 'P', 'TH', 'TP', 'SS', + 'HP', 'PD', + 'AT', 'UC', + 'SB', + 'EE', 'EX', + 'OP', + 'MT', 'ME', 'SY', 'YS', 'TQ', 'UR', 'UE'); + + my @macro_man_or_ms = ('B', 'I', 'BI', + 'DT', + 'RS', 'RE', + 'SH', + 'SM', + 'IP', 'LP', 'PP'); + + for my $key (@macro_man_or_ms, @macro_man, @macro_ms) { + $score{$key} = 0 unless exists $score{$key}; + } + + # Compute a score for each package by counting occurrences of their + # characteristic macros. + foreach my $key (@macro_man_or_ms) { + $man_score += $score{$key}; + $ms_score += $score{$key}; + } + + foreach my $key (@macro_man) { + $man_score += $score{$key}; + } + + foreach my $key (@macro_ms) { + $ms_score += $score{$key}; + } + + if (!$ms_score && !$man_score) { + # The input may be a "raw" roff document; this is not a problem, + # but it does mean no package was inferred. + return 0; + } elsif ($ms_score == $man_score) { + # If there was no TH call, it's not a (valid) man(7) document. + if (!$score{'TH'}) { + &push_main_package('s'); + } else { + &warn("document ambiguous; disambiguate with -man or -ms option"); + $had_inference_problem = 1; + } + return 0; + } elsif ($ms_score > $man_score) { + &push_main_package('s'); + } else { + &push_main_package('an'); + } + + return 1; +} # infer_man_or_ms_package() + + +sub construct_command { + my @main_package = ('an', 'doc', 'doc-old', 'e', 'm', 'om', 's'); + my $file_args_included; # file args now only at 1st preproc + unshift @command, 'groff'; + if (@preprocessor) { + my @progs; + $progs[0] = shift @preprocessor; + push(@progs, @input_file); + for (@preprocessor) { + push @progs, '|'; + push @progs, $_; + } + push @progs, '|'; + unshift @command, @progs; + $file_args_included = 1; + } else { + $file_args_included = 0; + } + + foreach (@command) { + next unless /\s/; + # when one argument has several words, use accents + $_ = "'" . $_ . "'"; + } + + my $have_ambiguous_main_package = 0; + my $inferred_main_package_count = scalar @inferred_main_package; + + # Did we infer multiple full-service packages? + if ($inferred_main_package_count > 1) { + $have_ambiguous_main_package = 1; + # For each one the user explicitly requested... + for my $pkg (@requested_package) { + # ...did it resolve the ambiguity for us? + if (grep(/$pkg/, @inferred_main_package)) { + @inferred_main_package = ($pkg); + $have_ambiguous_main_package = 0; + last; + } + } + } elsif ($inferred_main_package_count == 1) { + $main_package = shift @inferred_main_package; + } + + if ($have_ambiguous_main_package) { + # TODO: Alphabetical is probably not the best ordering here. We + # should tally up scores on a per-package basis generally, not just + # for an and s. + for my $pkg (@main_package) { + if (grep(/$pkg/, @inferred_main_package)) { + $main_package = $pkg; + &warn("document ambiguous (choosing '$main_package'" + . " from '@inferred_main_package'); disambiguate with -m" + . " option"); + $had_inference_problem = 1; + last; + } + } + } + + # If a full-service package was explicitly requested, warn if the + # inference differs from the request. This also ensures that all -m + # arguments are placed in the same order that the user gave them; + # caveat dictator. + my @auxiliary_package_argument = (); + for my $pkg (@requested_package) { + my $is_auxiliary_package = 1; + if (grep(/$pkg/, @main_package)) { + $is_auxiliary_package = 0; + if ($pkg ne $main_package) { + &warn("overriding inferred package '$main_package'" + . " with requested package '$pkg'"); + $main_package = $pkg; + } + } + if ($is_auxiliary_package) { + push @auxiliary_package_argument, "-m" . $pkg; + } + } + + push @command, '-m' . $main_package if ($main_package); + push @command, @auxiliary_package_argument; + push @command, @input_file unless ($file_args_included); + + ######### + # execute the 'groff' command here with option '--run' + if ( $do_run ) { # with --run + print STDERR "@command\n"; + my $cmd = join ' ', @command; + system($cmd); + } else { + print "@command\n"; + } +} # construct_command() + + +sub usage { + my $stream = *STDOUT; + my $had_error = shift; + $stream = *STDERR if $had_error; + my $grog = $program_name; + print $stream "usage: $grog [--ligatures] [--run]" . + " [groff-option ...] [--] [file ...]\n" . + "usage: $grog {-v | --version}\n" . + "usage: $grog {-h | --help}\n"; + unless ($had_error) { + print $stream "\n" . +"Read each roff(7) input FILE and attempt to infer an appropriate\n" . +"groff(1) command to format it. See the grog(1) manual page.\n"; + } + exit $had_error; +} + + +sub version { + print "GNU $program_name (groff) $groff_version\n"; + exit 0; +} # version() + + +# initialize + +my $in_unbuilt_source_tree = 0; +{ + my $at = '@'; + $in_unbuilt_source_tree = 1 if ('@VERSION@' eq "${at}VERSION${at}"); +} + +$groff_version = '@VERSION@' unless ($in_unbuilt_source_tree); + +&process_arguments(); +&process_input(); + +if ($have_any_valid_arguments) { + &infer_preprocessors(); + &infer_man_or_ms_package() if (scalar @inferred_main_package != 1); + &construct_command(); +} + +exit 2 if ($had_processing_problem); +exit 1 if ($had_inference_problem); +exit 0; + +# Local Variables: +# fill-column: 72 +# mode: CPerl +# End: +# vim: set cindent noexpandtab shiftwidth=2 softtabstop=2 textwidth=72: |