%top{ #ifdef HAVE_CONFIG_H # include "config.h" #endif /* HAVE_CONFIG_H */ #include "manconfig.h" /* Flex emits several functions which might reasonably have various * attributes applied and many unused macros; none of these are our problem. */ #if GNUC_PREREQ(8,0) # pragma GCC diagnostic ignored "-Wsuggest-attribute=malloc" #endif #pragma GCC diagnostic ignored "-Wsuggest-attribute=pure" #pragma GCC diagnostic ignored "-Wunused-macros" } %{ /* * lexgrog.l: extract 'whatis' info from nroff man / formatted cat pages. * * Copyright (C) 1994, 1995 Graeme W. Wilford. (Wilf.) * Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, * 2011, 2012 Colin Watson. * * This file is part of man-db. * * man-db is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * man-db is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with man-db; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * Wed Oct 12 18:46:11 BST 1994 Wilf. (G.Wilford@ee.surrey.ac.uk) * * CJW: Detect grap and vgrind. Understand fill requests. Other improvements * in the syntax accepted. 
 */ /* NOTE(review): the bare #include directives that follow appear to have lost their header names in this copy of the file; restore them from the original before building. */ #include #include #include #include #include #include "error.h" #include "xalloc.h" #include "gettext.h" #define _(String) gettext (String) #include "encodings.h" #include "pipeline.h" #include "sandbox.h" #include "security.h" #include "util.h" #include "decompress.h" #include "lexgrog.h" #include "manconv.h" #include "manconv_client.h" #define YY_READ_BUF_SIZE 1024 #define MAX_NAME 8192 /* MAX_NAME is the size in bytes of the newname buffer in which the whatis text is accumulated */ /* defines the ordered list of filters detected by lexgrog; each enumerator indexes the matching slot of filters[], and MAX_FILTERS is the number of slots */ enum { TBL_FILTER = 0, /* tbl */ EQN_FILTER, /* eqn */ PIC_FILTER, /* pic */ GRAP_FILTER, /* grap */ REF_FILTER, /* refer */ VGRIND_FILTER, /* vgrind */ MAX_FILTERS /* delimiter */ }; #define ARRAY_SIZE(array) (sizeof (array) / sizeof ((array)[0])) extern man_sandbox *sandbox; /* One entry of a lookup table: name is the escape name as written in the page source and value is the text appended to the whatis string in its place. The tables are searched with bsearch, so they must stay sorted by name. */ struct macro { const char *name; const char *value; }; static const struct macro glyphs[] = { /* It is vital to keep these in strcmp order (sort -t\" -k2)! They * will be searched using bsearch. * Data from groff_char(7), although I have omitted some that are * particularly unlikely to be used in NAME sections. 
 */ { "'A", "Á" }, { "'C", "Ć" }, { "'E", "É" }, { "'I", "Í" }, { "'O", "Ó" }, { "'U", "Ú" }, { "'Y", "Ý" }, { "'a", "á" }, { "'c", "ć" }, { "'e", "é" }, { "'i", "í" }, { "'o", "ó" }, { "'u", "ú" }, { "'y", "ý" }, { ",C", "Ç" }, { ",c", "ç" }, { "-D", "Ð" }, { ".i", "ı" }, { "/L", "Ł" }, { "/O", "Ø" }, { "/l", "ł" }, { "/o", "ø" }, { ":A", "Ä" }, { ":E", "Ë" }, { ":I", "Ï" }, { ":O", "Ö" }, { ":U", "Ü" }, { ":Y", "Ÿ" }, { ":a", "ä" }, { ":e", "ë" }, { ":i", "ï" }, { ":o", "ö" }, { ":u", "ü" }, { ":y", "ÿ" }, { "AE", "Æ" }, { "Bq", "„" }, { "Fc", "»" }, { "Fi", "ffi" }, { "Fl", "ffl" }, { "Fo", "«" }, { "IJ", "IJ" }, { "OE", "Œ" }, { "Sd", "ð" }, { "TP", "Þ" }, { "Tp", "þ" }, { "^A", "Â" }, { "^E", "Ê" }, { "^I", "Î" }, { "^O", "Ô" }, { "^U", "Û" }, { "^a", "â" }, { "^e", "ê" }, { "^i", "î" }, { "^o", "ô" }, { "^u", "û" }, { "`A", "À" }, { "`E", "È" }, { "`I", "Ì" }, { "`O", "Ò" }, { "`U", "Ù" }, { "`a", "à" }, { "`e", "è" }, { "`i", "ì" }, { "`o", "ò" }, { "`u", "ù" }, { "a\"", "˝" }, { "a-", "¯" }, { "a.", "˙" }, { "a^", "^" }, { "aa", "´" }, { "ab", "˘" }, { "ac", "¸" }, { "ad", "¨" }, { "ae", "æ" }, { "ah", "ˇ" }, { "ao", "˚" }, { "aq", "'" }, { "a~", "~" }, { "bq", "‚" }, { "cq", "’" }, { "dq", "\"" }, { "em", "—" }, { "en", "–" }, { "fc", "›" }, { "ff", "ff" }, { "fi", "fi" }, { "fl", "fl" }, { "fo", "‹" }, { "ga", "`" }, { "ha", "^" }, { "ho", "˛" }, { "hy", "‐" }, { "ij", "ij" }, { "lq", "“" }, { "oA", "Å" }, { "oa", "å" }, { "oe", "œ" }, { "oq", "‘" }, { "r!", "¡" }, { "r?", "¿" }, { "rq", "”" }, { "ss", "ß" }, { "ti", "~" }, { "vS", "Š" }, { "vZ", "Ž" }, { "vs", "š" }, { "vz", "ž" }, { "~A", "Ã" }, { "~N", "Ñ" }, { "~O", "Õ" }, { "~a", "ã" }, { "~n", "ñ" }, { "~o", "õ" } }; static const struct macro perldocs[] = { /* It is vital to keep these in strcmp order (sort -t\" -k2)! They * will be searched using bsearch. * Data from Pod/Man.pm. 
 */ { "--", "-" }, { "Aq", "'" }, { "C'", "'" }, { "C+", "C++" }, { "C`", "`" }, { "L\"", "\"" }, { "PI", "π" }, { "R\"", "\"" } }; static void add_str_to_whatis (const char *string, size_t length); static void add_char_to_whatis (unsigned char c); static void add_separator_to_whatis (void); static void add_wordn_to_whatis (const char *string, size_t length); static void add_word_to_whatis (const char *string); static void add_glyph_to_whatis (const char *string, size_t length); static void add_perldoc_to_whatis (const char *string, size_t length); static void mdoc_text (const char *string); static void newline_found (void); /* Scanner state shared by the rules and the helper functions: newname accumulates the whatis text and p_name is the write position within it; fname is the page name quoted in the truncation warning; filters holds one slot per filter enumerator; fill_mode is switched off and on by the .nf and .fi rules; waiting_for_quote makes add_char_to_whatis swallow the next double quote; decomp is the input that YY_INPUT reads from. */ static char newname[MAX_NAME]; static char *p_name; static const char *fname; static char filters[MAX_FILTERS]; static bool fill_mode; static bool waiting_for_quote; static decompress *decomp; /* Feed the scanner from decompress_read on decomp: copy the returned block into the flex buffer and NUL-terminate it, or report YY_NULL when no data is returned. */ #define YY_INPUT(buf,result,max_size) { \ size_t size = max_size; \ const char *block = decompress_read (decomp, &size); \ if (block && size != 0) { \ memcpy (buf, block, size); \ buf[size] = '\0'; \ result = size; \ } else \ result = YY_NULL; \ } #define YY_NO_INPUT %} %option ecs meta-ecs %option 8bit batch caseful never-interactive %option nostdinit %option warn %option noyywrap nounput %x MAN_PRENAME %x MAN_NAME %x MAN_DESC %x MAN_DESC_AT %x MAN_DESC_BSX %x MAN_DESC_BX %x MAN_DESC_BX_RELEASE %x MAN_DESC_DQ %x MAN_DESC_FX %x MAN_DESC_NX %x MAN_DESC_OX %x CAT_NAME %x CAT_FILE %x MAN_FILE %x CAT_REST %x MAN_REST %x FORCE_EXIT digit [[:digit:]] upper [[:upper:]] alpha [[:alpha:]] blank [[:blank:]] blank_eol [[:blank:]\r\n] word [[:alnum:]][^[:blank:]\r\n]* eol \r?\n bol {eol}+ next {eol}* empty {eol}{blank}* indent {eol}{blank}+ dbl_quote \" font_change \\f([[:upper:]1-4]|\({upper}{2}) size_change \\s[+-]?{digit} style_change ({font_change}{size_change}?|{size_change}{font_change}?) typeface \.(B[IR]?|I[BR]?|R[BI]|S[BM]) sec_request \.[Ss][HhYySs] comment ['.]\\{dbl_quote} /* Please add to this list if you know how. 
*/ /* Note that, since flex only supports UTF-8 by accident, character classes * including non-ASCII characters must be written out as (a|b|c|d) rather * than [abcd]. */ ar_name (اﻹسم|الإسم) /* ИМЕ also works for mk */ bg_name И(М|м)(Е|е) cs_name (J[Mm](É|é|\\\('[Ee]|E|e)[Nn][Oo]|N(Á|á)[Zz][Ee][Vv]) da_name N[Aa][Vv][Nn] de_name B[Ee][Zz][Ee][Ii][Cc][Hh][Nn][Uu][Nn][Gg] en_name N[Aa][Mm][Ee] eo_name N[Oo][Mm][Oo] es_name N[Oo][Mm][Bb][Rr][Ee] fa_name نام fi_name N[Ii][Mm][Ii] fr_name N[Oo][Mm] hu_name N(É|é|\\\('[Ee]|E|e)[Vv] id_name N[Aa][Mm][Aa] /* NOME also works for gl, pt */ it_name N[Oo][Mm][Ee] ja_name (名|̾)(前|称) ko_name (이름|명칭) latin_name N[Oo][Mm][Ee][Nn] lt_name P[Aa][Vv][Aa][Dd][Ii][Nn][Ii][Mm][Aa][Ss] nl_name N[Aa][Aa][Mm] pl_name N[Aa][Zz][Ww][Aa] ro_name N[Uu][Mm][Ee] ru_name (И(М|м)(Я|я)|Н(А|а)(З|з)(В|в)(А|а)(Н|н)(И|и)(Е|е)|Н(А|а)(И|и)(М|м)(Е|е)(Н|н)(О|о)(В|в)(А|а)(Н|н)(И|и)(Е|е)) sk_name M[Ee][Nn][Oo] sr_name (И(М|м)(Е|е)|Н(А|а)(З|з)(И|и)(В|в)) srlatin_name (I[Mm][Ee]|N[Aa][Zz][Ii][Vv]) sv_name N[Aa][Mm][Nn] ta_name பெய tr_name (A[Dd]|(İ|i)S(İ|i)M) uk_name Н(А|а)(З|з)(В|в)(А|а) vi_name T(Ê|ê)[Nn] zh_CN_name 名{blank}?(称|字){blank}?.* zh_TW_name (名{blank}?(稱|字)|命令名){blank}?.* name ({ar_name}|{bg_name}|{cs_name}|{da_name}|{de_name}|{en_name}|{eo_name}|{es_name}|{fa_name}|{fi_name}|{fr_name}|{hu_name}|{id_name}|{it_name}|{ja_name}|{ko_name}|{latin_name}|{lt_name}|{nl_name}|{pl_name}|{ro_name}|{ru_name}|{sk_name}|{sr_name}|{srlatin_name}|{sv_name}|{ta_name}|{tr_name}|{uk_name}|{vi_name}|{zh_CN_name}|{zh_TW_name}) name_sec {dbl_quote}?{style_change}?{name}{style_change}?({blank}*{dbl_quote})? 
/* eptgrv : eqn, pic, tbl, grap, refer, vgrind */ tbl_request \.TS eqn_request \.EQ pic_request \.PS grap_request \.G1 ref1_request \.R1 ref2_request \.\[ vgrind_request \.vS %% /* begin NAME section processing */ {sec_request}{blank_eol}+{name_sec}{blank}* BEGIN (MAN_PRENAME); {empty}{2,}{name}{blank}*{indent} BEGIN (CAT_NAME); /* general text matching */ { \.[^Ss\r\n].* | \..{0,3}{dbl_quote}?.{0,4}{dbl_quote}? | {comment}.* | .|{eol} } { .{1,9} | [ ]* | {eol}{2,} | .|{eol} } { {bol}{tbl_request} filters[TBL_FILTER] = 't'; {bol}{eqn_request} filters[EQN_FILTER] = 'e'; {bol}{pic_request} filters[PIC_FILTER] = 'p'; {bol}{grap_request} filters[GRAP_FILTER] = 'g'; {bol}{ref1_request} | {bol}{ref2_request} filters[REF_FILTER] = 'r'; {bol}{vgrind_request} filters[VGRIND_FILTER] = 'v'; } <> { /* exit */ *p_name = '\0'; /* terminate the string */ yyterminate (); } .+|{eol} /* rules to end NAME section processing */ .|{eol} { /* forced exit */ *p_name = '\0'; /* terminate the string */ yyterminate (); } {bol}{sec_request}{blank}* | <> { /* no NAME at all */ *p_name = '\0'; BEGIN (MAN_REST); } /* need to match whole string so that we beat the following roff catch-all, so use yyless to push back the name */ { {bol}{typeface}{blank}.* | {bol}\.Tn{blank}.* | {bol}\.ft{blank}.* | {bol}\.V[be]{blank}.* | {bol}\.IX{blank}.* | {bol}\.Nm{blank}.* { yyless (0); BEGIN (MAN_NAME); } } /* Skip over initial roff requests in NAME section. The use of yyless here is evil. 
*/ {bol}['.].* {empty}{eol} yyless (1); .|{eol} { yyless (0); BEGIN (MAN_NAME); } { {bol}{sec_request}{blank}* | /* Another section */ {bol}\.X{upper}{blank}+ | /* special - hpux */ {bol}\.sp{blank}* | /* vertical spacing */ {bol}\.ig{blank}* | /* block comment */ {bol}\.de[1i]?{blank}* | /* macro definition */ {bol}\.i[ef]{blank}* | /* conditional */ {empty}{bol}.+ | <> { /* terminate the string */ *p_name = '\0'; BEGIN (MAN_REST); } } { {bol}S[yYeE] | {eol}{2,}.+ | {next}__ { /* terminate the string */ *p_name = '\0'; BEGIN (CAT_REST); yyterminate (); } } /* ROFF request removal */ { /* some include quoting; dealing with this is unpleasant */ {bol}{typeface}{blank}+\" { newline_found (); waiting_for_quote = true; } {bol}{typeface}{blank}+ | /* type face commands */ {bol}\.Tn{blank}+ | /* mdoc trade name */ {bol}\.ft{blank}.* | /* font change */ {bol}\.V[be]{blank}.* | /* pod2man, verbatim mode */ {bol}\.IX{blank}.* | /* .IX line */ {bol}\.Nm{blank}+ | /* mdoc name */ {bol}\.PD{blank}* | /* paragraph spacing */ {bol}\\& | /* non-breaking space */ {next}{comment}.* { /* per line comments */ newline_found (); } } /* No-op requests */ { {bol}\.{blank}*$ newline_found (); {bol}\.\.$ newline_found (); } /* Toggle fill mode */ { {bol}\.nf.* fill_mode = false; {bol}\.fi.* fill_mode = true; } -{eol}{blank_eol}* /* strip continuations */ /* convert to DASH */ { {next}{blank}*\\\((mi|hy|em|en){blank}* | {next}{blank}*\\\[(mi|hy|em|en)\]{blank}* | {next}{blank_eol}+[-\\]-{blank}* | {next}{blank_eol}*[-\\]-{blank}+ | {bol}\.Nd{blank}* { add_separator_to_whatis (); BEGIN (MAN_DESC); } } {next}{blank}+-{1,2}{blank_eol}+ add_separator_to_whatis (); /* escape sequences and special characters */ { {next}\\[\\e] add_char_to_whatis ('\\'); {next}\\('|\(aa) add_char_to_whatis ('\''); {next}\\(`|\(ga) add_char_to_whatis ('`'); {next}\\(-|\((mi|hy|em|en)) add_char_to_whatis ('-'); {next}\\\[(mi|hy|em|en)\] add_char_to_whatis ('-'); {next}\\\. 
add_char_to_whatis ('.'); {next}((\\[ 0t~])|[ ]|\t)* add_char_to_whatis (' '); {next}\\\((ru|ul) add_char_to_whatis ('_'); {next}\\\\t add_char_to_whatis ('\t'); {next}\\[|^&!%acdpruz{}\r\n] /* various useless control chars */ {next}\\[bhlLvx]{blank}*'[^']+' /* various inline functions */ {next}\\\$[1-9] /* interpolate arg */ /* roff named glyphs */ {next}\\\(..|\\\[..\] add_glyph_to_whatis (yytext + 2, 2); /* perldoc strings */ {next}\\\*\(..|\\\*\[..\] add_perldoc_to_whatis (yytext + 3, 2); {next}\\\*. add_perldoc_to_whatis (yytext + 2, 1); {next}\\["#].* /* comment */ {next}{font_change} /* font changes */ {next}\\k{alpha} /* mark input place in register */ {next}\\n(\({alpha})?{alpha} /* interpolate number register */ {next}\\o\"[^"]+\" /* overstrike chars */ {next}{size_change} /* size changes */ {next}\\w{blank}*'[^']+'[^ \t]* /* width of string */ {next}\\ /* catch all */ {next}\(\\\|\){blank}* /* function() in hpux */ } /* some people rather ambitiously use non-trivial mdoc macros in NAME sections; cope with those that have been seen in the wild, and a few more */ { {bol}\.At{blank}* BEGIN (MAN_DESC_AT); {bol}\.Bsx{blank}* BEGIN (MAN_DESC_BSX); {bol}\.Bx{blank}* BEGIN (MAN_DESC_BX); {bol}\.Fx{blank}* BEGIN (MAN_DESC_FX); {bol}\.Nx{blank}* BEGIN (MAN_DESC_NX); {bol}\.Ox{blank}* BEGIN (MAN_DESC_OX); {bol}\.Ux{blank}* add_word_to_whatis ("UNIX"); {bol}\.Dq{blank}* { add_word_to_whatis ("\""); BEGIN (MAN_DESC_DQ); } } { 32v{blank}* mdoc_text ("Version 32V AT&T UNIX"); v1{blank}* mdoc_text ("Version 1 AT&T UNIX"); v2{blank}* mdoc_text ("Version 2 AT&T UNIX"); v3{blank}* mdoc_text ("Version 3 AT&T UNIX"); v4{blank}* mdoc_text ("Version 4 AT&T UNIX"); v5{blank}* mdoc_text ("Version 5 AT&T UNIX"); v6{blank}* mdoc_text ("Version 6 AT&T UNIX"); v7{blank}* mdoc_text ("Version 7 AT&T UNIX"); V{blank}* mdoc_text ("AT&T System V UNIX"); V.1{blank}* mdoc_text ("AT&T System V.1 UNIX"); V.2{blank}* mdoc_text ("AT&T System V.2 UNIX"); V.3{blank}* mdoc_text ("AT&T System V.3 
UNIX"); V.4{blank}* mdoc_text ("AT&T System V.4 UNIX"); .|{eol} { yyless (0); mdoc_text ("AT&T UNIX"); } } { {word} { add_word_to_whatis ("BSD/OS"); add_wordn_to_whatis (yytext, yyleng); BEGIN (MAN_DESC); } .|{eol} { yyless (0); mdoc_text ("BSD/OS"); } } { -alpha{blank}* mdoc_text ("BSD (currently in alpha test)"); -beta{blank}* mdoc_text ("BSD (currently in beta test)"); -devel{blank}* mdoc_text ("BSD (currently under development"); {word}{blank}* { add_wordn_to_whatis (yytext, yyleng); add_str_to_whatis ("BSD", 3); BEGIN (MAN_DESC_BX_RELEASE); } .|{eol} { yyless (0); mdoc_text ("BSD"); } } { [Rr]eno{blank}* { add_str_to_whatis ("-Reno", 5); BEGIN (MAN_DESC); } [Tt]ahoe{blank}* { add_str_to_whatis ("-Tahoe", 6); BEGIN (MAN_DESC); } [Ll]ite{blank}* { add_str_to_whatis ("-Lite", 5); BEGIN (MAN_DESC); } [Ll]ite2{blank}* { add_str_to_whatis ("-Lite2", 6); BEGIN (MAN_DESC); } .|{eol} { yyless (0); BEGIN (MAN_DESC); } } .* { add_str_to_whatis (yytext, yyleng); add_char_to_whatis ('"'); BEGIN (MAN_DESC); } { {word} { add_word_to_whatis ("FreeBSD"); add_wordn_to_whatis (yytext, yyleng); BEGIN (MAN_DESC); } .|{eol} { yyless (0); mdoc_text ("FreeBSD"); } } { {word} { add_word_to_whatis ("NetBSD"); add_wordn_to_whatis (yytext, yyleng); BEGIN (MAN_DESC); } .|{eol} { yyless (0); mdoc_text ("NetBSD"); } } { {word} { add_word_to_whatis ("OpenBSD"); add_wordn_to_whatis (yytext, yyleng); BEGIN (MAN_DESC); } .|{eol} { yyless (0); mdoc_text ("OpenBSD"); } } /* collapse spaces, escaped spaces, tabs, newlines to a single space */ {next}((\\[ ])|{blank})* add_char_to_whatis (' '); /* a ROFF break request, a paragraph request, or an indentation change usually means we have multiple whatis definitions, provide a separator for later processing */ { {bol}\.br{blank}* | {bol}\.LP{blank}* | {bol}\.PP{blank}* | {bol}\.P{blank}* | {bol}\.IP{blank}.* | {bol}\.HP{blank}.* | {bol}\.RS{blank}.* | {bol}\.RE{blank}.* { add_char_to_whatis ((char) 0x11); BEGIN (MAN_NAME); } } /* any other roff request 
we don't recognise terminates definitions */
{bol}['.]	{
		*p_name = '\0';
		BEGIN (MAN_REST);
	}

 /* pass words as a chunk. speed optimization */
[[:alnum:]]*	add_str_to_whatis (yytext, yyleng);

 /* normalise the comma (,) separators */
{blank}*,[ \t\r\n]*	|
{blank}*,{blank}*	add_str_to_whatis (", ", 2);

{bol}.	{
		newline_found ();
		add_char_to_whatis (yytext[yyleng - 1]);
	}
.	add_char_to_whatis (*yytext);

 /* default EOF rule */
<>	return 1;

%%

/* Print a truncation warning naming the page being scanned (fname), then
 * switch the scanner to the FORCE_EXIT start condition so that it stops.
 */
static void too_big (void)
{
	/* Even though MAX_NAME is a macro expanding to a constant, we
	 * translate it using ngettext anyway because that will make it
	 * easier to change the macro later.
	 */
	error (0, 0,
	       ngettext ("warning: whatis for %s exceeds %d byte, "
			 "truncating.",
			 "warning: whatis for %s exceeds %d bytes, "
			 "truncating.", MAX_NAME),
	       fname, MAX_NAME);

	BEGIN (FORCE_EXIT);
}

/* Append LENGTH bytes of STRING to newname if there is enough room,
 * otherwise warn via too_big.  The buffer is deliberately left without a
 * terminating NUL here; the rules that end collection store it through
 * p_name.
 */
static void add_str_to_whatis (const char *string, size_t length)
{
	if (p_name - newname + length >= MAX_NAME)
		too_big ();
	else {
		/* A plain bounded byte copy is what is wanted; every
		 * caller passes a length that lies within STRING.
		 */
		memcpy (p_name, string, length);
		p_name += length;
	}
}

/* Append a single character to newname if there is enough room.  While
 * waiting_for_quote is set, the next double quote is swallowed instead of
 * being stored, and the flag is cleared.
 */
static void add_char_to_whatis (unsigned char c)
{
	if (p_name - newname + 1 >= MAX_NAME)
		too_big ();
	else if (waiting_for_quote && c == '"')
		waiting_for_quote = false;
	else
		*p_name++ = c;
}

/* Append the " - " separator to newname, trimming the first space if one's
 * already there.
 */
static void add_separator_to_whatis (void)
{
	if (p_name != newname && *(p_name - 1) != ' ')
		add_char_to_whatis (' ');
	add_str_to_whatis ("- ", 2);
}

/* Append the first LENGTH bytes of STRING to newname as a word: a space is
 * inserted first unless the buffer is empty or already ends in one, and
 * trailing spaces within the given length are dropped.
 */
static void add_wordn_to_whatis (const char *string, size_t length)
{
	if (p_name != newname && *(p_name - 1) != ' ')
		add_char_to_whatis (' ');
	while (length && string[length - 1] == ' ')
		--length;
	if (length)
		add_str_to_whatis (string, length);
}

/* As add_wordn_to_whatis, for a NUL-terminated STRING. */
static void add_word_to_whatis (const char *string)
{
	add_wordn_to_whatis (string, strlen (string));
}

/* Search key for compare_macro: STRING need not be NUL-terminated, only
 * its first LENGTH bytes are significant.
 */
struct compare_macro_key {
	const char *string;
	size_t length;
};

/* bsearch comparator: LEFT is a struct compare_macro_key, RIGHT a table
 * entry.  Returns zero only when the entry name equals the key exactly.
 */
static int compare_macro (const void *left, const void *right)
{
	const struct compare_macro_key *key = left;
	const struct macro *value = right;
	int cmp;

	cmp = strncmp (key->string, value->name, key->length);
	if (cmp)
		return cmp;
	/* equal up to key->length, so value->name must be at least size
	 * key->length + 1
	 */
	else if (value->name[key->length])
		return -1;	/* key is a proper prefix, so it sorts first */
	else
		return 0;
}

/* Look up the LENGTH-byte name at STRING in the sorted table MACROS of
 * N_MACROS entries and append its replacement text to newname.  Unknown
 * names add nothing.
 */
static void add_macro_to_whatis (const struct macro *macros, size_t n_macros,
				 const char *string, size_t length)
{
	struct compare_macro_key key;
	const struct macro *macro;

	key.string = string;
	key.length = length;
	macro = bsearch (&key, macros, n_macros, sizeof (struct macro),
			 compare_macro);
	if (macro)
		add_str_to_whatis (macro->value, strlen (macro->value));
}

/* Append the replacement for a roff named glyph, if it is in glyphs[]. */
static void add_glyph_to_whatis (const char *string, size_t length)
{
	add_macro_to_whatis (glyphs, ARRAY_SIZE (glyphs), string, length);
}

/* Append the replacement for a perldoc string, if it is in perldocs[]. */
static void add_perldoc_to_whatis (const char *string, size_t length)
{
	add_macro_to_whatis (perldocs, ARRAY_SIZE (perldocs), string, length);
}

/* Append STRING as a word and return the scanner to MAN_DESC; used for the
 * fixed expansions of mdoc macros.
 */
static void mdoc_text (const char *string)
{
	add_word_to_whatis (string);
	BEGIN (MAN_DESC);
}

/* Handle a line break in the source.  In fill mode it becomes a space; in
 * no-fill mode it becomes the 0x11 separator and the scanner goes back to
 * MAN_NAME.  Any pending quote swallowing is cancelled.
 */
static void newline_found (void)
{
	/* If we are mid p_name and the last added char was not a space,
	 * best add one.
	 */
	if (p_name != newname && *(p_name - 1) != ' ') {
		if (fill_mode)
			add_char_to_whatis (' ');
		else {
			add_char_to_whatis ((char) 0x11);
			BEGIN (MAN_NAME);
		}
	}

	waiting_for_quote = false;
}

/* Open FILE ("-" means standard input), arrange for its contents to be
 * converted to UTF-8 when a source encoding is known, run cat pages through
 * col when PROG_COL is configured, and scan the result with
 * find_name_decompressed.
 *
 * FILENAME is the name used in warnings.  P_LG supplies the page type and
 * receives the results.  ENCODING, if non-NULL, is used as the source
 * encoding; otherwise, for a named file, it is derived from the file's
 * language directory.
 *
 * Returns the result of find_name_decompressed, or 0 if the page could not
 * be opened or converted.
 */
int find_name (const char *file, const char *filename, lexgrog *p_lg,
	       const char *encoding)
{
	int ret = 0;
	decompress *d;
	char *page_encoding = NULL;
	bool run_col = p_lg->type == CATPAGE && *PROG_COL != '\0';

	if (strcmp (file, "-") == 0) {
		int fd = dup (STDIN_FILENO);

		/* dup can fail; do not hand an invalid descriptor on. */
		if (fd < 0) {
			error (0, errno, "%s", file);
			return 0;
		}
		d = decompress_fdopen (fd);
	} else {
		struct stat st;
		int decompress_flags;
		char *lang;

		if (stat (file, &st)) {
			error (0, errno, "%s", file);
			return 0;
		}

		if (S_ISDIR (st.st_mode)) {
			error (0, EISDIR, "%s", file);
			return 0;
		}

		drop_effective_privs ();
		decompress_flags = 0;
		/* If we're looking at a cat page, then we need to run col
		 * over it, which doesn't work conveniently with an
		 * in-process decompressor.
		 */
		if (!run_col)
			decompress_flags |= DECOMPRESS_ALLOW_INPROCESS;
		d = decompress_open (file, decompress_flags);
		if (!d) {
			error (0, errno, _("can't open %s"), file);
			regain_effective_privs ();
			return 0;
		}
		regain_effective_privs ();

		if (!encoding) {
			lang = lang_dir (file);
			page_encoding = get_page_encoding (lang);
			free (lang);
		}
	}

	if (!page_encoding && encoding)
		page_encoding = xstrdup (encoding);

	if (page_encoding) {
		if (decompress_is_pipeline (d))
			add_manconv (decompress_get_pipeline (d),
				     page_encoding, "UTF-8");
		else if (manconv_inprocess (d, page_encoding, "UTF-8") != 0)
			/* manconv should already have written to stderr, so
			 * just return zero (i.e. no result).
			 */
			goto out;
	}

	if (run_col) {
		pipecmd *col_cmd;

		col_cmd = pipecmd_new_args (PROG_COL, "-b", "-p", "-x",
					    (void *) 0);
		pipecmd_pre_exec (col_cmd, sandbox_load, sandbox_free,
				  sandbox);
		pipeline_command (decompress_get_pipeline (d), col_cmd);
	}

	decompress_start (d);
	ret = find_name_decompressed (d, filename, p_lg);

out:
	free (page_encoding);
	decompress_free (d);
	return ret;
}

/* Run the scanner over the already started decompressor D.
 *
 * FILENAME is the name used in warnings.  P_LG->type selects the cat-page
 * or man-page start condition.  On success P_LG->whatis receives a newly
 * allocated copy of the collected text with leading and trailing spaces
 * removed, and P_LG->filters a newly allocated string of the detected
 * filter letters, or "-" if there were none.
 *
 * Returns 0 if the scanner returned non-zero (P_LG is then left untouched);
 * otherwise the first byte of the trimmed text, which is non-zero exactly
 * when some text was collected.
 */
int find_name_decompressed (decompress *d, const char *filename,
			    lexgrog *p_lg)
{
	int ret;

	decomp = d;
	fname = filename;
	*(p_name = newname) = '\0';
	memset (filters, '_', sizeof (filters));
	fill_mode = true;
	waiting_for_quote = false;

	if (p_lg->type == CATPAGE)
		BEGIN (CAT_FILE);
	else
		BEGIN (MAN_FILE);

	drop_effective_privs ();

	yyrestart (NULL);
	ret = yylex ();

	regain_effective_privs ();

	decompress_wait (decomp);

	if (ret)
		return 0;
	else {
		/* One slot per filter plus the terminating NUL, so that the
		 * string stays terminated even when every filter is set.
		 */
		char f_tmp[MAX_FILTERS + 1];
		int j, k;

		/* Wipe out any trailing spaces.  The bound on newname keeps
		 * the scan inside the buffer when the text is empty or
		 * consists solely of spaces.
		 */
		p_name = strchr (newname, '\0');
		while (p_name > newname && *(p_name - 1) == ' ')
			p_name--;
		*p_name = '\0';

		/* ... and skip any leading ones. */
		for (p_name = newname; *p_name == ' '; p_name++)
			;
		p_lg->whatis = xstrdup (p_name);

		/* "-" stands for no filters; otherwise it is overwritten
		 * by the first detected filter letter.
		 */
		memset (f_tmp, '\0', sizeof (f_tmp));
		f_tmp[0] = '-';
		for (j = k = 0; j < MAX_FILTERS; j++)
			if (filters[j] != '_')
				f_tmp[k++] = filters[j];
		p_lg->filters = xstrdup (f_tmp);

		return p_name[0];
	}
}