diff options
Diffstat (limited to 'src/decompress.c')
-rw-r--r-- | src/decompress.c | 435 |
1 files changed, 435 insertions, 0 deletions
diff --git a/src/decompress.c b/src/decompress.c new file mode 100644 index 0000000..c483241 --- /dev/null +++ b/src/decompress.c @@ -0,0 +1,435 @@ +/* + * decompress.c: decompression abstraction layer + * + * Copyright (C) 2007, 2008 Colin Watson. + * + * This file is part of man-db. + * + * man-db is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * man-db is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with man-db; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif /* HAVE_CONFIG_H */ + +#include <assert.h> +#include <string.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> + +#ifdef HAVE_LIBZ +# include "zlib.h" +#endif /* HAVE_LIBZ */ + +#include "pipeline.h" + +#include "attribute.h" +#include "minmax.h" +#include "xalloc.h" +#include "xstrndup.h" +#include "xvasprintf.h" + +#include "manconfig.h" + +#include "compression.h" +#include "sandbox.h" + +#include "decompress.h" + +enum decompress_tag { + DECOMPRESS_PIPELINE, + DECOMPRESS_INPROCESS +}; + +struct decompress_inprocess { + char *buf; + size_t len; + size_t offset; + char *line_cache; +}; + +struct decompress { + enum decompress_tag tag; + union { + pipeline *p; + struct decompress_inprocess inprocess; + } u; +}; + +/* Create a new pipeline-based decompressor. Takes ownership of p. */ +static decompress *decompress_new_pipeline (pipeline *p) +{ + decompress *d = XMALLOC (decompress); + + d->tag = DECOMPRESS_PIPELINE; + d->u.p = p; + + return d; +} + +#ifdef HAVE_LIBZ + +/* Create a new in-process decompressor. Takes ownership of buf. */ +static decompress *decompress_new_inprocess (char *buf, size_t len) +{ + decompress *d = XMALLOC (decompress); + + d->tag = DECOMPRESS_INPROCESS; + d->u.inprocess.buf = buf; + d->u.inprocess.len = len; + d->u.inprocess.offset = 0; + d->u.inprocess.line_cache = NULL; + + return d; +} + +static void decompress_zlib (void *data MAYBE_UNUSED) +{ + gzFile zlibfile; + int fd; + + fd = dup (STDIN_FILENO); + if (fd < 0) + return; + + zlibfile = gzdopen (fd, "r"); + if (!zlibfile) { + close (fd); + return; + } + + for (;;) { + char buffer[4096]; + int r = gzread (zlibfile, buffer, 4096); + if (r <= 0) + break; + if (fwrite (buffer, 1, (size_t) r, stdout) < (size_t) r) + break; + } + + gzclose (zlibfile); + return; +} + +/* The largest number of uncompressed bytes we're prepared to read into + * memory. (We actually allow at most one fewer byte than this, for easy + * EOF detection.) + * + * At the time of writing, 11 out of 27959 (0.04%) installed manual pages on + * the author's system were larger than this. + * + * We could lift this restriction if we streamed in-process decompression + * instead, but that's a bit complicated: we'd also need to stream encoding + * conversion, and there's relatively little point until lexgrog can rely on + * preprocessor header lines rather than having to scan the whole file for + * preprocessor indications. For the time being, one-shot buffering is + * cheap enough and much simpler. + */ +#define MAX_INPROCESS 1048576 + +static decompress *decompress_try_zlib (const char *filename) +{ + gzFile zlibfile; + /* We only ever call this from the parent process (and don't + * currently use threads), and this lets us skip per-file memory + * allocation. + */ + static char buffer[MAX_INPROCESS]; + int len = 0; + + zlibfile = gzopen (filename, "r"); + if (!zlibfile) + return NULL; + + while (len < MAX_INPROCESS) { + /* Read one more byte than we're prepared to return, in + * order to detect EOF at the right position. The "len >= + * MAX_INPROCESS" check below catches the boundary case. + */ + int r = gzread (zlibfile, buffer + len, MAX_INPROCESS - len); + if (r < 0) { + gzclose (zlibfile); + return NULL; + } else if (r == 0) + break; + else + len += r; + } + + gzclose (zlibfile); + if (len >= MAX_INPROCESS) + return NULL; + /* Copy input data so that we don't have potential data corruption + * if more than one in-process decompressor is active at once. (An + * alternative might be to use a lock to prevent that situation.) + */ + return decompress_new_inprocess (xmemdup (buffer, (size_t) len), + (size_t) len); +} + +#define OPEN_FLAGS_UNUSED +#else /* !HAVE_LIBZ */ +#define OPEN_FLAGS_UNUSED MAYBE_UNUSED +#endif /* HAVE_LIBZ */ + +extern man_sandbox *sandbox; + +decompress *decompress_open (const char *filename, int flags OPEN_FLAGS_UNUSED) +{ + pipecmd *cmd; + pipeline *p; + struct stat st; +#ifdef HAVE_LIBZ + size_t filename_len; +#endif /* HAVE_LIBZ */ + char *ext; + struct compression *comp; + + if (stat (filename, &st) < 0 || S_ISDIR (st.st_mode)) + return NULL; + +#ifdef HAVE_LIBZ + filename_len = strlen (filename); + if (filename_len > 3 && STREQ (filename + filename_len - 3, ".gz")) { + if (flags & DECOMPRESS_ALLOW_INPROCESS) { + decompress *d = decompress_try_zlib (filename); + if (d) + return d; + } + + cmd = pipecmd_new_function ("zcat", &decompress_zlib, NULL, + NULL); + pipecmd_pre_exec (cmd, sandbox_load, sandbox_free, sandbox); + p = pipeline_new_commands (cmd, (void *) 0); + goto got_pipeline; + } +#endif /* HAVE_LIBZ */ + + ext = strrchr (filename, '.'); + if (ext) { + ++ext; + + for (comp = comp_list; comp->ext; ++comp) { + if (!STREQ (comp->ext, ext)) + continue; + + cmd = pipecmd_new_argstr (comp->prog); + pipecmd_pre_exec (cmd, sandbox_load, sandbox_free, + sandbox); + p = pipeline_new_commands (cmd, (void *) 0); + goto got_pipeline; + } + } + +#ifdef HAVE_GZIP + /* HP-UX */ + ext = strstr (filename, ".Z/"); + if (ext) { + cmd = pipecmd_new_argstr (PROG_GUNZIP); + pipecmd_pre_exec (cmd, sandbox_load, sandbox_free, sandbox); + p = pipeline_new_commands (cmd, (void *) 0); + goto got_pipeline; + } +#endif + + p = pipeline_new (); + +got_pipeline: + pipeline_want_infile (p, filename); + pipeline_want_out (p, -1); + return decompress_new_pipeline (p); +} + +decompress *decompress_fdopen (int fd) +{ + pipeline *p; +#ifdef HAVE_LIBZ + pipecmd *cmd; +#endif /* HAVE_LIBZ */ + +#ifdef HAVE_LIBZ + cmd = pipecmd_new_function ("zcat", &decompress_zlib, NULL, NULL); + pipecmd_pre_exec (cmd, sandbox_load, sandbox_free, sandbox); + p = pipeline_new_commands (cmd, (void *) 0); +#else /* HAVE_LIBZ */ + p = pipeline_new (); +#endif /* HAVE_LIBZ */ + + pipeline_want_in (p, fd); + pipeline_want_out (p, -1); + return decompress_new_pipeline (p); +} + +bool ATTRIBUTE_PURE decompress_is_pipeline (decompress *d) +{ + return d->tag == DECOMPRESS_PIPELINE; +} + +pipeline * ATTRIBUTE_PURE decompress_get_pipeline (decompress *d) +{ + assert (d->tag == DECOMPRESS_PIPELINE); + return d->u.p; +} + +const char * ATTRIBUTE_PURE decompress_inprocess_buf (decompress *d) +{ + assert (d->tag == DECOMPRESS_INPROCESS); + return d->u.inprocess.buf; +} + +size_t ATTRIBUTE_PURE decompress_inprocess_len (decompress *d) +{ + assert (d->tag == DECOMPRESS_INPROCESS); + return d->u.inprocess.len; +} + +void decompress_inprocess_replace (decompress *d, char *buf, size_t len) +{ + assert (d->tag == DECOMPRESS_INPROCESS); + + free (d->u.inprocess.line_cache); + free (d->u.inprocess.buf); + + d->u.inprocess.buf = buf; + d->u.inprocess.len = len; + d->u.inprocess.offset = 0; + d->u.inprocess.line_cache = NULL; +} + +void decompress_start (decompress *d) +{ + if (d->tag == DECOMPRESS_PIPELINE) + pipeline_start (d->u.p); +} + +const char *decompress_read (decompress *d, size_t *len) +{ + if (d->tag == DECOMPRESS_PIPELINE) + return pipeline_read (d->u.p, len); + else { + const char *ret; + assert (d->tag == DECOMPRESS_INPROCESS); + *len = MIN (*len, d->u.inprocess.len - d->u.inprocess.offset); + ret = d->u.inprocess.buf + d->u.inprocess.offset; + d->u.inprocess.offset += *len; + return ret; + } +} + +const char *decompress_peek (decompress *d, size_t *len) +{ + if (d->tag == DECOMPRESS_PIPELINE) + return pipeline_peek (d->u.p, len); + else { + assert (d->tag == DECOMPRESS_INPROCESS); + *len = MIN (*len, d->u.inprocess.len - d->u.inprocess.offset); + return d->u.inprocess.buf + d->u.inprocess.offset; + } +} + +void decompress_peek_skip (decompress *d, size_t len) +{ + if (d->tag == DECOMPRESS_PIPELINE) + pipeline_peek_skip (d->u.p, len); + else { + assert (d->tag == DECOMPRESS_INPROCESS); + assert (len <= d->u.inprocess.len - d->u.inprocess.offset); + d->u.inprocess.offset += len; + } +} + +const char *decompress_readline (decompress *d) +{ + if (d->tag == DECOMPRESS_PIPELINE) + return pipeline_readline (d->u.p); + else { + const char *cur, *end; + assert (d->tag == DECOMPRESS_INPROCESS); + /* This isn't on the hot path (only called for a few lines + * at the start of the file), so we can afford to + * reallocate. + */ + if (d->u.inprocess.line_cache) { + free (d->u.inprocess.line_cache); + d->u.inprocess.line_cache = NULL; + } + cur = d->u.inprocess.buf + d->u.inprocess.offset; + end = memchr (cur, '\n', + d->u.inprocess.len - d->u.inprocess.offset); + if (end) { + d->u.inprocess.line_cache = xstrndup + (cur, end - cur + 1); + d->u.inprocess.offset += end - cur + 1; + return d->u.inprocess.line_cache; + } else + return NULL; + } +} + +const char *decompress_peekline (decompress *d) +{ + if (d->tag == DECOMPRESS_PIPELINE) + return pipeline_peekline (d->u.p); + else { + const char *cur, *end; + assert (d->tag == DECOMPRESS_INPROCESS); + /* This isn't on the hot path (only called for a few lines + * at the start of the file), so we can afford to + * reallocate. + */ + if (d->u.inprocess.line_cache) { + free (d->u.inprocess.line_cache); + d->u.inprocess.line_cache = NULL; + } + cur = d->u.inprocess.buf + d->u.inprocess.offset; + end = memchr (cur, '\n', + d->u.inprocess.len - d->u.inprocess.offset); + if (end) { + d->u.inprocess.line_cache = xstrndup + (cur, end - cur + 1); + return d->u.inprocess.line_cache; + } else + return NULL; + } +} + +int decompress_wait (decompress *d) +{ + if (d->tag == DECOMPRESS_PIPELINE) + return pipeline_wait (d->u.p); + else { + assert (d->tag == DECOMPRESS_INPROCESS); + return 0; + } +} + +void decompress_free (decompress *d) +{ + if (!d) + return; + if (d->tag == DECOMPRESS_PIPELINE) + pipeline_free (d->u.p); + else { + assert (d->tag == DECOMPRESS_INPROCESS); + free (d->u.inprocess.line_cache); + free (d->u.inprocess.buf); + } + free (d); +} |