diff options
Diffstat (limited to '')
-rw-r--r-- | modules/metadata/mod_mime_magic.c | 2471 |
1 files changed, 2471 insertions, 0 deletions
diff --git a/modules/metadata/mod_mime_magic.c b/modules/metadata/mod_mime_magic.c new file mode 100644 index 0000000..22dadaf --- /dev/null +++ b/modules/metadata/mod_mime_magic.c @@ -0,0 +1,2471 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * mod_mime_magic: MIME type lookup via file magic numbers + * Copyright (c) 1996-1997 Cisco Systems, Inc. + * + * This software was submitted by Cisco Systems to the Apache Software Foundation in July + * 1997. Future revisions and derivatives of this source code must + * acknowledge Cisco Systems as the original contributor of this module. + * All other licensing and usage conditions are those of the Apache Software Foundation. + * + * Some of this code is derived from the free version of the file command + * originally posted to comp.sources.unix. Copyright info for that program + * is included below as required. + * --------------------------------------------------------------------------- + * - Copyright (c) Ian F. Darwin, 1987. Written by Ian F. Darwin. + * + * This software is not subject to any license of the American Telephone and + * Telegraph Company or of the Regents of the University of California. + * + * Permission is granted to anyone to use this software for any purpose on any + * computer system, and to alter it and redistribute it freely, subject to + * the following restrictions: + * + * 1. The author is not responsible for the consequences of use of this + * software, no matter how awful, even if they arise from flaws in it. + * + * 2. The origin of this software must not be misrepresented, either by + * explicit claim or by omission. Since few users ever read sources, credits + * must appear in the documentation. + * + * 3. Altered versions must be plainly marked as such, and must not be + * misrepresented as being the original software. Since few users ever read + * sources, credits must appear in the documentation. + * + * 4. This notice may not be removed or altered. + * ------------------------------------------------------------------------- + * + * For compliance with Mr Darwin's terms: this has been very significantly + * modified from the free "file" command. + * - all-in-one file for compilation convenience when moving from one + * version of Apache to the next. + * - Memory allocation is done through the Apache API's apr_pool_t structure. + * - All functions have had necessary Apache API request or server + * structures passed to them where necessary to call other Apache API + * routines. (i.e. usually for logging, files, or memory allocation in + * itself or a called function.) + * - struct magic has been converted from an array to a single-ended linked + * list because it only grows one record at a time, it's only accessed + * sequentially, and the Apache API has no equivalent of realloc(). + * - Functions have been changed to get their parameters from the server + * configuration instead of globals. (It should be reentrant now but has + * not been tested in a threaded environment.) + * - Places where it used to print results to stdout now saves them in a + * list where they're used to set the MIME type in the Apache request + * record. + * - Command-line flags have been removed since they will never be used here. + * + * Ian Kluft <ikluft@cisco.com> + * Engineering Information Framework + * Central Engineering + * Cisco Systems, Inc. + * San Jose, CA, USA + * + * Initial installation July/August 1996 + * Misc bug fixes May 1997 + * Submission to Apache Software Foundation July 1997 + * + */ + +#include "apr.h" +#include "apr_strings.h" +#include "apr_lib.h" +#define APR_WANT_STRFUNC +#include "apr_want.h" + +#if APR_HAVE_UNISTD_H +#include <unistd.h> +#endif + +#include "ap_config.h" +#include "httpd.h" +#include "http_config.h" +#include "http_request.h" +#include "http_core.h" +#include "http_log.h" +#include "http_protocol.h" +#include "util_script.h" + +/* ### this isn't set by configure? does anybody set this? */ +#ifdef HAVE_UTIME_H +#include <utime.h> +#endif + +/* + * data structures and related constants + */ + +#define MODNAME "mod_mime_magic" +#define MIME_MAGIC_DEBUG 0 + +#define MIME_BINARY_UNKNOWN "application/octet-stream" +#define MIME_TEXT_UNKNOWN "text/plain" + +#define MAXMIMESTRING 256 + +/* HOWMANY must be at least 4096 to make gzip -dcq work */ +#define HOWMANY 4096 +/* SMALL_HOWMANY limits how much work we do to figure out text files */ +#define SMALL_HOWMANY 1024 +#define MAXDESC 50 /* max leng of text description */ +#define MAXstring 64 /* max leng of "string" types */ + +struct magic { + struct magic *next; /* link to next entry */ + int lineno; /* line number from magic file */ + + short flag; +#define INDIR 1 /* if '>(...)' appears, */ +#define UNSIGNED 2 /* comparison is unsigned */ + short cont_level; /* level of ">" */ + struct { + char type; /* byte short long */ + long offset; /* offset from indirection */ + } in; + long offset; /* offset to magic number */ + unsigned char reln; /* relation (0=eq, '>'=gt, etc) */ + char type; /* int, short, long or string. */ + char vallen; /* length of string value, if any */ +#define BYTE 1 +#define SHORT 2 +#define LONG 4 +#define STRING 5 +#define DATE 6 +#define BESHORT 7 +#define BELONG 8 +#define BEDATE 9 +#define LESHORT 10 +#define LELONG 11 +#define LEDATE 12 + union VALUETYPE { + unsigned char b; + unsigned short h; + unsigned long l; + char s[MAXstring]; + unsigned char hs[2]; /* 2 bytes of a fixed-endian "short" */ + unsigned char hl[4]; /* 2 bytes of a fixed-endian "long" */ + } value; /* either number or string */ + unsigned long mask; /* mask before comparison with value */ + char nospflag; /* suppress space character */ + + /* NOTE: this string is suspected of overrunning - find it! */ + char desc[MAXDESC]; /* description */ +}; + +/* + * data structures for tar file recognition + * -------------------------------------------------------------------------- + * Header file for public domain tar (tape archive) program. + * + * @(#)tar.h 1.20 86/10/29 Public Domain. Created 25 August 1985 by John + * Gilmore, ihnp4!hoptoad!gnu. + * + * Header block on tape. + * + * I'm going to use traditional DP naming conventions here. A "block" is a big + * chunk of stuff that we do I/O on. A "record" is a piece of info that we + * care about. Typically many "record"s fit into a "block". + */ +#define RECORDSIZE 512 +#define NAMSIZ 100 +#define TUNMLEN 32 +#define TGNMLEN 32 + +union record { + char charptr[RECORDSIZE]; + struct header { + char name[NAMSIZ]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char chksum[8]; + char linkflag; + char linkname[NAMSIZ]; + char magic[8]; + char uname[TUNMLEN]; + char gname[TGNMLEN]; + char devmajor[8]; + char devminor[8]; + } header; +}; + +/* The magic field is filled with this if uname and gname are valid. */ +#define TMAGIC "ustar " /* 7 chars and a null */ + +/* + * file-function prototypes + */ +static int ascmagic(request_rec *, unsigned char *, apr_size_t); +static int is_tar(unsigned char *, apr_size_t); +static int softmagic(request_rec *, unsigned char *, apr_size_t); +static int tryit(request_rec *, unsigned char *, apr_size_t, int); +static int zmagic(request_rec *, unsigned char *, apr_size_t); + +static int getvalue(server_rec *, struct magic *, char **); +static int hextoint(int); +static char *getstr(server_rec *, char *, char *, int, int *); +static int parse(server_rec *, apr_pool_t *p, char *, int); + +static int match(request_rec *, unsigned char *, apr_size_t); +static int mget(request_rec *, union VALUETYPE *, unsigned char *, + struct magic *, apr_size_t); +static int mcheck(request_rec *, union VALUETYPE *, struct magic *); +static void mprint(request_rec *, union VALUETYPE *, struct magic *); + +static int uncompress(request_rec *, int, + unsigned char **, apr_size_t); +static long from_oct(int, char *); +static int fsmagic(request_rec *r, const char *fn); + +/* + * includes for ASCII substring recognition formerly "names.h" in file + * command + * + * Original notes: names and types used by ascmagic in file(1). These tokens are + * here because they can appear anywhere in the first HOWMANY bytes, while + * tokens in /etc/magic must appear at fixed offsets into the file. Don't + * make HOWMANY too high unless you have a very fast CPU. + */ + +/* these types are used to index the apr_table_t 'types': keep em in sync! */ +/* HTML inserted in first because this is a web server module now */ +#define L_HTML 0 /* HTML */ +#define L_C 1 /* first and foremost on UNIX */ +#define L_FORT 2 /* the oldest one */ +#define L_MAKE 3 /* Makefiles */ +#define L_PLI 4 /* PL/1 */ +#define L_MACH 5 /* some kinda assembler */ +#define L_ENG 6 /* English */ +#define L_PAS 7 /* Pascal */ +#define L_MAIL 8 /* Electronic mail */ +#define L_NEWS 9 /* Usenet Netnews */ + +static const char *types[] = +{ + "text/html", /* HTML */ + "text/plain", /* "c program text", */ + "text/plain", /* "fortran program text", */ + "text/plain", /* "make commands text", */ + "text/plain", /* "pl/1 program text", */ + "text/plain", /* "assembler program text", */ + "text/plain", /* "English text", */ + "text/plain", /* "pascal program text", */ + "message/rfc822", /* "mail text", */ + "message/news", /* "news text", */ + "application/binary", /* "can't happen error on names.h/types", */ + 0 +}; + +static const struct names { + const char *name; + short type; +} names[] = { + + /* These must be sorted by eye for optimal hit rate */ + /* Add to this list only after substantial meditation */ + { + "<html>", L_HTML + }, + { + "<HTML>", L_HTML + }, + { + "<head>", L_HTML + }, + { + "<HEAD>", L_HTML + }, + { + "<title>", L_HTML + }, + { + "<TITLE>", L_HTML + }, + { + "<h1>", L_HTML + }, + { + "<H1>", L_HTML + }, + { + "<!--", L_HTML + }, + { + "<!DOCTYPE HTML", L_HTML + }, + { + "/*", L_C + }, /* must precede "The", "the", etc. */ + { + "#include", L_C + }, + { + "char", L_C + }, + { + "The", L_ENG + }, + { + "the", L_ENG + }, + { + "double", L_C + }, + { + "extern", L_C + }, + { + "float", L_C + }, + { + "real", L_C + }, + { + "struct", L_C + }, + { + "union", L_C + }, + { + "CFLAGS", L_MAKE + }, + { + "LDFLAGS", L_MAKE + }, + { + "all:", L_MAKE + }, + { + ".PRECIOUS", L_MAKE + }, + /* + * Too many files of text have these words in them. Find another way to + * recognize Fortrash. + */ +#ifdef NOTDEF + { + "subroutine", L_FORT + }, + { + "function", L_FORT + }, + { + "block", L_FORT + }, + { + "common", L_FORT + }, + { + "dimension", L_FORT + }, + { + "integer", L_FORT + }, + { + "data", L_FORT + }, +#endif /* NOTDEF */ + { + ".ascii", L_MACH + }, + { + ".asciiz", L_MACH + }, + { + ".byte", L_MACH + }, + { + ".even", L_MACH + }, + { + ".globl", L_MACH + }, + { + "clr", L_MACH + }, + { + "(input,", L_PAS + }, + { + "dcl", L_PLI + }, + { + "Received:", L_MAIL + }, + { + ">From", L_MAIL + }, + { + "Return-Path:", L_MAIL + }, + { + "Cc:", L_MAIL + }, + { + "Newsgroups:", L_NEWS + }, + { + "Path:", L_NEWS + }, + { + "Organization:", L_NEWS + }, + { + NULL, 0 + } +}; + +#define NNAMES ((sizeof(names)/sizeof(struct names)) - 1) + +/* + * Result String List (RSL) + * + * The file(1) command prints its output. Instead, we store the various + * "printed" strings in a list (allocating memory as we go) and concatenate + * them at the end when we finally know how much space they'll need. + */ + +typedef struct magic_rsl_s { + const char *str; /* string, possibly a fragment */ + struct magic_rsl_s *next; /* pointer to next fragment */ +} magic_rsl; + +/* + * Apache module configuration structures + */ + +/* per-server info */ +typedef struct { + const char *magicfile; /* where magic be found */ + struct magic *magic; /* head of magic config list */ + struct magic *last; +} magic_server_config_rec; + +/* per-request info */ +typedef struct { + magic_rsl *head; /* result string list */ + magic_rsl *tail; + unsigned suf_recursion; /* recursion depth in suffix check */ +} magic_req_rec; + +/* + * configuration functions - called by Apache API routines + */ + +module AP_MODULE_DECLARE_DATA mime_magic_module; + +static void *create_magic_server_config(apr_pool_t *p, server_rec *d) +{ + /* allocate the config - use pcalloc because it needs to be zeroed */ + return apr_pcalloc(p, sizeof(magic_server_config_rec)); +} + +static void *merge_magic_server_config(apr_pool_t *p, void *basev, void *addv) +{ + magic_server_config_rec *base = (magic_server_config_rec *) basev; + magic_server_config_rec *add = (magic_server_config_rec *) addv; + magic_server_config_rec *new = (magic_server_config_rec *) + apr_palloc(p, sizeof(magic_server_config_rec)); + + new->magicfile = add->magicfile ? add->magicfile : base->magicfile; + new->magic = NULL; + new->last = NULL; + return new; +} + +static const char *set_magicfile(cmd_parms *cmd, void *dummy, const char *arg) +{ + magic_server_config_rec *conf = (magic_server_config_rec *) + ap_get_module_config(cmd->server->module_config, + &mime_magic_module); + + if (!conf) { + return MODNAME ": server structure not allocated"; + } + conf->magicfile = arg; + return NULL; +} + +/* + * configuration file commands - exported to Apache API + */ + +static const command_rec mime_magic_cmds[] = +{ + AP_INIT_TAKE1("MimeMagicFile", set_magicfile, NULL, RSRC_CONF, + "Path to MIME Magic file (in file(1) format)"), + {NULL} +}; + +/* + * RSL (result string list) processing routines + * + * These collect strings that would have been printed in fragments by file(1) + * into a list of magic_rsl structures with the strings. When complete, + * they're concatenated together to become the MIME content and encoding + * types. + * + * return value conventions for these functions: functions which return int: + * failure = -1, other = result functions which return pointers: failure = 0, + * other = result + */ + +/* allocate a per-request structure and put it in the request record */ +static magic_req_rec *magic_set_config(request_rec *r) +{ + magic_req_rec *req_dat = (magic_req_rec *) apr_palloc(r->pool, + sizeof(magic_req_rec)); + + req_dat->head = req_dat->tail = (magic_rsl *) NULL; + ap_set_module_config(r->request_config, &mime_magic_module, req_dat); + return req_dat; +} + +/* add a string to the result string list for this request */ +/* it is the responsibility of the caller to allocate "str" */ +static int magic_rsl_add(request_rec *r, const char *str) +{ + magic_req_rec *req_dat = (magic_req_rec *) + ap_get_module_config(r->request_config, &mime_magic_module); + magic_rsl *rsl; + + /* make sure we have a list to put it in */ + if (!req_dat) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_EINVAL, r, APLOGNO(01507) + MODNAME ": request config should not be NULL"); + if (!(req_dat = magic_set_config(r))) { + /* failure */ + return -1; + } + } + + /* allocate the list entry */ + rsl = (magic_rsl *) apr_palloc(r->pool, sizeof(magic_rsl)); + + /* fill it */ + rsl->str = str; + rsl->next = (magic_rsl *) NULL; + + /* append to the list */ + if (req_dat->head && req_dat->tail) { + req_dat->tail->next = rsl; + req_dat->tail = rsl; + } + else { + req_dat->head = req_dat->tail = rsl; + } + + /* success */ + return 0; +} + +/* RSL hook for puts-type functions */ +static int magic_rsl_puts(request_rec *r, const char *str) +{ + return magic_rsl_add(r, str); +} + +/* RSL hook for printf-type functions */ +static int magic_rsl_printf(request_rec *r, char *str,...) +{ + va_list ap; + + char buf[MAXMIMESTRING]; + + /* assemble the string into the buffer */ + va_start(ap, str); + apr_vsnprintf(buf, sizeof(buf), str, ap); + va_end(ap); + + /* add the buffer to the list */ + return magic_rsl_add(r, apr_pstrdup(r->pool, buf)); +} + +/* RSL hook for putchar-type functions */ +static int magic_rsl_putchar(request_rec *r, char c) +{ + char str[2]; + + /* high overhead for 1 char - just hope they don't do this much */ + str[0] = c; + str[1] = '\0'; + return magic_rsl_add(r, str); +} + +/* allocate and copy a contiguous string from a result string list */ +static char *rsl_strdup(request_rec *r, int start_frag, int start_pos, int len) +{ + char *result; /* return value */ + int cur_frag, /* current fragment number/counter */ + cur_pos, /* current position within fragment */ + res_pos; /* position in result string */ + magic_rsl *frag; /* list-traversal pointer */ + magic_req_rec *req_dat = (magic_req_rec *) + ap_get_module_config(r->request_config, &mime_magic_module); + + /* allocate the result string */ + result = (char *) apr_palloc(r->pool, len + 1); + + /* loop through and collect the string */ + res_pos = 0; + for (frag = req_dat->head, cur_frag = 0; + frag->next; + frag = frag->next, cur_frag++) { + /* loop to the first fragment */ + if (cur_frag < start_frag) + continue; + + /* loop through and collect chars */ + for (cur_pos = (cur_frag == start_frag) ? start_pos : 0; + frag->str[cur_pos]; + cur_pos++) { + if (cur_frag >= start_frag + && cur_pos >= start_pos + && res_pos <= len) { + result[res_pos++] = frag->str[cur_pos]; + if (res_pos > len) { + break; + } + } + } + } + + /* clean up and return */ + result[res_pos] = 0; +#if MIME_MAGIC_DEBUG + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01508) + MODNAME ": rsl_strdup() %d chars: %s", res_pos - 1, result); +#endif + return result; +} + +/* states for the state-machine algorithm in magic_rsl_to_request() */ +typedef enum { + rsl_leading_space, rsl_type, rsl_subtype, rsl_separator, rsl_encoding +} rsl_states; + +/* process the RSL and set the MIME info in the request record */ +static int magic_rsl_to_request(request_rec *r) +{ + int cur_frag, /* current fragment number/counter */ + cur_pos, /* current position within fragment */ + type_frag, /* content type starting point: fragment */ + type_pos, /* content type starting point: position */ + type_len, /* content type length */ + encoding_frag, /* content encoding starting point: fragment */ + encoding_pos, /* content encoding starting point: position */ + encoding_len; /* content encoding length */ + + char *tmp; + magic_rsl *frag; /* list-traversal pointer */ + rsl_states state; + + magic_req_rec *req_dat = (magic_req_rec *) + ap_get_module_config(r->request_config, &mime_magic_module); + + /* check if we have a result */ + if (!req_dat || !req_dat->head) { + /* empty - no match, we defer to other Apache modules */ + return DECLINED; + } + + /* start searching for the type and encoding */ + state = rsl_leading_space; + type_frag = type_pos = type_len = 0; + encoding_frag = encoding_pos = encoding_len = 0; + for (frag = req_dat->head, cur_frag = 0; + frag && frag->next; + frag = frag->next, cur_frag++) { + /* loop through the characters in the fragment */ + for (cur_pos = 0; frag->str[cur_pos]; cur_pos++) { + if (apr_isspace(frag->str[cur_pos])) { + /* process whitespace actions for each state */ + if (state == rsl_leading_space) { + /* eat whitespace in this state */ + continue; + } + else if (state == rsl_type) { + /* whitespace: type has no slash! */ + return DECLINED; + } + else if (state == rsl_subtype) { + /* whitespace: end of MIME type */ + state++; + continue; + } + else if (state == rsl_separator) { + /* eat whitespace in this state */ + continue; + } + else if (state == rsl_encoding) { + /* whitespace: end of MIME encoding */ + /* we're done */ + frag = req_dat->tail; + break; + } + else { + /* should not be possible */ + /* abandon malfunctioning module */ + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01509) + MODNAME ": bad state %d (ws)", state); + return DECLINED; + } + /* NOTREACHED */ + } + else if (state == rsl_type && + frag->str[cur_pos] == '/') { + /* copy the char and go to rsl_subtype state */ + type_len++; + state++; + } + else { + /* process non-space actions for each state */ + if (state == rsl_leading_space) { + /* non-space: begin MIME type */ + state++; + type_frag = cur_frag; + type_pos = cur_pos; + type_len = 1; + continue; + } + else if (state == rsl_type || + state == rsl_subtype) { + /* non-space: adds to type */ + type_len++; + continue; + } + else if (state == rsl_separator) { + /* non-space: begin MIME encoding */ + state++; + encoding_frag = cur_frag; + encoding_pos = cur_pos; + encoding_len = 1; + continue; + } + else if (state == rsl_encoding) { + /* non-space: adds to encoding */ + encoding_len++; + continue; + } + else { + /* should not be possible */ + /* abandon malfunctioning module */ + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01510) + MODNAME ": bad state %d (ns)", state); + return DECLINED; + } + /* NOTREACHED */ + } + /* NOTREACHED */ + } + } + + /* if we ended prior to state rsl_subtype, we had incomplete info */ + if (state != rsl_subtype && state != rsl_separator && + state != rsl_encoding) { + /* defer to other modules */ + return DECLINED; + } + + /* save the info in the request record */ + tmp = rsl_strdup(r, type_frag, type_pos, type_len); + /* XXX: this could be done at config time I'm sure... but I'm + * confused by all this magic_rsl stuff. -djg */ + ap_content_type_tolower(tmp); + ap_set_content_type(r, tmp); + + if (state == rsl_encoding) { + tmp = rsl_strdup(r, encoding_frag, + encoding_pos, encoding_len); + /* XXX: this could be done at config time I'm sure... but I'm + * confused by all this magic_rsl stuff. -djg */ + ap_str_tolower(tmp); + r->content_encoding = tmp; + } + + /* detect memory allocation or other errors */ + if (!r->content_type || + (state == rsl_encoding && !r->content_encoding)) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01511) + MODNAME ": unexpected state %d; could be caused by bad " + "data in magic file", + state); + return HTTP_INTERNAL_SERVER_ERROR; + } + + /* success! */ + return OK; +} + +/* + * magic_process - process input file r Apache API request record + * (formerly called "process" in file command, prefix added for clarity) Opens + * the file and reads a fixed-size buffer to begin processing the contents. + */ +static int magic_process(request_rec *r) +{ + apr_file_t *fd = NULL; + unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */ + apr_size_t nbytes = 0; /* number of bytes read from a datafile */ + int result; + + /* + * first try judging the file based on its filesystem status + */ + switch ((result = fsmagic(r, r->filename))) { + case DONE: + magic_rsl_putchar(r, '\n'); + return OK; + case OK: + break; + default: + /* fatal error, bail out */ + return result; + } + + if (apr_file_open(&fd, r->filename, APR_READ, APR_OS_DEFAULT, r->pool) != APR_SUCCESS) { + /* We can't open it, but we were able to stat it. */ + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01512) + MODNAME ": can't read `%s'", r->filename); + /* let some other handler decide what the problem is */ + return DECLINED; + } + + /* + * try looking at the first HOWMANY bytes + */ + nbytes = sizeof(buf) - 1; + if ((result = apr_file_read(fd, (char *) buf, &nbytes)) != APR_SUCCESS) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, result, r, APLOGNO(01513) + MODNAME ": read failed: %s", r->filename); + return HTTP_INTERNAL_SERVER_ERROR; + } + + if (nbytes == 0) { + return DECLINED; + } + else { + buf[nbytes++] = '\0'; /* null-terminate it */ + result = tryit(r, buf, nbytes, 1); + if (result != OK) { + return result; + } + } + + (void) apr_file_close(fd); + (void) magic_rsl_putchar(r, '\n'); + + return OK; +} + + +static int tryit(request_rec *r, unsigned char *buf, apr_size_t nb, + int checkzmagic) +{ + /* + * Try compression stuff + */ + if (checkzmagic == 1) { + if (zmagic(r, buf, nb) == 1) + return OK; + } + + /* + * try tests in /etc/magic (or surrogate magic file) + */ + if (softmagic(r, buf, nb) == 1) + return OK; + + /* + * try known keywords, check for ascii-ness too. + */ + if (ascmagic(r, buf, nb) == 1) + return OK; + + /* + * abandon hope, all ye who remain here + */ + return DECLINED; +} + +#define EATAB {while (apr_isspace(*l)) ++l;} + +/* + * apprentice - load configuration from the magic file r + * API request record + */ +static int apprentice(server_rec *s, apr_pool_t *p) +{ + apr_file_t *f = NULL; + apr_status_t result; + char line[BUFSIZ + 1]; + int errs = 0; + int lineno; +#if MIME_MAGIC_DEBUG + int rule = 0; + struct magic *m, *prevm; +#endif + magic_server_config_rec *conf = (magic_server_config_rec *) + ap_get_module_config(s->module_config, &mime_magic_module); + const char *fname = ap_server_root_relative(p, conf->magicfile); + + if (!fname) { + ap_log_error(APLOG_MARK, APLOG_ERR, APR_EBADPATH, s, APLOGNO(01514) + MODNAME ": Invalid magic file path %s", conf->magicfile); + return -1; + } + if ((result = apr_file_open(&f, fname, APR_READ | APR_BUFFERED, + APR_OS_DEFAULT, p)) != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, result, s, APLOGNO(01515) + MODNAME ": can't read magic file %s", fname); + return -1; + } + + /* set up the magic list (empty) */ + conf->magic = conf->last = NULL; + + /* parse it */ + for (lineno = 1; apr_file_gets(line, BUFSIZ, f) == APR_SUCCESS; lineno++) { + int ws_offset; + char *last = line + strlen(line) - 1; /* guaranteed that len >= 1 since an + * "empty" line contains a '\n' + */ + + /* delete newline and any other trailing whitespace */ + while (last >= line + && apr_isspace(*last)) { + *last = '\0'; + --last; + } + + /* skip leading whitespace */ + ws_offset = 0; + while (line[ws_offset] && apr_isspace(line[ws_offset])) { + ws_offset++; + } + + /* skip blank lines */ + if (line[ws_offset] == 0) { + continue; + } + + /* comment, do not parse */ + if (line[ws_offset] == '#') + continue; + +#if MIME_MAGIC_DEBUG + /* if we get here, we're going to use it so count it */ + rule++; +#endif + + /* parse it */ + if (parse(s, p, line + ws_offset, lineno) != 0) + ++errs; + } + + (void) apr_file_close(f); + +#if MIME_MAGIC_DEBUG + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01516) + MODNAME ": apprentice conf=%x file=%s m=%s m->next=%s last=%s", + conf, + conf->magicfile ? conf->magicfile : "NULL", + conf->magic ? "set" : "NULL", + (conf->magic && conf->magic->next) ? "set" : "NULL", + conf->last ? "set" : "NULL"); + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01517) + MODNAME ": apprentice read %d lines, %d rules, %d errors", + lineno, rule, errs); +#endif + +#if MIME_MAGIC_DEBUG + prevm = 0; + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01518) + MODNAME ": apprentice test"); + for (m = conf->magic; m; m = m->next) { + if (apr_isprint((((unsigned long) m) >> 24) & 255) && + apr_isprint((((unsigned long) m) >> 16) & 255) && + apr_isprint((((unsigned long) m) >> 8) & 255) && + apr_isprint(((unsigned long) m) & 255)) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01519) + MODNAME ": apprentice: POINTER CLOBBERED! " + "m=\"%c%c%c%c\" line=%d", + (((unsigned long) m) >> 24) & 255, + (((unsigned long) m) >> 16) & 255, + (((unsigned long) m) >> 8) & 255, + ((unsigned long) m) & 255, + prevm ? prevm->lineno : -1); + break; + } + prevm = m; + } +#endif + + return (errs ? -1 : 0); +} + +/* + * extend the sign bit if the comparison is to be signed + */ +static unsigned long signextend(server_rec *s, struct magic *m, unsigned long v) +{ + if (!(m->flag & UNSIGNED)) + switch (m->type) { + /* + * Do not remove the casts below. They are vital. When later + * compared with the data, the sign extension must have happened. + */ + case BYTE: + v = (char) v; + break; + case SHORT: + case BESHORT: + case LESHORT: + v = (short) v; + break; + case DATE: + case BEDATE: + case LEDATE: + case LONG: + case BELONG: + case LELONG: + v = (long) v; + break; + case STRING: + break; + default: + ap_log_error(APLOG_MARK, APLOG_ERR, 0, s, APLOGNO(01520) + MODNAME ": can't happen: m->type=%d", m->type); + return -1; + } + return v; +} + +/* + * parse one line from magic file, put into magic[index++] if valid + */ +static int parse(server_rec *serv, apr_pool_t *p, char *l, int lineno) +{ + struct magic *m; + char *t, *s; + magic_server_config_rec *conf = (magic_server_config_rec *) + ap_get_module_config(serv->module_config, &mime_magic_module); + + /* allocate magic structure entry */ + m = (struct magic *) apr_pcalloc(p, sizeof(struct magic)); + + /* append to linked list */ + m->next = NULL; + if (!conf->magic || !conf->last) { + conf->magic = conf->last = m; + } + else { + conf->last->next = m; + conf->last = m; + } + + /* set values in magic structure */ + m->flag = 0; + m->cont_level = 0; + m->lineno = lineno; + + while (*l == '>') { + ++l; /* step over */ + m->cont_level++; + } + + if (m->cont_level != 0 && *l == '(') { + ++l; /* step over */ + m->flag |= INDIR; + } + + /* get offset, then skip over it */ + m->offset = (int) strtol(l, &t, 0); + if (l == t) { + ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01521) + MODNAME ": offset %s invalid", l); + } + l = t; + + if (m->flag & INDIR) { + m->in.type = LONG; + m->in.offset = 0; + /* + * read [.lbs][+-]nnnnn) + */ + if (*l == '.') { + switch (*++l) { + case 'l': + m->in.type = LONG; + break; + case 's': + m->in.type = SHORT; + break; + case 'b': + m->in.type = BYTE; + break; + default: + ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01522) + MODNAME ": indirect offset type %c invalid", *l); + break; + } + l++; + } + s = l; + if (*l == '+' || *l == '-') + l++; + if (apr_isdigit((unsigned char) *l)) { + m->in.offset = strtol(l, &t, 0); + if (*s == '-') + m->in.offset = -m->in.offset; + } + else + t = l; + if (*t++ != ')') { + ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01523) + MODNAME ": missing ')' in indirect offset"); + } + l = t; + } + + + while (apr_isdigit((unsigned char) *l)) + ++l; + EATAB; + +#define NBYTE 4 +#define NSHORT 5 +#define NLONG 4 +#define NSTRING 6 +#define NDATE 4 +#define NBESHORT 7 +#define NBELONG 6 +#define NBEDATE 6 +#define NLESHORT 7 +#define NLELONG 6 +#define NLEDATE 6 + + if (*l == 'u') { + ++l; + m->flag |= UNSIGNED; + } + + /* get type, skip it */ + if (strncmp(l, "byte", NBYTE) == 0) { + m->type = BYTE; + l += NBYTE; + } + else if (strncmp(l, "short", NSHORT) == 0) { + m->type = SHORT; + l += NSHORT; + } + else if (strncmp(l, "long", NLONG) == 0) { + m->type = LONG; + l += NLONG; + } + else if (strncmp(l, "string", NSTRING) == 0) { + m->type = STRING; + l += NSTRING; + } + else if (strncmp(l, "date", NDATE) == 0) { + m->type = DATE; + l += NDATE; + } + else if (strncmp(l, "beshort", NBESHORT) == 0) { + m->type = BESHORT; + l += NBESHORT; + } + else if (strncmp(l, "belong", NBELONG) == 0) { + m->type = BELONG; + l += NBELONG; + } + else if (strncmp(l, "bedate", NBEDATE) == 0) { + m->type = BEDATE; + l += NBEDATE; + } + else if (strncmp(l, "leshort", NLESHORT) == 0) { + m->type = LESHORT; + l += NLESHORT; + } + else if (strncmp(l, "lelong", NLELONG) == 0) { + m->type = LELONG; + l += NLELONG; + } + else if (strncmp(l, "ledate", NLEDATE) == 0) { + m->type = LEDATE; + l += NLEDATE; + } + else { + ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01524) + MODNAME ": type %s invalid", l); + return -1; + } + /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ + if (*l == '&') { + ++l; + m->mask = signextend(serv, m, strtol(l, &l, 0)); + } + else + m->mask = ~0L; + EATAB; + + switch (*l) { + case '>': + case '<': + /* Old-style anding: "0 byte &0x80 dynamically linked" */ + case '&': + case '^': + case '=': + m->reln = *l; + ++l; + break; + case '!': + if (m->type != STRING) { + m->reln = *l; + ++l; + break; + } + /* FALL THROUGH */ + default: + if (*l == 'x' && apr_isspace(l[1])) { + m->reln = *l; + ++l; + goto GetDesc; /* Bill The Cat */ + } + m->reln = '='; + break; + } + EATAB; + + if (getvalue(serv, m, &l)) + return -1; + /* + * now get last part - the description + */ + GetDesc: + EATAB; + if (l[0] == '\b') { + ++l; + m->nospflag = 1; + } + else if ((l[0] == '\\') && (l[1] == 'b')) { + ++l; + ++l; + m->nospflag = 1; + } + else + m->nospflag = 0; + apr_cpystrn(m->desc, l, sizeof(m->desc)); + +#if MIME_MAGIC_DEBUG + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, serv, APLOGNO(01525) + MODNAME ": parse line=%d m=%x next=%x cont=%d desc=%s", + lineno, m, m->next, m->cont_level, m->desc); +#endif /* MIME_MAGIC_DEBUG */ + + return 0; +} + +/* + * Read a numeric value from a pointer, into the value union of a magic + * pointer, according to the magic type. Update the string pointer to point + * just after the number read. Return 0 for success, non-zero for failure. + */ +static int getvalue(server_rec *s, struct magic *m, char **p) +{ + int slen; + + if (m->type == STRING) { + *p = getstr(s, *p, m->value.s, sizeof(m->value.s), &slen); + m->vallen = slen; + } + else if (m->reln != 'x') + m->value.l = signextend(s, m, strtol(*p, p, 0)); + return 0; +} + +/* + * Convert a string containing C character escapes. Stop at an unescaped + * space or tab. Copy the converted version to "p", returning its length in + * *slen. Return updated scan pointer as function result. + */ +static char *getstr(server_rec *serv, register char *s, register char *p, + int plen, int *slen) +{ + char *origs = s, *origp = p; + char *pmax = p + plen - 1; + register int c; + register int val; + + while ((c = *s++) != '\0') { + if (apr_isspace(c)) + break; + if (p >= pmax) { + ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01526) + MODNAME ": string too long: %s", origs); + break; + } + if (c == '\\') { + switch (c = *s++) { + + case '\0': + goto out; + + default: + *p++ = (char) c; + break; + + case 'n': + *p++ = '\n'; + break; + + case 'r': + *p++ = '\r'; + break; + + case 'b': + *p++ = '\b'; + break; + + case 't': + *p++ = '\t'; + break; + + case 'f': + *p++ = '\f'; + break; + + case 'v': + *p++ = '\v'; + break; + + /* \ and up to 3 octal digits */ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + val = c - '0'; + c = *s++; /* try for 2 */ + if (c >= '0' && c <= '7') { + val = (val << 3) | (c - '0'); + c = *s++; /* try for 3 */ + if (c >= '0' && c <= '7') + val = (val << 3) | (c - '0'); + else + --s; + } + else + --s; + *p++ = (char) val; + break; + + /* \x and up to 3 hex digits */ + case 'x': + val = 'x'; /* Default if no digits */ + c = hextoint(*s++); /* Get next char */ + if (c >= 0) { + val = c; + c = hextoint(*s++); + if (c >= 0) { + val = (val << 4) + c; + c = hextoint(*s++); + if (c >= 0) { + val = (val << 4) + c; + } + else + --s; + } + else + --s; + } + else + --s; + *p++ = (char) val; + break; + } + } + else + *p++ = (char) c; + } + out: + *p = '\0'; + *slen = p - origp; + return s; +} + + +/* Single hex char to int; -1 if not a hex char. */ +static int hextoint(int c) +{ + if (apr_isdigit(c)) + return c - '0'; + if ((c >= 'a') && (c <= 'f')) + return c + 10 - 'a'; + if ((c >= 'A') && (c <= 'F')) + return c + 10 - 'A'; + return -1; +} + + +/* + * return DONE to indicate it's been handled + * return OK to indicate it's a regular file still needing handling + * other returns indicate a failure of some sort + */ +static int fsmagic(request_rec *r, const char *fn) +{ + switch (r->finfo.filetype) { + case APR_DIR: + magic_rsl_puts(r, DIR_MAGIC_TYPE); + return DONE; + case APR_CHR: + /* + * (void) magic_rsl_printf(r,"character special (%d/%d)", + * major(sb->st_rdev), minor(sb->st_rdev)); + */ + (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN); + return DONE; + case APR_BLK: + /* + * (void) magic_rsl_printf(r,"block special (%d/%d)", + * major(sb->st_rdev), minor(sb->st_rdev)); + */ + (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN); + return DONE; + /* TODO add code to handle V7 MUX and Blit MUX files */ + case APR_PIPE: + /* + * magic_rsl_puts(r,"fifo (named pipe)"); + */ + (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN); + return DONE; + case APR_LNK: + /* We used stat(), the only possible reason for this is that the + * symlink is broken. + */ + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01527) + MODNAME ": broken symlink (%s)", fn); + return HTTP_INTERNAL_SERVER_ERROR; + case APR_SOCK: + magic_rsl_puts(r, MIME_BINARY_UNKNOWN); + return DONE; + case APR_REG: + break; + default: + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01528) + MODNAME ": invalid file type %d.", r->finfo.filetype); + return HTTP_INTERNAL_SERVER_ERROR; + } + + /* + * regular file, check next possibility + */ + if (r->finfo.size == 0) { + magic_rsl_puts(r, MIME_TEXT_UNKNOWN); + return DONE; + } + return OK; +} + +/* + * softmagic - lookup one file in database (already read from /etc/magic by + * apprentice.c). Passed the name and FILE * of one file to be typed. + */ + /* ARGSUSED1 *//* nbytes passed for regularity, maybe need later */ +static int softmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes) +{ + if (match(r, buf, nbytes)) + return 1; + + return 0; +} + +/* + * Go through the whole list, stopping if you find a match. Process all the + * continuations of that match before returning. + * + * We support multi-level continuations: + * + * At any time when processing a successful top-level match, there is a current + * continuation level; it represents the level of the last successfully + * matched continuation. + * + * Continuations above that level are skipped as, if we see one, it means that + * the continuation that controls them - i.e, the lower-level continuation + * preceding them - failed to match. + * + * Continuations below that level are processed as, if we see one, it means + * we've finished processing or skipping higher-level continuations under the + * control of a successful or unsuccessful lower-level continuation, and are + * now seeing the next lower-level continuation and should process it. The + * current continuation level reverts to the level of the one we're seeing. + * + * Continuations at the current level are processed as, if we see one, there's + * no lower-level continuation that may have failed. + * + * If a continuation matches, we bump the current continuation level so that + * higher-level continuations are processed. + */ +static int match(request_rec *r, unsigned char *s, apr_size_t nbytes) +{ +#if MIME_MAGIC_DEBUG + int rule_counter = 0; +#endif + int cont_level = 0; + int need_separator = 0; + union VALUETYPE p; + magic_server_config_rec *conf = (magic_server_config_rec *) + ap_get_module_config(r->server->module_config, &mime_magic_module); + struct magic *m; + +#if MIME_MAGIC_DEBUG + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01529) + MODNAME ": match conf=%x file=%s m=%s m->next=%s last=%s", + conf, + conf->magicfile ? conf->magicfile : "NULL", + conf->magic ? "set" : "NULL", + (conf->magic && conf->magic->next) ? "set" : "NULL", + conf->last ? "set" : "NULL"); +#endif + +#if MIME_MAGIC_DEBUG + for (m = conf->magic; m; m = m->next) { + if (apr_isprint((((unsigned long) m) >> 24) & 255) && + apr_isprint((((unsigned long) m) >> 16) & 255) && + apr_isprint((((unsigned long) m) >> 8) & 255) && + apr_isprint(((unsigned long) m) & 255)) { + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01530) + MODNAME ": match: POINTER CLOBBERED! " + "m=\"%c%c%c%c\"", + (((unsigned long) m) >> 24) & 255, + (((unsigned long) m) >> 16) & 255, + (((unsigned long) m) >> 8) & 255, + ((unsigned long) m) & 255); + break; + } + } +#endif + + for (m = conf->magic; m; m = m->next) { +#if MIME_MAGIC_DEBUG + rule_counter++; + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01531) + MODNAME ": line=%d desc=%s", m->lineno, m->desc); +#endif + + /* check if main entry matches */ + if (!mget(r, &p, s, m, nbytes) || + !mcheck(r, &p, m)) { + struct magic *m_cont; + + /* + * main entry didn't match, flush its continuations + */ + if (!m->next || (m->next->cont_level == 0)) { + continue; + } + + m_cont = m->next; + while (m_cont && (m_cont->cont_level != 0)) { +#if MIME_MAGIC_DEBUG + rule_counter++; + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01532) + MODNAME ": line=%d mc=%x mc->next=%x cont=%d desc=%s", + m_cont->lineno, m_cont, + m_cont->next, m_cont->cont_level, + m_cont->desc); +#endif + /* + * this trick allows us to keep *m in sync when the continue + * advances the pointer + */ + m = m_cont; + m_cont = m_cont->next; + } + continue; + } + + /* if we get here, the main entry rule was a match */ + /* this will be the last run through the loop */ +#if MIME_MAGIC_DEBUG + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01533) + MODNAME ": rule matched, line=%d type=%d %s", + m->lineno, m->type, + (m->type == STRING) ? m->value.s : ""); +#endif + + /* print the match */ + mprint(r, &p, m); + + /* + * If we printed something, we'll need to print a blank before we + * print something else. + */ + if (m->desc[0]) + need_separator = 1; + /* and any continuations that match */ + cont_level++; + /* + * while (m && m->next && m->next->cont_level != 0 && ( m = m->next + * )) + */ + m = m->next; + while (m && (m->cont_level != 0)) { +#if MIME_MAGIC_DEBUG + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01534) + MODNAME ": match line=%d cont=%d type=%d %s", + m->lineno, m->cont_level, m->type, + (m->type == STRING) ? m->value.s : ""); +#endif + if (cont_level >= m->cont_level) { + if (cont_level > m->cont_level) { + /* + * We're at the end of the level "cont_level" + * continuations. + */ + cont_level = m->cont_level; + } + if (mget(r, &p, s, m, nbytes) && + mcheck(r, &p, m)) { + /* + * This continuation matched. Print its message, with a + * blank before it if the previous item printed and this + * item isn't empty. + */ + /* space if previous printed */ + if (need_separator + && (m->nospflag == 0) + && (m->desc[0] != '\0') + ) { + (void) magic_rsl_putchar(r, ' '); + need_separator = 0; + } + mprint(r, &p, m); + if (m->desc[0]) + need_separator = 1; + + /* + * If we see any continuations at a higher level, process + * them. + */ + cont_level++; + } + } + + /* move to next continuation record */ + m = m->next; + } +#if MIME_MAGIC_DEBUG + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01535) + MODNAME ": matched after %d rules", rule_counter); +#endif + return 1; /* all through */ + } +#if MIME_MAGIC_DEBUG + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01536) + MODNAME ": failed after %d rules", rule_counter); +#endif + return 0; /* no match at all */ +} + +static void mprint(request_rec *r, union VALUETYPE *p, struct magic *m) +{ + char *pp; + unsigned long v; + char time_str[APR_CTIME_LEN]; + + switch (m->type) { + case BYTE: + v = p->b; + break; + + case SHORT: + case BESHORT: + case LESHORT: + v = p->h; + break; + + case LONG: + case BELONG: + case LELONG: + v = p->l; + break; + + case STRING: + if (m->reln == '=') { + (void) magic_rsl_printf(r, m->desc, m->value.s); + } + else { + (void) magic_rsl_printf(r, m->desc, p->s); + } + return; + + case DATE: + case BEDATE: + case LEDATE: + apr_ctime(time_str, apr_time_from_sec(*(time_t *)&p->l)); + pp = time_str; + (void) magic_rsl_printf(r, m->desc, pp); + return; + default: + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01537) + MODNAME ": invalid m->type (%d) in mprint().", + m->type); + return; + } + + v = signextend(r->server, m, v) & m->mask; + (void) magic_rsl_printf(r, m->desc, (unsigned long) v); +} + +/* + * Convert the byte order of the data we are looking at + */ +static int mconvert(request_rec *r, union VALUETYPE *p, struct magic *m) +{ + char *rt; + + switch (m->type) { + case BYTE: + case SHORT: + case LONG: + case DATE: + return 1; + case STRING: + /* Null terminate and eat the return */ + p->s[sizeof(p->s) - 1] = '\0'; + if ((rt = strchr(p->s, '\n')) != NULL) + *rt = '\0'; + return 1; + case BESHORT: + p->h = (short) ((p->hs[0] << 8) | (p->hs[1])); + return 1; + case BELONG: + case BEDATE: + p->l = (long) + ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3])); + return 1; + case LESHORT: + p->h = (short) ((p->hs[1] << 8) | (p->hs[0])); + return 1; + case LELONG: + case LEDATE: + p->l = (long) + ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0])); + return 1; + default: + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01538) + MODNAME ": invalid type %d in mconvert().", m->type); + return 0; + } +} + + +static int mget(request_rec *r, union VALUETYPE *p, unsigned char *s, + struct magic *m, apr_size_t nbytes) +{ + long offset = m->offset; + + if (offset + sizeof(union VALUETYPE) > nbytes) + return 0; + + memcpy(p, s + offset, sizeof(union VALUETYPE)); + + if (!mconvert(r, p, m)) + return 0; + + if (m->flag & INDIR) { + + switch (m->in.type) { + case BYTE: + offset = p->b + m->in.offset; + break; + case SHORT: + offset = p->h + m->in.offset; + break; + case LONG: + offset = p->l + m->in.offset; + break; + } + + if (offset + sizeof(union VALUETYPE) > nbytes) + return 0; + + memcpy(p, s + offset, sizeof(union VALUETYPE)); + + if (!mconvert(r, p, m)) + return 0; + } + return 1; +} + +static int mcheck(request_rec *r, union VALUETYPE *p, struct magic *m) +{ + register unsigned long l = m->value.l; + register unsigned long v; + int matched; + + if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01539) + MODNAME ": BOINK"); + return 1; + } + + switch (m->type) { + case BYTE: + v = p->b; + break; + + case SHORT: + case BESHORT: + case LESHORT: + v = p->h; + break; + + case LONG: + case BELONG: + case LELONG: + case DATE: + case BEDATE: + case LEDATE: + v = p->l; + break; + + case STRING: + l = 0; + /* + * What we want here is: v = strncmp(m->value.s, p->s, m->vallen); + * but ignoring any nulls. bcmp doesn't give -/+/0 and isn't + * universally available anyway. + */ + v = 0; + { + register unsigned char *a = (unsigned char *) m->value.s; + register unsigned char *b = (unsigned char *) p->s; + register int len = m->vallen; + + while (--len >= 0) + if ((v = *b++ - *a++) != 0) + break; + } + break; + default: + /* bogosity, pretend that it just wasn't a match */ + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01540) + MODNAME ": invalid type %d in mcheck().", m->type); + return 0; + } + + v = signextend(r->server, m, v) & m->mask; + + switch (m->reln) { + case 'x': +#if MIME_MAGIC_DEBUG + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01541) + "%lu == *any* = 1", v); +#endif + matched = 1; + break; + + case '!': + matched = v != l; +#if MIME_MAGIC_DEBUG + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01542) + "%lu != %lu = %d", v, l, matched); +#endif + break; + + case '=': + matched = v == l; +#if MIME_MAGIC_DEBUG + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01543) + "%lu == %lu = %d", v, l, matched); +#endif + break; + + case '>': + if (m->flag & UNSIGNED) { + matched = v > l; +#if MIME_MAGIC_DEBUG + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01544) + "%lu > %lu = %d", v, l, matched); +#endif + } + else { + matched = (long) v > (long) l; +#if MIME_MAGIC_DEBUG + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01545) + "%ld > %ld = %d", v, l, matched); +#endif + } + break; + + case '<': + if (m->flag & UNSIGNED) { + matched = v < l; +#if MIME_MAGIC_DEBUG + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01546) + "%lu < %lu = %d", v, l, matched); +#endif + } + else { + matched = (long) v < (long) l; +#if MIME_MAGIC_DEBUG + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01547) + "%ld < %ld = %d", v, l, matched); +#endif + } + break; + + case '&': + matched = (v & l) == l; +#if MIME_MAGIC_DEBUG + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01548) + "((%lx & %lx) == %lx) = %d", v, l, l, matched); +#endif + break; + + case '^': + matched = (v & l) != l; +#if MIME_MAGIC_DEBUG + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01549) + "((%lx & %lx) != %lx) = %d", v, l, l, matched); +#endif + break; + + default: + /* bogosity, pretend it didn't match */ + matched = 0; + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01550) + MODNAME ": mcheck: can't happen: invalid relation %d.", + m->reln); + break; + } + + return matched; +} + +/* an optimization over plain strcmp() */ +#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0) + +static int ascmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes) +{ + int has_escapes = 0; + unsigned char *s; + char nbuf[SMALL_HOWMANY + 1]; /* one extra for terminating '\0' */ + char *token; + const struct names *p; + int small_nbytes; + char *strtok_state; + + /* these are easy, do them first */ + + /* + * for troff, look for . + letter + letter or .\"; this must be done to + * disambiguate tar archives' ./file and other trash from real troff + * input. + */ + if (*buf == '.') { + unsigned char *tp = buf + 1; + + while (apr_isspace(*tp)) + ++tp; /* skip leading whitespace */ + if ((apr_isalnum(*tp) || *tp == '\\') && + (apr_isalnum(*(tp + 1)) || *tp == '"')) { + magic_rsl_puts(r, "application/x-troff"); + return 1; + } + } + if ((*buf == 'c' || *buf == 'C') && apr_isspace(*(buf + 1))) { + /* Fortran */ + magic_rsl_puts(r, "text/plain"); + return 1; + } + + /* look for tokens from names.h - this is expensive!, so we'll limit + * ourselves to only SMALL_HOWMANY bytes */ + small_nbytes = (nbytes > SMALL_HOWMANY) ? SMALL_HOWMANY : nbytes; + /* make a copy of the buffer here because apr_strtok() will destroy it */ + s = (unsigned char *) memcpy(nbuf, buf, small_nbytes); + s[small_nbytes] = '\0'; + has_escapes = (memchr(s, '\033', small_nbytes) != NULL); + while ((token = apr_strtok((char *) s, " \t\n\r\f", &strtok_state)) != NULL) { + s = NULL; /* make apr_strtok() keep on tokin' */ + for (p = names; p < names + NNAMES; p++) { + if (STREQ(p->name, token)) { + magic_rsl_puts(r, types[p->type]); + if (has_escapes) + magic_rsl_puts(r, " (with escape sequences)"); + return 1; + } + } + } + + switch (is_tar(buf, nbytes)) { + case 1: + /* V7 tar archive */ + magic_rsl_puts(r, "application/x-tar"); + return 1; + case 2: + /* POSIX tar archive */ + magic_rsl_puts(r, "application/x-tar"); + return 1; + } + + /* all else fails, but it is ascii... */ + return 0; +} + + +/* + * compress routines: zmagic() - returns 0 if not recognized, uncompresses + * and prints information if recognized uncompress(s, method, old, n, newch) + * - uncompress old into new, using method, return sizeof new + */ + +static struct { + char *magic; + apr_size_t maglen; + char *argv[3]; + int silent; + char *encoding; /* MUST be lowercase */ +} compr[] = { + + /* we use gzip here rather than uncompress because we have to pass + * it a full filename -- and uncompress only considers filenames + * ending with .Z + */ + { + "\037\235", 2, { + "gzip", "-dcq", NULL + }, 0, "x-compress" + }, + { + "\037\213", 2, { + "gzip", "-dcq", NULL + }, 1, "x-gzip" + }, + /* + * XXX pcat does not work, cause I don't know how to make it read stdin, + * so we use gzip + */ + { + "\037\036", 2, { + "gzip", "-dcq", NULL + }, 0, "x-gzip" + }, +}; + +static int ncompr = sizeof(compr) / sizeof(compr[0]); + +static int zmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes) +{ + unsigned char *newbuf; + int newsize; + int i; + + for (i = 0; i < ncompr; i++) { + if (nbytes < compr[i].maglen) + continue; + if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0) + break; + } + + if (i == ncompr) + return 0; + + if ((newsize = uncompress(r, i, &newbuf, HOWMANY)) > 0) { + /* set encoding type in the request record */ + r->content_encoding = compr[i].encoding; + + newbuf[newsize-1] = '\0'; /* null-terminate uncompressed data */ + /* Try to detect the content type of the uncompressed data */ + if (tryit(r, newbuf, newsize, 0) != OK) { + return 0; + } + } + return 1; +} + + +struct uncompress_parms { + request_rec *r; + int method; +}; + +static int create_uncompress_child(struct uncompress_parms *parm, apr_pool_t *cntxt, + apr_file_t **pipe_in) +{ + int rc = 1; + const char *new_argv[4]; + request_rec *r = parm->r; + apr_pool_t *child_context = cntxt; + apr_procattr_t *procattr; + apr_proc_t *procnew; + + /* XXX missing 1.3 logic: + * + * what happens when !compr[parm->method].silent? + * Should we create the err pipe, read it, and copy to the log? + */ + + if ((apr_procattr_create(&procattr, child_context) != APR_SUCCESS) || + (apr_procattr_io_set(procattr, APR_FULL_BLOCK, + APR_FULL_BLOCK, APR_NO_PIPE) != APR_SUCCESS) || + (apr_procattr_dir_set(procattr, + ap_make_dirstr_parent(r->pool, r->filename)) != APR_SUCCESS) || + (apr_procattr_cmdtype_set(procattr, APR_PROGRAM_PATH) != APR_SUCCESS)) { + /* Something bad happened, tell the world. */ + ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_ENOPROC, r, APLOGNO(01551) + "couldn't setup child process: %s", r->filename); + } + else { + new_argv[0] = compr[parm->method].argv[0]; + new_argv[1] = compr[parm->method].argv[1]; + new_argv[2] = r->filename; + new_argv[3] = NULL; + + procnew = apr_pcalloc(child_context, sizeof(*procnew)); + rc = apr_proc_create(procnew, compr[parm->method].argv[0], + new_argv, NULL, procattr, child_context); + + if (rc != APR_SUCCESS) { + /* Bad things happened. Everyone should have cleaned up. */ + ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_ENOPROC, r, APLOGNO(01552) + MODNAME ": could not execute `%s'.", + compr[parm->method].argv[0]); + } + else { + apr_pool_note_subprocess(child_context, procnew, APR_KILL_AFTER_TIMEOUT); + *pipe_in = procnew->out; + } + } + + return (rc); +} + +static int uncompress(request_rec *r, int method, + unsigned char **newch, apr_size_t n) +{ + struct uncompress_parms parm; + apr_file_t *pipe_out = NULL; + apr_pool_t *sub_context; + apr_status_t rv; + + parm.r = r; + parm.method = method; + + /* We make a sub_pool so that we can collect our child early, otherwise + * there are cases (i.e. generating directory indicies with mod_autoindex) + * where we would end up with LOTS of zombies. + */ + if (apr_pool_create(&sub_context, r->pool) != APR_SUCCESS) + return -1; + + if ((rv = create_uncompress_child(&parm, sub_context, &pipe_out)) != APR_SUCCESS) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(01553) + MODNAME ": couldn't spawn uncompress process: %s", r->uri); + return -1; + } + + *newch = (unsigned char *) apr_palloc(r->pool, n); + rv = apr_file_read(pipe_out, *newch, &n); + if (n == 0) { + apr_pool_destroy(sub_context); + ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(01554) + MODNAME ": read failed from uncompress of %s", r->filename); + return -1; + } + apr_pool_destroy(sub_context); + return n; +} + +/* + * is_tar() -- figure out whether file is a tar archive. + * + * Stolen (by author of file utility) from the public domain tar program: Public + * Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu). + * + * @(#)list.c 1.18 9/23/86 Public Domain - gnu $Id: mod_mime_magic.c,v 1.7 + * 1997/06/24 00:41:02 ikluft Exp ikluft $ + * + * Comments changed and some code/comments reformatted for file command by Ian + * Darwin. + */ + +#define isodigit(c) (((unsigned char)(c) >= '0') && ((unsigned char)(c) <= '7')) + +/* + * Return 0 if the checksum is bad (i.e., probably not a tar archive), 1 for + * old UNIX tar file, 2 for Unix Std (POSIX) tar file. + */ + +static int is_tar(unsigned char *buf, apr_size_t nbytes) +{ + register union record *header = (union record *) buf; + register int i; + register long sum, recsum; + register char *p; + + if (nbytes < sizeof(union record)) + return 0; + + recsum = from_oct(8, header->header.chksum); + + sum = 0; + p = header->charptr; + for (i = sizeof(union record); --i >= 0;) { + /* + * We can't use unsigned char here because of old compilers, e.g. V7. + */ + sum += 0xFF & *p++; + } + + /* Adjust checksum to count the "chksum" field as blanks. */ + for (i = sizeof(header->header.chksum); --i >= 0;) + sum -= 0xFF & header->header.chksum[i]; + sum += ' ' * sizeof header->header.chksum; + + if (sum != recsum) + return 0; /* Not a tar archive */ + + if (0 == strcmp(header->header.magic, TMAGIC)) + return 2; /* Unix Standard tar archive */ + + return 1; /* Old fashioned tar archive */ +} + + +/* + * Quick and dirty octal conversion. + * + * Result is -1 if the field is invalid (all blank, or nonoctal). + */ +static long from_oct(int digs, char *where) +{ + register long value; + + while (apr_isspace(*where)) { /* Skip spaces */ + where++; + if (--digs <= 0) + return -1; /* All blank field */ + } + value = 0; + while (digs > 0 && isodigit(*where)) { /* Scan til nonoctal */ + value = (value << 3) | (*where++ - '0'); + --digs; + } + + if (digs > 0 && *where && !apr_isspace(*where)) + return -1; /* Ended on non-space/nul */ + + return value; +} + +/* + * Check for file-revision suffix + * + * This is for an obscure document control system used on an intranet. + * The web representation of each file's revision has an @1, @2, etc + * appended with the revision number. This needs to be stripped off to + * find the file suffix, which can be recognized by sending the name back + * through a sub-request. The base file name (without the @num suffix) + * must exist because its type will be used as the result. + */ +static int revision_suffix(request_rec *r) +{ + int suffix_pos, result; + char *sub_filename; + request_rec *sub; + +#if MIME_MAGIC_DEBUG + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01555) + MODNAME ": revision_suffix checking %s", r->filename); +#endif /* MIME_MAGIC_DEBUG */ + + /* check for recognized revision suffix */ + suffix_pos = strlen(r->filename) - 1; + if (!apr_isdigit(r->filename[suffix_pos])) { + return 0; + } + while (suffix_pos >= 0 && apr_isdigit(r->filename[suffix_pos])) + suffix_pos--; + if (suffix_pos < 0 || r->filename[suffix_pos] != '@') { + return 0; + } + + /* perform sub-request for the file name without the suffix */ + result = 0; + sub_filename = apr_pstrndup(r->pool, r->filename, suffix_pos); +#if MIME_MAGIC_DEBUG + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01556) + MODNAME ": subrequest lookup for %s", sub_filename); +#endif /* MIME_MAGIC_DEBUG */ + sub = ap_sub_req_lookup_file(sub_filename, r, NULL); + + /* extract content type/encoding/language from sub-request */ + if (sub->content_type) { + ap_set_content_type(r, apr_pstrdup(r->pool, sub->content_type)); +#if MIME_MAGIC_DEBUG + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01557) + MODNAME ": subrequest %s got %s", + sub_filename, r->content_type); +#endif /* MIME_MAGIC_DEBUG */ + if (sub->content_encoding) + r->content_encoding = + apr_pstrdup(r->pool, sub->content_encoding); + if (sub->content_languages) { + int n; + r->content_languages = apr_array_copy(r->pool, + sub->content_languages); + for (n = 0; n < r->content_languages->nelts; ++n) { + char **lang = ((char **)r->content_languages->elts) + n; + *lang = apr_pstrdup(r->pool, *lang); + } + } + result = 1; + } + + /* clean up */ + ap_destroy_sub_req(sub); + + return result; +} + +/* + * initialize the module + */ +static int magic_init(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *main_server) +{ + int result; + magic_server_config_rec *conf; + magic_server_config_rec *main_conf; + server_rec *s; +#if MIME_MAGIC_DEBUG + struct magic *m, *prevm; +#endif /* MIME_MAGIC_DEBUG */ + + main_conf = ap_get_module_config(main_server->module_config, &mime_magic_module); + for (s = main_server; s; s = s->next) { + conf = ap_get_module_config(s->module_config, &mime_magic_module); + if (conf->magicfile == NULL && s != main_server) { + /* inherits from the parent */ + *conf = *main_conf; + } + else if (conf->magicfile) { + result = apprentice(s, p); + if (result == -1) + return OK; +#if MIME_MAGIC_DEBUG + prevm = 0; + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01558) + MODNAME ": magic_init 1 test"); + for (m = conf->magic; m; m = m->next) { + if (apr_isprint((((unsigned long) m) >> 24) & 255) && + apr_isprint((((unsigned long) m) >> 16) & 255) && + apr_isprint((((unsigned long) m) >> 8) & 255) && + apr_isprint(((unsigned long) m) & 255)) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01559) + MODNAME ": magic_init 1: POINTER CLOBBERED! " + "m=\"%c%c%c%c\" line=%d", + (((unsigned long) m) >> 24) & 255, + (((unsigned long) m) >> 16) & 255, + (((unsigned long) m) >> 8) & 255, + ((unsigned long) m) & 255, + prevm ? prevm->lineno : -1); + break; + } + prevm = m; + } +#endif + } + } + return OK; +} + +/* + * Find the Content-Type from any resource this module has available + */ + +static int magic_find_ct(request_rec *r) +{ + int result; + magic_server_config_rec *conf; + + /* the file has to exist */ + if (r->finfo.filetype == APR_NOFILE || !r->filename) { + return DECLINED; + } + + /* was someone else already here? */ + if (r->content_type) { + return DECLINED; + } + + conf = ap_get_module_config(r->server->module_config, &mime_magic_module); + if (!conf || !conf->magic) { + return DECLINED; + } + + /* initialize per-request info */ + if (!magic_set_config(r)) { + return HTTP_INTERNAL_SERVER_ERROR; + } + + /* try excluding file-revision suffixes */ + if (revision_suffix(r) != 1) { + /* process it based on the file contents */ + if ((result = magic_process(r)) != OK) { + return result; + } + } + + /* if we have any results, put them in the request structure */ + return magic_rsl_to_request(r); +} + +static void register_hooks(apr_pool_t *p) +{ + static const char * const aszPre[]={ "mod_mime.c", NULL }; + + /* mod_mime_magic should be run after mod_mime, if at all. */ + + ap_hook_type_checker(magic_find_ct, aszPre, NULL, APR_HOOK_MIDDLE); + ap_hook_post_config(magic_init, NULL, NULL, APR_HOOK_FIRST); +} + +/* + * Apache API module interface + */ + +AP_DECLARE_MODULE(mime_magic) = +{ + STANDARD20_MODULE_STUFF, + NULL, /* dir config creator */ + NULL, /* dir merger --- default is to override */ + create_magic_server_config, /* server config */ + merge_magic_server_config, /* merge server config */ + mime_magic_cmds, /* command apr_table_t */ + register_hooks /* register hooks */ +}; |