diff options
author | Hartmut Holzgraefe <hholzgra@php.net> | 2002-03-24 23:00:47 +0000 |
---|---|---|
committer | Hartmut Holzgraefe <hholzgra@php.net> | 2002-03-24 23:00:47 +0000 |
commit | 2c413f116aaf83eed98ba0c303e138d86a17633c (patch) | |
tree | 4423f6671b0a78075899b8ba4b9e7710f9639cde | |
parent | 0f3b3045062bc392700f1d630228917679106c6a (diff) | |
download | php-git-2c413f116aaf83eed98ba0c303e138d86a17633c.tar.gz |
1st implementation of magic-file based mime_content_type(string filename)
-rw-r--r-- | ext/mime_magic/CREDITS | 2 | ||||
-rw-r--r-- | ext/mime_magic/EXPERIMENTAL | 0 | ||||
-rw-r--r-- | ext/mime_magic/config.m4 | 11 | ||||
-rw-r--r-- | ext/mime_magic/mime_magic.c | 2120 | ||||
-rw-r--r-- | ext/mime_magic/php_mime_magic.h | 454 |
5 files changed, 2587 insertions, 0 deletions
diff --git a/ext/mime_magic/CREDITS b/ext/mime_magic/CREDITS new file mode 100644 index 0000000000..ce54609ecc --- /dev/null +++ b/ext/mime_magic/CREDITS @@ -0,0 +1,2 @@ +mime_magic +Hartmut Holzgraefe diff --git a/ext/mime_magic/EXPERIMENTAL b/ext/mime_magic/EXPERIMENTAL new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/ext/mime_magic/EXPERIMENTAL diff --git a/ext/mime_magic/config.m4 b/ext/mime_magic/config.m4 new file mode 100644 index 0000000000..21e1c88a1e --- /dev/null +++ b/ext/mime_magic/config.m4 @@ -0,0 +1,11 @@ +dnl $Id$ +dnl config.m4 for extension mime_magic + +PHP_ARG_ENABLE(mime_magic, whether to enable mime_magic support, +[ --enable-mime_magic Enable mime_magic support]) + +if test "$PHP_MIME_MAGIC" != "no"; then + dnl PHP_SUBST(MIME_MAGIC_SHARED_LIBADD) + + PHP_NEW_EXTENSION(mime_magic, mime_magic.c, $ext_shared) +fi diff --git a/ext/mime_magic/mime_magic.c b/ext/mime_magic/mime_magic.c new file mode 100644 index 0000000000..07af88ec07 --- /dev/null +++ b/ext/mime_magic/mime_magic.c @@ -0,0 +1,2120 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 4 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2002 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 2.02 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available at through the world-wide-web at | + | http://www.php.net/license/2_02.txt. | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ + | Author: Hartmut Holzgraefe | + +----------------------------------------------------------------------+ + + $Id$ + + This module contains a lot of stuff taken from Apache mod_mime_magic, + so the license section is a little bit longer than usual: + + ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2000 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + * + * Portions of this software are based upon public domain software + * originally written at the National Center for Supercomputing Applications, + * University of Illinois, Urbana-Champaign. + */ + +/* + * mod_mime_magic: MIME type lookup via file magic numbers + * Copyright (c) 1996-1997 Cisco Systems, Inc. + * + * This software was submitted by Cisco Systems to the Apache Group in July + * 1997. Future revisions and derivatives of this source code must + * acknowledge Cisco Systems as the original contributor of this module. + * All other licensing and usage conditions are those of the Apache Group. + * + * Some of this code is derived from the free version of the file command + * originally posted to comp.sources.unix. Copyright info for that program + * is included below as required. 
+ * --------------------------------------------------------------------------- + * - Copyright (c) Ian F. Darwin, 1987. Written by Ian F. Darwin. + * + * This software is not subject to any license of the American Telephone and + * Telegraph Company or of the Regents of the University of California. + * + * Permission is granted to anyone to use this software for any purpose on any + * computer system, and to alter it and redistribute it freely, subject to + * the following restrictions: + * + * 1. The author is not responsible for the consequences of use of this + * software, no matter how awful, even if they arise from flaws in it. + * + * 2. The origin of this software must not be misrepresented, either by + * explicit claim or by omission. Since few users ever read sources, credits + * must appear in the documentation. + * + * 3. Altered versions must be plainly marked as such, and must not be + * misrepresented as being the original software. Since few users ever read + * sources, credits must appear in the documentation. + * + * 4. This notice may not be removed or altered. + * ------------------------------------------------------------------------- + * + * For compliance with Mr Darwin's terms: this has been very significantly + * modified from the free "file" command. + * - all-in-one file for compilation convenience when moving from one + * version of Apache to the next. + * - Memory allocation is done through the Apache API's pool structure. + * - All functions have had necessary Apache API request or server + * structures passed to them where necessary to call other Apache API + * routines. (i.e. usually for logging, files, or memory allocation in + * itself or a called function.) + * - struct magic has been converted from an array to a single-ended linked + * list because it only grows one record at a time, it's only accessed + * sequentially, and the Apache API has no equivalent of realloc(). 
+ * - Functions have been changed to get their parameters from the server + * configuration instead of globals. (It should be reentrant now but has + * not been tested in a threaded environment.) + * - Places where it used to print results to stdout now saves them in a + * list where they're used to set the MIME type in the Apache request + * record. + * - Command-line flags have been removed since they will never be used here. + * + * Ian Kluft <ikluft@cisco.com> + * Engineering Information Framework + * Central Engineering + * Cisco Systems, Inc. + * San Jose, CA, USA + * + * Initial installation July/August 1996 + * Misc bug fixes May 1997 + * Submission to Apache Group July 1997 + * + */ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "php.h" +#include "php_ini.h" +#include "ext/standard/info.h" +#include "php_mime_magic.h" + +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#define MODNAME "mime_magic" + +ZEND_DECLARE_MODULE_GLOBALS(mime_magic) + + +/* True global resources - no need for thread safety here */ +/* static int le_mime_magic; */ +static magic_server_config_rec mime_global; + +/* {{{ mime_magic_functions[] + * + * Every user visible function must have an entry in mime_magic_functions[]. + */ +function_entry mime_magic_functions[] = { + PHP_FE(mime_content_type, NULL) /* For testing, remove later. 
*/ + {NULL, NULL, NULL} /* Must be the last line in mime_magic_functions[] */ +}; +/* }}} */ + +/* {{{ mime_magic_module_entry + */ +zend_module_entry mime_magic_module_entry = { +#if ZEND_MODULE_API_NO >= 20010901 + STANDARD_MODULE_HEADER, +#endif + "mime_magic", + mime_magic_functions, + PHP_MINIT(mime_magic), + PHP_MSHUTDOWN(mime_magic), + PHP_RINIT(mime_magic), /* Replace with NULL if there's nothing to do at request start */ + PHP_RSHUTDOWN(mime_magic), /* Replace with NULL if there's nothing to do at request end */ + PHP_MINFO(mime_magic), +#if ZEND_MODULE_API_NO >= 20010901 + "0.1", /* Replace with version number for your extension */ +#endif + STANDARD_MODULE_PROPERTIES +}; +/* }}} */ + +#ifdef COMPILE_DL_MIME_MAGIC +ZEND_GET_MODULE(mime_magic) +#endif + +/* {{{ PHP_INI + */ +PHP_INI_BEGIN() + STD_PHP_INI_ENTRY("mime_magic.magicfile", "/usr/share/misc/magic.mime", PHP_INI_SYSTEM, OnUpdateString, magicfile, zend_mime_magic_globals, mime_magic_globals) +PHP_INI_END() +/* }}} */ + +/* {{{ php_mime_magic_init_globals + */ +static void php_mime_magic_init_globals(zend_mime_magic_globals *mime_magic_globals) +{ + mime_magic_globals->magicfile = NULL; +} +/* }}} */ + +/* {{{ PHP_MINIT_FUNCTION + */ +PHP_MINIT_FUNCTION(mime_magic) +{ + ZEND_INIT_MODULE_GLOBALS(mime_magic, php_mime_magic_init_globals, NULL); + REGISTER_INI_ENTRIES(); + + if(mime_magic_globals.magicfile) { + mime_global.magicfile = mime_magic_globals.magicfile; + apprentice(); + } + + return SUCCESS; +} +/* }}} */ + +/* {{{ PHP_MSHUTDOWN_FUNCTION + */ +PHP_MSHUTDOWN_FUNCTION(mime_magic) +{ + UNREGISTER_INI_ENTRIES(); + return SUCCESS; +} +/* }}} */ + +/* Remove if there's nothing to do at request start */ +/* {{{ PHP_RINIT_FUNCTION + */ +PHP_RINIT_FUNCTION(mime_magic) +{ + return SUCCESS; +} +/* }}} */ + +/* Remove if there's nothing to do at request end */ +/* {{{ PHP_RSHUTDOWN_FUNCTION + */ +PHP_RSHUTDOWN_FUNCTION(mime_magic) +{ + return SUCCESS; +} +/* }}} */ + +/* {{{ PHP_MINFO_FUNCTION + */ 
+PHP_MINFO_FUNCTION(mime_magic) +{ + php_info_print_table_start(); + php_info_print_table_header(2, "mime_magic support", "enabled"); + php_info_print_table_end(); + + /* Remove comments if you have entries in php.ini + DISPLAY_INI_ENTRIES(); + */ +} +/* }}} */ + + +/* Remove the following function when you have succesfully modified config.m4 + so that your module can be compiled into PHP, it exists only for testing + purposes. */ + +/* {{{ proto string mime_content_type(string filename) + Return content-type for file */ +PHP_FUNCTION(mime_content_type) +{ + char *filename = NULL; + int filename_len; + magic_server_config_rec *conf = &mime_global; + char *content_type=NULL, *content_encoding=NULL; + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &filename, &filename_len) == FAILURE) { + return; + } + + if(!conf->magic) { + php_error(E_WARNING, MODNAME " not initialized"); + RETURN_FALSE; + } + + magic_set_config(); + + if(OK != magic_process(filename)) { + RETVAL_FALSE; + } else if(OK != magic_rsl_get(&content_type, &content_encoding)) { + RETVAL_FALSE; + } else { + RETVAL_STRING(content_type, 1); + } + + if(content_type) efree(content_type); + if(content_encoding) efree(content_encoding); + + magic_free_config(MIME_MAGIC_G(req_dat)); +} +/* }}} */ + +#define EATAB {while (isspace((unsigned char) *l)) ++l;} + +/* + * apprentice - load configuration from the magic file r + * API request record + */ +static int apprentice(void) +{ + FILE *f; + char line[BUFSIZ + 1]; + int errs = 0; + int lineno; +#if MIME_MAGIC_DEBUG + int rule = 0; + struct magic *m, *prevm; +#endif + char *fname; + magic_server_config_rec *conf = &mime_global; + + fname = conf->magicfile; // todo cwd? 
+ f = fopen(fname, "r"); + if (f == NULL) { + php_error(E_WARNING, + MODNAME ": can't read magic file %s", fname); + return -1; + } + + /* set up the magic list (empty) */ + conf->magic = conf->last = NULL; + + /* parse it */ + for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++) { + int ws_offset; + + /* delete newline */ + if (line[0]) { + line[strlen(line) - 1] = '\0'; + } + + /* skip leading whitespace */ + ws_offset = 0; + while (line[ws_offset] && isspace(line[ws_offset])) { + ws_offset++; + } + + /* skip blank lines */ + if (line[ws_offset] == 0) { + continue; + } + + /* comment, do not parse */ + if (line[ws_offset] == '#') + continue; + +#if MIME_MAGIC_DEBUG + /* if we get here, we're going to use it so count it */ + rule++; +#endif + + /* parse it */ + if (parse(line + ws_offset, lineno) != 0) + ++errs; + } + + (void) fclose(f); + +#if MIME_MAGIC_DEBUG + php_error(E_NOTICE, + MODNAME ": apprentice conf=%x file=%s m=%s m->next=%s last=%s", + conf, + conf->magicfile ? conf->magicfile : "NULL", + conf->magic ? "set" : "NULL", + (conf->magic && conf->magic->next) ? "set" : "NULL", + conf->last ? "set" : "NULL"); + php_error(E_NOTICE, + MODNAME ": apprentice read %d lines, %d rules, %d errors", + lineno, rule, errs); +#endif + +#if MIME_MAGIC_DEBUG + prevm = 0; + php_error(E_NOTICE, + MODNAME ": apprentice test"); + for (m = conf->magic; m; m = m->next) { + if (isprint((((unsigned long) m) >> 24) & 255) && + isprint((((unsigned long) m) >> 16) & 255) && + isprint((((unsigned long) m) >> 8) & 255) && + isprint(((unsigned long) m) & 255)) { + php_error(E_NOTICE, + MODNAME ": apprentice: POINTER CLOBBERED! " + "m=\"%c%c%c%c\" line=%d", + (((unsigned long) m) >> 24) & 255, + (((unsigned long) m) >> 16) & 255, + (((unsigned long) m) >> 8) & 255, + ((unsigned long) m) & 255, + prevm ? prevm->lineno : -1); + break; + } + prevm = m; + } +#endif + + return (errs ? 
-1 : 0); +} + +/* + * extend the sign bit if the comparison is to be signed + */ +static unsigned long signextend(struct magic *m, unsigned long v) +{ + if (!(m->flag & UNSIGNED)) + switch (m->type) { + /* + * Do not remove the casts below. They are vital. When later + * compared with the data, the sign extension must have happened. + */ + case BYTE: + v = (char) v; + break; + case SHORT: + case BESHORT: + case LESHORT: + v = (short) v; + break; + case DATE: + case BEDATE: + case LEDATE: + case LONG: + case BELONG: + case LELONG: + v = (long) v; + break; + case STRING: + break; + default: + php_error(E_WARNING, + MODNAME ": can't happen: m->type=%d", m->type); + return -1; + } + return v; +} + +/* + * parse one line from magic file, put into magic[index++] if valid + */ +static int parse(char *l, int lineno) +{ + struct magic *m; + char *t, *s; + magic_server_config_rec *conf = &mime_global; + + /* allocate magic structure entry */ + m = (struct magic *) calloc(1, sizeof(struct magic)); + + /* append to linked list */ + m->next = NULL; + if (!conf->magic || !conf->last) { + conf->magic = conf->last = m; + } + else { + conf->last->next = m; + conf->last = m; + } + + /* set values in magic structure */ + m->flag = 0; + m->cont_level = 0; + m->lineno = lineno; + + while (*l == '>') { + ++l; /* step over */ + m->cont_level++; + } + + if (m->cont_level != 0 && *l == '(') { + ++l; /* step over */ + m->flag |= INDIR; + } + + /* get offset, then skip over it */ + m->offset = (int) strtol(l, &t, 0); + if (l == t) { + php_error(E_WARNING, + MODNAME ": offset %s invalid", l); + } + l = t; + + if (m->flag & INDIR) { + m->in.type = LONG; + m->in.offset = 0; + /* + * read [.lbs][+-]nnnnn) + */ + if (*l == '.') { + switch (*++l) { + case 'l': + m->in.type = LONG; + break; + case 's': + m->in.type = SHORT; + break; + case 'b': + m->in.type = BYTE; + break; + default: + php_error(E_WARNING, + MODNAME ": indirect offset type %c invalid", *l); + break; + } + l++; + } + s = l; + if 
(*l == '+' || *l == '-') + l++; + if (isdigit((unsigned char) *l)) { + m->in.offset = strtol(l, &t, 0); + if (*s == '-') + m->in.offset = -m->in.offset; + } + else + t = l; + if (*t++ != ')') { + php_error(E_WARNING, + MODNAME ": missing ')' in indirect offset"); + } + l = t; + } + + + while (isdigit((unsigned char) *l)) + ++l; + EATAB; + +#define NBYTE 4 +#define NSHORT 5 +#define NLONG 4 +#define NSTRING 6 +#define NDATE 4 +#define NBESHORT 7 +#define NBELONG 6 +#define NBEDATE 6 +#define NLESHORT 7 +#define NLELONG 6 +#define NLEDATE 6 + + if (*l == 'u') { + ++l; + m->flag |= UNSIGNED; + } + + /* get type, skip it */ + if (strncmp(l, "byte", NBYTE) == 0) { + m->type = BYTE; + l += NBYTE; + } + else if (strncmp(l, "short", NSHORT) == 0) { + m->type = SHORT; + l += NSHORT; + } + else if (strncmp(l, "long", NLONG) == 0) { + m->type = LONG; + l += NLONG; + } + else if (strncmp(l, "string", NSTRING) == 0) { + m->type = STRING; + l += NSTRING; + } + else if (strncmp(l, "date", NDATE) == 0) { + m->type = DATE; + l += NDATE; + } + else if (strncmp(l, "beshort", NBESHORT) == 0) { + m->type = BESHORT; + l += NBESHORT; + } + else if (strncmp(l, "belong", NBELONG) == 0) { + m->type = BELONG; + l += NBELONG; + } + else if (strncmp(l, "bedate", NBEDATE) == 0) { + m->type = BEDATE; + l += NBEDATE; + } + else if (strncmp(l, "leshort", NLESHORT) == 0) { + m->type = LESHORT; + l += NLESHORT; + } + else if (strncmp(l, "lelong", NLELONG) == 0) { + m->type = LELONG; + l += NLELONG; + } + else if (strncmp(l, "ledate", NLEDATE) == 0) { + m->type = LEDATE; + l += NLEDATE; + } + else { + php_error(E_WARNING, + MODNAME ": type %s invalid", l); + return -1; + } + /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ + if (*l == '&') { + ++l; + m->mask = signextend(m, strtol(l, &l, 0)); + } + else + m->mask = ~0L; + EATAB; + + switch (*l) { + case '>': + case '<': + /* Old-style anding: "0 byte &0x80 dynamically linked" */ + case '&': + case '^': + case '=': + m->reln = *l; + ++l; 
+ break; + case '!': + if (m->type != STRING) { + m->reln = *l; + ++l; + break; + } + /* FALL THROUGH */ + default: + if (*l == 'x' && isspace((unsigned char) l[1])) { + m->reln = *l; + ++l; + goto GetDesc; /* Bill The Cat */ + } + m->reln = '='; + break; + } + EATAB; + + if (getvalue(m, &l)) + return -1; + /* + * now get last part - the description + */ + GetDesc: + EATAB; + if (l[0] == '\b') { + ++l; + m->nospflag = 1; + } + else if ((l[0] == '\\') && (l[1] == 'b')) { + ++l; + ++l; + m->nospflag = 1; + } + else + m->nospflag = 0; + strncpy(m->desc, l, sizeof(m->desc) - 1); + m->desc[sizeof(m->desc) - 1] = '\0'; + +#if MIME_MAGIC_DEBUG + php_error(E_NOTICE, + MODNAME ": parse line=%d m=%x next=%x cont=%d desc=%s", + lineno, m, m->next, m->cont_level, m->desc); +#endif /* MIME_MAGIC_DEBUG */ + + return 0; +} + +/* + * Read a numeric value from a pointer, into the value union of a magic + * pointer, according to the magic type. Update the string pointer to point + * just after the number read. Return 0 for success, non-zero for failure. + */ +static int getvalue(struct magic *m, char **p) +{ + int slen; + + if (m->type == STRING) { + *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen); + m->vallen = slen; + } + else if (m->reln != 'x') + m->value.l = signextend(m, strtol(*p, p, 0)); + return 0; +} + +/* + * Convert a string containing C character escapes. Stop at an unescaped + * space or tab. Copy the converted version to "p", returning its length in + * *slen. Return updated scan pointer as function result. 
+ */ +static char *getstr(register char *s, register char *p, + int plen, int *slen) +{ + char *origs = s, *origp = p; + char *pmax = p + plen - 1; + register int c; + register int val; + + while ((c = *s++) != '\0') { + if (isspace((unsigned char) c)) + break; + if (p >= pmax) { + php_error(E_WARNING, + MODNAME ": string too long: %s", origs); + break; + } + if (c == '\\') { + switch (c = *s++) { + + case '\0': + goto out; + + default: + *p++ = (char) c; + break; + + case 'n': + *p++ = '\n'; + break; + + case 'r': + *p++ = '\r'; + break; + + case 'b': + *p++ = '\b'; + break; + + case 't': + *p++ = '\t'; + break; + + case 'f': + *p++ = '\f'; + break; + + case 'v': + *p++ = '\v'; + break; + + /* \ and up to 3 octal digits */ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + val = c - '0'; + c = *s++; /* try for 2 */ + if (c >= '0' && c <= '7') { + val = (val << 3) | (c - '0'); + c = *s++; /* try for 3 */ + if (c >= '0' && c <= '7') + val = (val << 3) | (c - '0'); + else + --s; + } + else + --s; + *p++ = (char) val; + break; + + /* \x and up to 3 hex digits */ + case 'x': + val = 'x'; /* Default if no digits */ + c = hextoint(*s++); /* Get next char */ + if (c >= 0) { + val = c; + c = hextoint(*s++); + if (c >= 0) { + val = (val << 4) + c; + c = hextoint(*s++); + if (c >= 0) { + val = (val << 4) + c; + } + else + --s; + } + else + --s; + } + else + --s; + *p++ = (char) val; + break; + } + } + else + *p++ = (char) c; + } + out: + *p = '\0'; + *slen = p - origp; + return s; +} + +/* Single hex char to int; -1 if not a hex char. 
*/ +static int hextoint(int c) +{ + if (isdigit((unsigned char) c)) + return c - '0'; + if ((c >= 'a') && (c <= 'f')) + return c + 10 - 'a'; + if ((c >= 'A') && (c <= 'F')) + return c + 10 - 'A'; + return -1; +} + + +/* + * RSL (result string list) processing routines + * + * These collect strings that would have been printed in fragments by file(1) + * into a list of magic_rsl structures with the strings. When complete, + * they're concatenated together to become the MIME content and encoding + * types. + * + * return value conventions for these functions: functions which return int: + * failure = -1, other = result functions which return pointers: failure = 0, + * other = result + */ + +/* allocate a per-request structure and put it in the request record */ +static magic_req_rec *magic_set_config(void) +{ + magic_req_rec *req_dat = (magic_req_rec *) emalloc(sizeof(magic_req_rec)); + + req_dat->head = req_dat->tail = (magic_rsl *) NULL; + MIME_MAGIC_G(req_dat) = req_dat; + return req_dat; +} + +static void magic_free_config(magic_req_rec *req_dat) { + magic_rsl *curr, *next; + + if(!req_dat) return; + + curr = req_dat->head; + while(curr) { + next = curr->next; + efree(curr->str); + efree(curr); + curr = next; + } + + efree(req_dat); +} + +/* add a string to the result string list for this request */ +/* it is the responsibility of the caller to allocate "str" */ +static int magic_rsl_add(char *str) +{ + magic_req_rec *req_dat = MIME_MAGIC_G(req_dat); + magic_rsl *rsl; + + /* make sure we have a list to put it in */ + if (!req_dat) { + php_error(E_WARNING, + MODNAME ": request config should not be NULL"); + if (!(req_dat = magic_set_config())) { + /* failure */ + return -1; + } + } + + /* allocate the list entry */ + rsl = (magic_rsl *) emalloc(sizeof(magic_rsl)); + + /* fill it */ + rsl->str = estrdup(str); + rsl->next = (magic_rsl *) NULL; + + /* append to the list */ + if (req_dat->head && req_dat->tail) { + req_dat->tail->next = rsl; + req_dat->tail = rsl; + } 
+ else { + req_dat->head = req_dat->tail = rsl; + } + + /* success */ + return 0; +} + +/* RSL hook for puts-type functions */ +static int magic_rsl_puts(char *str) +{ + return magic_rsl_add(str); +} + +/* RSL hook for printf-type functions */ +static int magic_rsl_printf(char *str,...) +{ + va_list ap; + + char buf[MAXMIMESTRING]; + + /* assemble the string into the buffer */ + va_start(ap, str); + vsnprintf(buf, sizeof(buf), str, ap); + va_end(ap); + + /* add the buffer to the list */ + return magic_rsl_add(buf); +} + +/* RSL hook for putchar-type functions */ +static int magic_rsl_putchar(char c) +{ + char str[2]; + + /* high overhead for 1 char - just hope they don't do this much */ + str[0] = c; + str[1] = '\0'; + return magic_rsl_add(str); +} + +/* allocate and copy a contiguous string from a result string list */ +static char *rsl_strdup(int start_frag, int start_pos, int len) +{ + char *result; /* return value */ + int cur_frag, /* current fragment number/counter */ + cur_pos, /* current position within fragment */ + res_pos; /* position in result string */ + magic_rsl *frag; /* list-traversal pointer */ + magic_req_rec *req_dat = MIME_MAGIC_G(req_dat); + + /* allocate the result string */ + result = (char *) emalloc(len + 1); + + /* loop through and collect the string */ + res_pos = 0; + for (frag = req_dat->head, cur_frag = 0; + frag->next; + frag = frag->next, cur_frag++) { + /* loop to the first fragment */ + if (cur_frag < start_frag) + continue; + + /* loop through and collect chars */ + for (cur_pos = (cur_frag == start_frag) ? 
start_pos : 0; + frag->str[cur_pos]; + cur_pos++) { + if (cur_frag >= start_frag + && cur_pos >= start_pos + && res_pos <= len) { + result[res_pos++] = frag->str[cur_pos]; + if (res_pos > len) { + break; + } + } + } + } + + /* clean up and return */ + result[res_pos] = 0; +#if MIME_MAGIC_DEBUG + php_error(E_NOTICE, + MODNAME ": rsl_strdup() %d chars: %s", res_pos - 1, result); +#endif + return result; +} + +/* + * magic_process - process input file r Apache API request record + * (formerly called "process" in file command, prefix added for clarity) Opens + * the file and reads a fixed-size buffer to begin processing the contents. + */ +static int magic_process(char *filename) +{ + int fd = 0; + unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */ + int nbytes = 0; /* number of bytes read from a datafile */ + int result; + + /* + * first try judging the file based on its filesystem status + */ + switch ((result = fsmagic(filename))) { + case DONE: + magic_rsl_putchar('\n'); + return OK; + case OK: + break; + default: + /* fatal error, bail out */ + return result; + } + + if ((fd = open(filename, O_RDONLY, 0)) < 0) { + /* We can't open it, but we were able to stat it. 
*/ + php_error(E_WARNING, + MODNAME ": can't read `%s'", filename); + /* let some other handler decide what the problem is */ + return DECLINED; + } + + /* + * try looking at the first HOWMANY bytes + */ + if ((nbytes = read(fd, (char *) buf, sizeof(buf) - 1)) == -1) { + php_error(E_WARNING, + MODNAME ": read failed: %s", filename); + return ERROR; + } + + if (nbytes == 0) + magic_rsl_puts(MIME_TEXT_UNKNOWN); + else { + buf[nbytes++] = '\0'; /* null-terminate it */ + tryit(buf, nbytes, 1); + } + + (void) close(fd); + (void) magic_rsl_putchar('\n'); + + return OK; +} + +static void tryit(unsigned char *buf, int nb, int checkzmagic) +{ + /* + * Try compression stuff + */ +#if 0 /* TODO */ + if (checkzmagic == 1) { + if (zmagic(buf, nb) == 1) + return; + } +#endif + + /* + * try tests in /etc/magic (or surrogate magic file) + */ + if (softmagic(buf, nb) == 1) + return; + + /* + * try known keywords, check for ascii-ness too. + */ + if (ascmagic(buf, nb) == 1) + return; + + /* + * abandon hope, all ye who remain here + */ + magic_rsl_puts(MIME_BINARY_UNKNOWN); +} + + +/* + * return DONE to indicate it's been handled + * return OK to indicate it's a regular file still needing handling + * other returns indicate a failure of some sort + */ +static int fsmagic(const char *fn) +{ + struct stat finfo; + + stat(fn, &finfo); + + switch (finfo.st_mode & S_IFMT) { + case S_IFDIR: + magic_rsl_puts(DIR_MAGIC_TYPE); + return DONE; + case S_IFCHR: + /* + * (void) magic_rsl_printf(r,"character special (%d/%d)", + * major(sb->st_rdev), minor(sb->st_rdev)); + */ + (void) magic_rsl_puts(MIME_BINARY_UNKNOWN); + return DONE; +#ifdef S_IFBLK + case S_IFBLK: + /* + * (void) magic_rsl_printf(r,"block special (%d/%d)", + * major(sb->st_rdev), minor(sb->st_rdev)); + */ + (void) magic_rsl_puts(MIME_BINARY_UNKNOWN); + return DONE; + /* TODO add code to handle V7 MUX and Blit MUX files */ +#endif +#ifdef S_IFIFO + case S_IFIFO: + /* + * magic_rsl_puts(r,"fifo (named pipe)"); + */ + (void) 
magic_rsl_puts(MIME_BINARY_UNKNOWN); + return DONE; +#endif +#ifdef S_IFLNK + case S_IFLNK: + /* We used stat(), the only possible reason for this is that the + * symlink is broken. + */ + php_error(E_WARNING, + MODNAME ": broken symlink (%s)", fn); + return ERROR; +#endif +#ifdef S_IFSOCK +#ifndef __COHERENT__ + case S_IFSOCK: + magic_rsl_puts(MIME_BINARY_UNKNOWN); + return DONE; +#endif +#endif + case S_IFREG: + break; + default: + php_error(E_WARNING, + MODNAME ": invalid mode 0%o.", (unsigned int)finfo.st_mode); + return ERROR; + } + + /* + * regular file, check next possibility + */ + if (finfo.st_size == 0) { + magic_rsl_puts(MIME_TEXT_UNKNOWN); + return DONE; + } + return OK; +} + +/* + * softmagic - lookup one file in database (already read from /etc/magic by + * apprentice.c). Passed the name and FILE * of one file to be typed. + */ +/* ARGSUSED1 *//* nbytes passed for regularity, maybe need later */ +static int softmagic(unsigned char *buf, int nbytes) +{ + if (match(buf, nbytes)) + return 1; + + return 0; +} + +/* + * Go through the whole list, stopping if you find a match. Process all the + * continuations of that match before returning. + * + * We support multi-level continuations: + * + * At any time when processing a successful top-level match, there is a current + * continuation level; it represents the level of the last successfully + * matched continuation. + * + * Continuations above that level are skipped as, if we see one, it means that + * the continuation that controls them - i.e, the lower-level continuation + * preceding them - failed to match. + * + * Continuations below that level are processed as, if we see one, it means + * we've finished processing or skipping higher-level continuations under the + * control of a successful or unsuccessful lower-level continuation, and are + * now seeing the next lower-level continuation and should process it. The + * current continuation level reverts to the level of the one we're seeing. 
+ * + * Continuations at the current level are processed as, if we see one, there's + * no lower-level continuation that may have failed. + * + * If a continuation matches, we bump the current continuation level so that + * higher-level continuations are processed. + */ +static int match(unsigned char *s, int nbytes) +{ +#if MIME_MAGIC_DEBUG + int rule_counter = 0; +#endif + int cont_level = 0; + int need_separator = 0; + union VALUETYPE p; + magic_server_config_rec *conf = &mime_global; + struct magic *m; + +#if MIME_MAGIC_DEBUG + php_error(E_NOTICE, + MODNAME ": match conf=%x file=%s m=%s m->next=%s last=%s", + conf, + conf->magicfile ? conf->magicfile : "NULL", + conf->magic ? "set" : "NULL", + (conf->magic && conf->magic->next) ? "set" : "NULL", + conf->last ? "set" : "NULL"); +#endif + +#if MIME_MAGIC_DEBUG + for (m = conf->magic; m; m = m->next) { + if (isprint((((unsigned long) m) >> 24) & 255) && + isprint((((unsigned long) m) >> 16) & 255) && + isprint((((unsigned long) m) >> 8) & 255) && + isprint(((unsigned long) m) & 255)) { + php_error(E_NOTICE, + MODNAME ": match: POINTER CLOBBERED! 
" + "m=\"%c%c%c%c\"", + (((unsigned long) m) >> 24) & 255, + (((unsigned long) m) >> 16) & 255, + (((unsigned long) m) >> 8) & 255, + ((unsigned long) m) & 255); + break; + } + } +#endif + + for (m = conf->magic; m; m = m->next) { +#if MIME_MAGIC_DEBUG + rule_counter++; + php_error(E_NOTICE, + MODNAME ": line=%d desc=%s", m->lineno, m->desc); +#endif + + /* check if main entry matches */ + if (!mget(&p, s, m, nbytes) || + !mcheck(&p, m)) { + struct magic *m_cont; + + /* + * main entry didn't match, flush its continuations + */ + if (!m->next || (m->next->cont_level == 0)) { + continue; + } + + m_cont = m->next; + while (m_cont && (m_cont->cont_level != 0)) { +#if MIME_MAGIC_DEBUG + rule_counter++; + php_error(E_NOTICE, + MODNAME ": line=%d mc=%x mc->next=%x cont=%d desc=%s", + m_cont->lineno, m_cont, + m_cont->next, m_cont->cont_level, + m_cont->desc); +#endif + /* + * this trick allows us to keep *m in sync when the continue + * advances the pointer + */ + m = m_cont; + m_cont = m_cont->next; + } + continue; + } + + /* if we get here, the main entry rule was a match */ + /* this will be the last run through the loop */ +#if MIME_MAGIC_DEBUG + php_error(E_NOTICE, + MODNAME ": rule matched, line=%d type=%d %s", + m->lineno, m->type, + (m->type == STRING) ? m->value.s : ""); +#endif + + /* print the match */ + mprint(&p, m); + + /* + * If we printed something, we'll need to print a blank before we + * print something else. + */ + if (m->desc[0]) + need_separator = 1; + /* and any continuations that match */ + cont_level++; + /* + * while (m && m->next && m->next->cont_level != 0 && ( m = m->next + * )) + */ + m = m->next; + while (m && (m->cont_level != 0)) { +#if MIME_MAGIC_DEBUG + php_error(E_NOTICE, + MODNAME ": match line=%d cont=%d type=%d %s", + m->lineno, m->cont_level, m->type, + (m->type == STRING) ? 
m->value.s : ""); +#endif + if (cont_level >= m->cont_level) { + if (cont_level > m->cont_level) { + /* + * We're at the end of the level "cont_level" + * continuations. + */ + cont_level = m->cont_level; + } + if (mget(&p, s, m, nbytes) && + mcheck(&p, m)) { + /* + * This continuation matched. Print its message, with a + * blank before it if the previous item printed and this + * item isn't empty. + */ + /* space if previous printed */ + if (need_separator + && (m->nospflag == 0) + && (m->desc[0] != '\0') + ) { + (void) magic_rsl_putchar(' '); + need_separator = 0; + } + mprint(&p, m); + if (m->desc[0]) + need_separator = 1; + + /* + * If we see any continuations at a higher level, process + * them. + */ + cont_level++; + } + } + + /* move to next continuation record */ + m = m->next; + } +#if MIME_MAGIC_DEBUG + php_error(E_NOTICE, + MODNAME ": matched after %d rules", rule_counter); +#endif + return 1; /* all through */ + } +#if MIME_MAGIC_DEBUG + php_error(E_NOTICE, + MODNAME ": failed after %d rules", rule_counter); +#endif + return 0; /* no match at all */ +} + +/* an optimization over plain strcmp() */ +#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0) + +static int ascmagic(unsigned char *buf, int nbytes) +{ + int has_escapes = 0; + unsigned char *s; + char nbuf[HOWMANY + 1]; /* one extra for terminating '\0' */ + char *token; + register struct names *p; + int small_nbytes; + + /* these are easy, do them first */ + + /* + * for troff, look for . + letter + letter or .\"; this must be done to + * disambiguate tar archives' ./file and other trash from real troff + * input. 
+ */ + if (*buf == '.') { + unsigned char *tp = buf + 1; + + while (isspace(*tp)) + ++tp; /* skip leading whitespace */ + if ((isalnum(*tp) || *tp == '\\') && + (isalnum(*(tp + 1)) || *tp == '"')) { + magic_rsl_puts("application/x-troff"); + return 1; + } + } + if ((*buf == 'c' || *buf == 'C') && isspace(*(buf + 1))) { + /* Fortran */ + magic_rsl_puts("text/plain"); + return 1; + } + + /* look for tokens from names.h - this is expensive!, so we'll limit + * ourselves to only SMALL_HOWMANY bytes */ + small_nbytes = (nbytes > SMALL_HOWMANY) ? SMALL_HOWMANY : nbytes; + /* make a copy of the buffer here because strtok() will destroy it */ + s = (unsigned char *) memcpy(nbuf, buf, small_nbytes); + s[small_nbytes] = '\0'; + has_escapes = (memchr(s, '\033', small_nbytes) != NULL); + /* XXX: not multithread safe */ + while ((token = strtok((char *) s, " \t\n\r\f")) != NULL) { + s = NULL; /* make strtok() keep on tokin' */ + for (p = names; p < names + NNAMES; p++) { + if (STREQ(p->name, token)) { + magic_rsl_puts(types[p->type]); + if (has_escapes) + magic_rsl_puts(" (with escape sequences)"); + return 1; + } + } + } + + switch (is_tar(buf, nbytes)) { + case 1: + /* V7 tar archive */ + magic_rsl_puts("application/x-tar"); + return 1; + case 2: + /* POSIX tar archive */ + magic_rsl_puts("application/x-tar"); + return 1; + } + + /* all else fails, but it is ascii... */ + if (has_escapes) { + /* text with escape sequences */ + /* we leave this open for further differentiation later */ + magic_rsl_puts("text/plain"); + } + else { + /* plain text */ + magic_rsl_puts("text/plain"); + } + return 1; +} + +/* + * is_tar() -- figure out whether file is a tar archive. + * + * Stolen (by author of file utility) from the public domain tar program: Public + * Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu). 
+ * + * @(#)list.c 1.18 9/23/86 Public Domain - gnu $Id: mod_mime_magic.c,v 1.7 + * 1997/06/24 00:41:02 ikluft Exp ikluft $ + * + * Comments changed and some code/comments reformatted for file command by Ian + * Darwin. + */ + +#define isodigit(c) ( ((c) >= '0') && ((c) <= '7') ) + +/* + * Return 0 if the checksum is bad (i.e., probably not a tar archive), 1 for + * old UNIX tar file, 2 for Unix Std (POSIX) tar file. + */ + +static int is_tar(unsigned char *buf, int nbytes) +{ + register union record *header = (union record *) buf; + register int i; + register long sum, recsum; + register char *p; + + if (nbytes < sizeof(union record)) + return 0; + + recsum = from_oct(8, header->header.chksum); + + sum = 0; + p = header->charptr; + for (i = sizeof(union record); --i >= 0;) { + /* + * We can't use unsigned char here because of old compilers, e.g. V7. + */ + sum += 0xFF & *p++; + } + + /* Adjust checksum to count the "chksum" field as blanks. */ + for (i = sizeof(header->header.chksum); --i >= 0;) + sum -= 0xFF & header->header.chksum[i]; + sum += ' ' * sizeof header->header.chksum; + + if (sum != recsum) + return 0; /* Not a tar archive */ + + if (0 == strcmp(header->header.magic, TMAGIC)) + return 2; /* Unix Standard tar archive */ + + return 1; /* Old fashioned tar archive */ +} + + +/* + * Quick and dirty octal conversion. + * + * Result is -1 if the field is invalid (all blank, or nonoctal). 
+ */ +static long from_oct(int digs, char *where) +{ + register long value; + + while (isspace(*where)) { /* Skip spaces */ + where++; + if (--digs <= 0) + return -1; /* All blank field */ + } + value = 0; + while (digs > 0 && isodigit(*where)) { /* Scan til nonoctal */ + value = (value << 3) | (*where++ - '0'); + --digs; + } + + if (digs > 0 && *where && !isspace(*where)) + return -1; /* Ended on non-space/nul */ + + return value; +} + +static int mget(union VALUETYPE *p, unsigned char *s, + struct magic *m, int nbytes) +{ + long offset = m->offset; + + if (offset + sizeof(union VALUETYPE) > nbytes) + return 0; + + memcpy(p, s + offset, sizeof(union VALUETYPE)); + + if (!mconvert(p, m)) + return 0; + + if (m->flag & INDIR) { + + switch (m->in.type) { + case BYTE: + offset = p->b + m->in.offset; + break; + case SHORT: + offset = p->h + m->in.offset; + break; + case LONG: + offset = p->l + m->in.offset; + break; + } + + if (offset + sizeof(union VALUETYPE) > nbytes) + return 0; + + memcpy(p, s + offset, sizeof(union VALUETYPE)); + + if (!mconvert(p, m)) + return 0; + } + return 1; +} + +static int mcheck(union VALUETYPE *p, struct magic *m) +{ + register unsigned long l = m->value.l; + register unsigned long v; + int matched; + + if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) { + php_error(E_WARNING, + MODNAME ": BOINK"); + return 1; + } + + switch (m->type) { + case BYTE: + v = p->b; + break; + + case SHORT: + case BESHORT: + case LESHORT: + v = p->h; + break; + + case LONG: + case BELONG: + case LELONG: + case DATE: + case BEDATE: + case LEDATE: + v = p->l; + break; + + case STRING: + l = 0; + /* + * What we want here is: v = strncmp(m->value.s, p->s, m->vallen); + * but ignoring any nulls. bcmp doesn't give -/+/0 and isn't + * universally available anyway. 
+ */ + v = 0; + { + register unsigned char *a = (unsigned char *) m->value.s; + register unsigned char *b = (unsigned char *) p->s; + register int len = m->vallen; + + while (--len >= 0) + if ((v = *b++ - *a++) != 0) + break; + } + break; + default: + /* bogosity, pretend that it just wasn't a match */ + php_error(E_WARNING, + MODNAME ": invalid type %d in mcheck().", m->type); + return 0; + } + + v = signextend(m, v) & m->mask; + + switch (m->reln) { + case 'x': +#if MIME_MAGIC_DEBUG + php_error(E_NOTICE, + "%lu == *any* = 1", v); +#endif + matched = 1; + break; + + case '!': + matched = v != l; +#if MIME_MAGIC_DEBUG + php_error(E_NOTICE, + "%lu != %lu = %d", v, l, matched); +#endif + break; + + case '=': + matched = v == l; +#if MIME_MAGIC_DEBUG + php_error(E_NOTICE, + "%lu == %lu = %d", v, l, matched); +#endif + break; + + case '>': + if (m->flag & UNSIGNED) { + matched = v > l; +#if MIME_MAGIC_DEBUG + php_error(E_NOTICE, + "%lu > %lu = %d", v, l, matched); +#endif + } + else { + matched = (long) v > (long) l; +#if MIME_MAGIC_DEBUG + php_error(E_NOTICE, + "%ld > %ld = %d", v, l, matched); +#endif + } + break; + + case '<': + if (m->flag & UNSIGNED) { + matched = v < l; +#if MIME_MAGIC_DEBUG + php_error(E_NOTICE, + "%lu < %lu = %d", v, l, matched); +#endif + } + else { + matched = (long) v < (long) l; +#if MIME_MAGIC_DEBUG + php_error(E_NOTICE, + "%ld < %ld = %d", v, l, matched); +#endif + } + break; + + case '&': + matched = (v & l) == l; +#if MIME_MAGIC_DEBUG + php_error(E_NOTICE, + "((%lx & %lx) == %lx) = %d", v, l, l, matched); +#endif + break; + + case '^': + matched = (v & l) != l; +#if MIME_MAGIC_DEBUG + php_error(E_NOTICE, + "((%lx & %lx) != %lx) = %d", v, l, l, matched); +#endif + break; + + default: + /* bogosity, pretend it didn't match */ + matched = 0; + php_error(E_WARNING, + MODNAME ": mcheck: can't happen: invalid relation %d.", + m->reln); + break; + } + + return matched; +} + +#if 0 /* TODO */ +/* + * compress routines: zmagic() - returns 0 if 
not recognized, uncompresses + * and prints information if recognized uncompress(s, method, old, n, newch) + * - uncompress old into new, using method, return sizeof new + */ + +static struct { + char *magic; + int maglen; + char *argv[3]; + int silent; + char *encoding; /* MUST be lowercase */ +} compr[] = { + + /* we use gzip here rather than uncompress because we have to pass + * it a full filename -- and uncompress only considers filenames + * ending with .Z + */ + { + "\037\235", 2, { + "gzip", "-dcq", NULL + }, 0, "x-compress" + }, + { + "\037\213", 2, { + "gzip", "-dcq", NULL + }, 1, "x-gzip" + }, + /* + * XXX pcat does not work, cause I don't know how to make it read stdin, + * so we use gzip + */ + { + "\037\036", 2, { + "gzip", "-dcq", NULL + }, 0, "x-gzip" + }, +}; + +static int ncompr = sizeof(compr) / sizeof(compr[0]); + +static int zmagic(unsigned char *buf, int nbytes) +{ + unsigned char *newbuf; + int newsize; + int i; + + for (i = 0; i < ncompr; i++) { + if (nbytes < compr[i].maglen) + continue; + if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0) + break; + } + + if (i == ncompr) + return 0; + + if ((newsize = uncompress(i, &newbuf, nbytes)) > 0) { + tryit(newbuf, newsize, 0); + + /* set encoding type in the request record */ + /* TODO r->content_encoding = compr[i].encoding; */ + } + return 1; +} + + +struct uncompress_parms { + request_rec *r; + int method; +}; + +static int uncompress_child(void *data, child_info *pinfo) +{ + struct uncompress_parms *parm = data; +#ifndef WIN32 + char *new_argv[4]; + + new_argv[0] = compr[parm->method].argv[0]; + new_argv[1] = compr[parm->method].argv[1]; + new_argv[2] = parm->r->filename; + new_argv[3] = NULL; + + if (compr[parm->method].silent) { + close(STDERR_FILENO); + } + + execvp(compr[parm->method].argv[0], new_argv); + ap_log_rerror(APLOG_MARK, APLOG_ERR, parm->r, + MODNAME ": could not execute `%s'.", + compr[parm->method].argv[0]); + return -1; +#else + char *pCommand; + STARTUPINFO si; + 
PROCESS_INFORMATION pi; + pid_t pid; + + memset(&si, 0, sizeof(si)); + memset(&pi, 0, sizeof(pi)); + + pid = -1; + + /* + * Look at the arguments... + */ + pCommand = ap_pstrcat(parm->r->pool, compr[parm->method].argv[0], " ", + compr[parm->method].argv[1], " \"", + parm->r->filename, "\"", NULL); + + /* + * Make child process use hPipeOutputWrite as standard out, + * and make sure it does not show on screen. + */ + si.cb = sizeof(si); + si.dwFlags = STARTF_USESTDHANDLES | STARTF_USESHOWWINDOW; + si.wShowWindow = SW_HIDE; + si.hStdInput = pinfo->hPipeInputRead; + si.hStdOutput = pinfo->hPipeOutputWrite; + si.hStdError = pinfo->hPipeErrorWrite; + + if (CreateProcess(NULL, pCommand, NULL, NULL, TRUE, 0, NULL, + ap_make_dirstr_parent(parm->r->pool, parm->r->filename), + &si, &pi)) { + pid = pi.dwProcessId; + /* + * We must close the handles to the new process and its main thread + * to prevent handle and memory leaks. + */ + CloseHandle(pi.hProcess); + CloseHandle(pi.hThread); + } + return (pid); +#endif +} + +static int uncompress(int method, unsigned char **newch, int n) +{ + struct uncompress_parms parm; + BUFF *bout; + pool *sub_pool; + + parm.r = r; + parm.method = method; + + /* We make a sub_pool so that we can collect our child early, otherwise + * there are cases (i.e. generating directory indicies with mod_autoindex) + * where we would end up with LOTS of zombies. 
+ */ + sub_pool = ap_make_sub_pool(r->pool); + + if (!ap_bspawn_child(sub_pool, uncompress_child, &parm, kill_always, + NULL, &bout, NULL)) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, r, + MODNAME ": couldn't spawn uncompress process: %s", r->uri); + return -1; + } + + *newch = (unsigned char *) emalloc(n); + if ((n = ap_bread(bout, *newch, n)) <= 0) { + ap_destroy_pool(sub_pool); + ap_log_rerror(APLOG_MARK, APLOG_ERR, r, + MODNAME ": read failed %s", r->filename); + return -1; + } + ap_destroy_pool(sub_pool); + return n; +} +#endif + +static void mprint(union VALUETYPE *p, struct magic *m) +{ + char *pp, *rt; + unsigned long v; + + switch (m->type) { + case BYTE: + v = p->b; + break; + + case SHORT: + case BESHORT: + case LESHORT: + v = p->h; + break; + + case LONG: + case BELONG: + case LELONG: + v = p->l; + break; + + case STRING: + if (m->reln == '=') { + (void) magic_rsl_printf(m->desc, m->value.s); + } + else { + (void) magic_rsl_printf(m->desc, p->s); + } + return; + + case DATE: + case BEDATE: + case LEDATE: + /* XXX: not multithread safe */ + pp = ctime((time_t *) & p->l); + if ((rt = strchr(pp, '\n')) != NULL) + *rt = '\0'; + (void) magic_rsl_printf(m->desc, pp); + return; + default: + php_error(E_WARNING, + MODNAME ": invalid m->type (%d) in mprint().", + m->type); + return; + } + + v = signextend(m, v) & m->mask; + (void) magic_rsl_printf(m->desc, (unsigned long) v); +} + +/* + * Convert the byte order of the data we are looking at + */ +static int mconvert(union VALUETYPE *p, struct magic *m) +{ + char *rt; + + switch (m->type) { + case BYTE: + case SHORT: + case LONG: + case DATE: + return 1; + case STRING: + /* Null terminate and eat the return */ + p->s[sizeof(p->s) - 1] = '\0'; + if ((rt = strchr(p->s, '\n')) != NULL) + *rt = '\0'; + return 1; + case BESHORT: + p->h = (short) ((p->hs[0] << 8) | (p->hs[1])); + return 1; + case BELONG: + case BEDATE: + p->l = (long) + ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3])); + return 1; + 
case LESHORT: + p->h = (short) ((p->hs[1] << 8) | (p->hs[0])); + return 1; + case LELONG: + case LEDATE: + p->l = (long) + ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0])); + return 1; + default: + php_error(E_WARNING, + MODNAME ": invalid type %d in mconvert().", m->type); + return 0; + } +} + + +/* states for the state-machine algorithm in magic_rsl_to_request() */ +typedef enum { + rsl_leading_space, rsl_type, rsl_subtype, rsl_separator, rsl_encoding +} rsl_states; + +/* process the RSL and set the MIME info in the request record */ +static int magic_rsl_get(char **content_type, char **content_encoding) +{ + int cur_frag, /* current fragment number/counter */ + cur_pos, /* current position within fragment */ + type_frag, /* content type starting point: fragment */ + type_pos, /* content type starting point: position */ + type_len, /* content type length */ + encoding_frag, /* content encoding starting point: fragment */ + encoding_pos, /* content encoding starting point: position */ + encoding_len; /* content encoding length */ + + magic_rsl *frag; /* list-traversal pointer */ + rsl_states state; + + magic_req_rec *req_dat = MIME_MAGIC_G(req_dat); + + /* check if we have a result */ + if (!req_dat || !req_dat->head) { + /* empty - no match, we defer to other Apache modules */ + return DECLINED; + } + + /* start searching for the type and encoding */ + state = rsl_leading_space; + type_frag = type_pos = type_len = 0; + encoding_frag = encoding_pos = encoding_len = 0; + for (frag = req_dat->head, cur_frag = 0; + frag && frag->next; + frag = frag->next, cur_frag++) { + /* loop through the characters in the fragment */ + for (cur_pos = 0; frag->str[cur_pos]; cur_pos++) { + if (isspace(frag->str[cur_pos])) { + /* process whitespace actions for each state */ + if (state == rsl_leading_space) { + /* eat whitespace in this state */ + continue; + } + else if (state == rsl_type) { + /* whitespace: type has no slash! 
*/ + return DECLINED; + } + else if (state == rsl_subtype) { + /* whitespace: end of MIME type */ + state++; + continue; + } + else if (state == rsl_separator) { + /* eat whitespace in this state */ + continue; + } + else if (state == rsl_encoding) { + /* whitespace: end of MIME encoding */ + /* we're done */ + frag = req_dat->tail; + break; + } + else { + /* should not be possible */ + /* abandon malfunctioning module */ + php_error(E_WARNING, + MODNAME ": bad state %d (ws)", state); + return DECLINED; + } + /* NOTREACHED */ + } + else if (state == rsl_type && + frag->str[cur_pos] == '/') { + /* copy the char and go to rsl_subtype state */ + type_len++; + state++; + } + else { + /* process non-space actions for each state */ + if (state == rsl_leading_space) { + /* non-space: begin MIME type */ + state++; + type_frag = cur_frag; + type_pos = cur_pos; + type_len = 1; + continue; + } + else if (state == rsl_type || + state == rsl_subtype) { + /* non-space: adds to type */ + type_len++; + continue; + } + else if (state == rsl_separator) { + /* non-space: begin MIME encoding */ + state++; + encoding_frag = cur_frag; + encoding_pos = cur_pos; + encoding_len = 1; + continue; + } + else if (state == rsl_encoding) { + /* non-space: adds to encoding */ + encoding_len++; + continue; + } + else { + /* should not be possible */ + /* abandon malfunctioning module */ + php_error(E_WARNING, + MODNAME ": bad state %d (ns)", state); + return DECLINED; + } + /* NOTREACHED */ + } + /* NOTREACHED */ + } + } + + /* if we ended prior to state rsl_subtype, we had incomplete info */ + if (state != rsl_subtype && state != rsl_separator && + state != rsl_encoding) { + /* defer to other modules */ + return DECLINED; + } + + /* save the info in the request record */ + if (state == rsl_subtype || state == rsl_encoding || + state == rsl_encoding) { + char *tmp; + tmp = rsl_strdup(type_frag, type_pos, type_len); + /* XXX: this could be done at config time I'm sure... 
but I'm + * confused by all this magic_rsl stuff. -djg */ + /* TODO ap_content_type_tolower(tmp); */ + *content_type = tmp; + } + if (state == rsl_encoding) { + char *tmp; + tmp = rsl_strdup(encoding_frag, + encoding_pos, encoding_len); + /* XXX: this could be done at config time I'm sure... but I'm + * confused by all this magic_rsl stuff. -djg */ + /* TODO ap_str_tolower(tmp); */ + *content_encoding = tmp; + } + + /* detect memory allocation errors */ + if (!content_type || + (state == rsl_encoding && !*content_encoding)) { + return ERROR; + } + + /* success! */ + return OK; +} + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 fdm=marker + * vim<600: noet sw=4 ts=4 + */ diff --git a/ext/mime_magic/php_mime_magic.h b/ext/mime_magic/php_mime_magic.h new file mode 100644 index 0000000000..60a9e4e64d --- /dev/null +++ b/ext/mime_magic/php_mime_magic.h @@ -0,0 +1,454 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 4 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2002 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 2.02 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available at through the world-wide-web at | + | http://www.php.net/license/2_02.txt. | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ + | Author: | + +----------------------------------------------------------------------+ + + $Id$ +*/ + +#ifndef PHP_MIME_MAGIC_H +#define PHP_MIME_MAGIC_H + +#define MIME_MAGIC_DEBUG 0 + +#define MIME_BINARY_UNKNOWN "application/octet-stream" +#define MIME_TEXT_UNKNOWN "text/plain" + +#define MAXMIMESTRING 256 + +#define OK 0 +#define DECLINED -1 +#define DONE -2 +#define ERROR -3 + +#define DIR_MAGIC_TYPE "httpd/unix-directory" + +/* HOWMANY must be at least 4096 to make gzip -dcq work */ +#define HOWMANY 4096 +/* SMALL_HOWMANY limits how much work we do to figure out text files */ +#define SMALL_HOWMANY 1024 +#define MAXDESC 50 /* max leng of text description */ +#define MAXstring 64 /* max leng of "string" types */ + +struct magic { + struct magic *next; /* link to next entry */ + int lineno; /* line number from magic file */ + + short flag; +#define INDIR 1 /* if '>(...)' appears, */ +#define UNSIGNED 2 /* comparison is unsigned */ + short cont_level; /* level of ">" */ + struct { + char type; /* byte short long */ + long offset; /* offset from indirection */ + } in; + long offset; /* offset to magic number */ + unsigned char reln; /* relation (0=eq, '>'=gt, etc) */ + char type; /* int, short, long or string. */ + char vallen; /* length of string value, if any */ +#define BYTE 1 +#define SHORT 2 +#define LONG 4 +#define STRING 5 +#define DATE 6 +#define BESHORT 7 +#define BELONG 8 +#define BEDATE 9 +#define LESHORT 10 +#define LELONG 11 +#define LEDATE 12 + union VALUETYPE { + unsigned char b; + unsigned short h; + unsigned long l; + char s[MAXstring]; + unsigned char hs[2]; /* 2 bytes of a fixed-endian "short" */ + unsigned char hl[4]; /* 2 bytes of a fixed-endian "long" */ + } value; /* either number or string */ + unsigned long mask; /* mask before comparison with value */ + char nospflag; /* supress space character */ + + /* NOTE: this string is suspected of overrunning - find it! 
*/ + char desc[MAXDESC]; /* description */ +}; + +/* + * data structures for tar file recognition + * -------------------------------------------------------------------------- + * Header file for public domain tar (tape archive) program. + * + * @(#)tar.h 1.20 86/10/29 Public Domain. Created 25 August 1985 by John + * Gilmore, ihnp4!hoptoad!gnu. + * + * Header block on tape. + * + * I'm going to use traditional DP naming conventions here. A "block" is a big + * chunk of stuff that we do I/O on. A "record" is a piece of info that we + * care about. Typically many "record"s fit into a "block". + */ +#define RECORDSIZE 512 +#define NAMSIZ 100 +#define TUNMLEN 32 +#define TGNMLEN 32 + +union record { + char charptr[RECORDSIZE]; + struct header { + char name[NAMSIZ]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char chksum[8]; + char linkflag; + char linkname[NAMSIZ]; + char magic[8]; + char uname[TUNMLEN]; + char gname[TGNMLEN]; + char devmajor[8]; + char devminor[8]; + } header; +}; + +/* The magic field is filled with this if uname and gname are valid. 
*/ +#define TMAGIC "ustar " /* 7 chars and a null */ + +/* + * file-function prototypes + */ +/*TODO*/ +#define request_rec void +#define server_rec void +#define pool void +#define cmd_parms void + +static int apprentice(void); +static int ascmagic(unsigned char *, int); +static int is_tar(unsigned char *, int); +static int softmagic(unsigned char *, int); +static void tryit(unsigned char *, int, int); +static int zmagic(unsigned char *, int); + +static int getvalue(struct magic *, char **); +static int hextoint(int); +static char *getstr(char *, char *, int, int *); +static int parse(char *, int); + +static int match(unsigned char *, int); +static int mget(union VALUETYPE *, unsigned char *, + struct magic *, int); +static int mcheck(union VALUETYPE *, struct magic *); +static void mprint(union VALUETYPE *, struct magic *); +static int mconvert(union VALUETYPE *, struct magic *); +static int magic_rsl_get(char **, char **); +static int magic_process(char *); + +static int uncompress(int, + unsigned char **, int); +static long from_oct(int, char *); +static int fsmagic(const char *fn); + +/* + * includes for ASCII substring recognition formerly "names.h" in file + * command + * + * Original notes: names and types used by ascmagic in file(1). These tokens are + * here because they can appear anywhere in the first HOWMANY bytes, while + * tokens in /etc/magic must appear at fixed offsets into the file. Don't + * make HOWMANY too high unless you have a very fast CPU. + */ + +/* these types are used to index the table 'types': keep em in sync! 
*/ +/* HTML inserted in first because this is a web server module now */ +#define L_HTML 0 /* HTML */ +#define L_C 1 /* first and foremost on UNIX */ +#define L_FORT 2 /* the oldest one */ +#define L_MAKE 3 /* Makefiles */ +#define L_PLI 4 /* PL/1 */ +#define L_MACH 5 /* some kinda assembler */ +#define L_ENG 6 /* English */ +#define L_PAS 7 /* Pascal */ +#define L_MAIL 8 /* Electronic mail */ +#define L_NEWS 9 /* Usenet Netnews */ + +static char *types[] = + { + "text/html", /* HTML */ + "text/plain", /* "c program text", */ + "text/plain", /* "fortran program text", */ + "text/plain", /* "make commands text", */ + "text/plain", /* "pl/1 program text", */ + "text/plain", /* "assembler program text", */ + "text/plain", /* "English text", */ + "text/plain", /* "pascal program text", */ + "message/rfc822", /* "mail text", */ + "message/news", /* "news text", */ + "application/binary", /* "can't happen error on names.h/types", */ + 0 + }; + +static struct names { + char *name; + short type; +} names[] = { + + /* These must be sorted by eye for optimal hit rate */ + /* Add to this list only after substantial meditation */ + { + "<html>", L_HTML + }, + { + "<HTML>", L_HTML + }, + { + "<head>", L_HTML + }, + { + "<HEAD>", L_HTML + }, + { + "<title>", L_HTML + }, + { + "<TITLE>", L_HTML + }, + { + "<h1>", L_HTML + }, + { + "<H1>", L_HTML + }, + { + "<!--", L_HTML + }, + { + "<!DOCTYPE HTML", L_HTML + }, + { + "/*", L_C + }, /* must precede "The", "the", etc. */ + { + "#include", L_C + }, + { + "char", L_C + }, + { + "The", L_ENG + }, + { + "the", L_ENG + }, + { + "double", L_C + }, + { + "extern", L_C + }, + { + "float", L_C + }, + { + "real", L_C + }, + { + "struct", L_C + }, + { + "union", L_C + }, + { + "CFLAGS", L_MAKE + }, + { + "LDFLAGS", L_MAKE + }, + { + "all:", L_MAKE + }, + { + ".PRECIOUS", L_MAKE + }, + /* + * Too many files of text have these words in them. Find another way to + * recognize Fortrash. 
+ */ +#ifdef NOTDEF + { + "subroutine", L_FORT + }, + { + "function", L_FORT + }, + { + "block", L_FORT + }, + { + "common", L_FORT + }, + { + "dimension", L_FORT + }, + { + "integer", L_FORT + }, + { + "data", L_FORT + }, +#endif /* NOTDEF */ + { + ".ascii", L_MACH + }, + { + ".asciiz", L_MACH + }, + { + ".byte", L_MACH + }, + { + ".even", L_MACH + }, + { + ".globl", L_MACH + }, + { + "clr", L_MACH + }, + { + "(input,", L_PAS + }, + { + "dcl", L_PLI + }, + { + "Received:", L_MAIL + }, + { + ">From", L_MAIL + }, + { + "Return-Path:", L_MAIL + }, + { + "Cc:", L_MAIL + }, + { + "Newsgroups:", L_NEWS + }, + { + "Path:", L_NEWS + }, + { + "Organization:", L_NEWS + }, + { + NULL, 0 + } +}; + +#define NNAMES ((sizeof(names)/sizeof(struct names)) - 1) + +/* + * Result String List (RSL) + * + * The file(1) command prints its output. Instead, we store the various + * "printed" strings in a list (allocating memory as we go) and concatenate + * them at the end when we finally know how much space they'll need. 
+ */ + +typedef struct magic_rsl_s { + char *str; /* string, possibly a fragment */ + struct magic_rsl_s *next; /* pointer to next fragment */ +} magic_rsl; + +/* + * Apache module configuration structures + */ + +/* per-server info */ +typedef struct { + char *magicfile; /* where magic be found */ + struct magic *magic; /* head of magic config list */ + struct magic *last; +} magic_server_config_rec; + +/* per-request info */ +typedef struct { + magic_rsl *head; /* result string list */ + magic_rsl *tail; + unsigned suf_recursion; /* recursion depth in suffix check */ +} magic_req_rec; + +static magic_req_rec *magic_set_config(void); +static void magic_free_config(magic_req_rec *); + + + +extern zend_module_entry mime_magic_module_entry; +#define phpext_mime_magic_ptr &mime_magic_module_entry + +#ifdef PHP_WIN32 +#define PHP_MIME_MAGIC_API __declspec(dllexport) +#else +#define PHP_MIME_MAGIC_API +#endif + +#ifdef ZTS +#include "TSRM.h" +#endif + +PHP_MINIT_FUNCTION(mime_magic); +PHP_MSHUTDOWN_FUNCTION(mime_magic); +PHP_RINIT_FUNCTION(mime_magic); +PHP_RSHUTDOWN_FUNCTION(mime_magic); +PHP_MINFO_FUNCTION(mime_magic); + +PHP_FUNCTION(mime_content_type); + +ZEND_BEGIN_MODULE_GLOBALS(mime_magic) + char *magicfile; + magic_req_rec *req_dat; +ZEND_END_MODULE_GLOBALS(mime_magic) + +/* In every utility function you add that needs to use variables + in php_mime_magic_globals, call TSRM_FETCH(); after declaring other + variables used by that function, or better yet, pass in TSRMG_CC + after the last function argument and declare your utility function + with TSRMG_DC after the last declared argument. Always refer to + the globals in your function as MIME_MAGIC_G(variable). You are + encouraged to rename these macros something shorter, see + examples in any other php module directory. 
+*/ + +#ifdef ZTS +#define MIME_MAGIC_G(v) TSRMG(mime_magic_globals_id, zend_mime_magic_globals *, v) +#else +#define MIME_MAGIC_G(v) (mime_magic_globals.v) +#endif + + + + +#endif /* PHP_MIME_MAGIC_H */ + + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * indent-tabs-mode: t + * End: + */ |