diff options
Diffstat (limited to 'ext/filter/sanitizing_filters.c')
-rw-r--r-- | ext/filter/sanitizing_filters.c | 389 |
1 files changed, 389 insertions, 0 deletions
diff --git a/ext/filter/sanitizing_filters.c b/ext/filter/sanitizing_filters.c new file mode 100644 index 0000000..30da05a --- /dev/null +++ b/ext/filter/sanitizing_filters.c @@ -0,0 +1,389 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2013 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Derick Rethans <derick@php.net> | + +----------------------------------------------------------------------+ +*/ + +/* $Id$ */ + +#include "php_filter.h" +#include "filter_private.h" +#include "ext/standard/php_smart_str.h" + +/* {{{ STRUCTS */ +typedef unsigned long filter_map[256]; +/* }}} */ + +/* {{{ HELPER FUNCTIONS */ +static void php_filter_encode_html(zval *value, const unsigned char *chars) +{ + smart_str str = {0}; + int len = Z_STRLEN_P(value); + unsigned char *s = (unsigned char *)Z_STRVAL_P(value); + unsigned char *e = s + len; + + if (Z_STRLEN_P(value) == 0) { + return; + } + + while (s < e) { + if (chars[*s]) { + smart_str_appendl(&str, "&#", 2); + smart_str_append_unsigned(&str, (unsigned long)*s); + smart_str_appendc(&str, ';'); + } else { + /* XXX: this needs to be optimized to work with blocks of 'safe' chars */ + smart_str_appendc(&str, *s); + } + s++; + } + + smart_str_0(&str); + str_efree(Z_STRVAL_P(value)); + Z_STRVAL_P(value) = str.c; + Z_STRLEN_P(value) = str.len; +} + +static const unsigned char hexchars[] = "0123456789ABCDEF"; + +#define LOWALPHA "abcdefghijklmnopqrstuvwxyz" +#define HIALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +#define DIGIT "0123456789" + +#define DEFAULT_URL_ENCODE LOWALPHA HIALPHA DIGIT "-._" + +static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul) +{ + unsigned char *str, *p; + unsigned char tmp[256]; + unsigned char *s = (unsigned char *)chars; + unsigned char *e = s + char_len; + + memset(tmp, 1, sizeof(tmp)-1); + + while (s < e) { + tmp[*s++] = 0; + } +/* XXX: This is not needed since these chars in the allowed list never include the high/low/null value + if (encode_nul) { + tmp[0] = 1; + } + if (high) { + memset(tmp + 127, 1, sizeof(tmp) - 127); + } + if (low) { + memset(tmp, 1, 32); + } +*/ + p = str = (unsigned char *) safe_emalloc(3, Z_STRLEN_P(value), 1); + s = (unsigned char *)Z_STRVAL_P(value); + e = s + Z_STRLEN_P(value); + + while (s < e) { + if (tmp[*s]) { + *p++ = '%'; + *p++ = hexchars[(unsigned char) *s >> 4]; + *p++ = hexchars[(unsigned char) *s & 15]; + } else { + *p++ = *s; + } + s++; + } + *p = '\0'; + str_efree(Z_STRVAL_P(value)); + Z_STRVAL_P(value) = (char *)str; + Z_STRLEN_P(value) = p - str; +} + +static void php_filter_strip(zval *value, long flags) +{ + unsigned char *buf, *str; + int i, c; + + /* Optimization for if no strip flags are set */ + if (! ((flags & FILTER_FLAG_STRIP_LOW) || (flags & FILTER_FLAG_STRIP_HIGH)) ) { + return; + } + + str = (unsigned char *)Z_STRVAL_P(value); + buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1); + c = 0; + for (i = 0; i < Z_STRLEN_P(value); i++) { + if ((str[i] > 127) && (flags & FILTER_FLAG_STRIP_HIGH)) { + } else if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) { + } else if ((str[i] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) { + } else { + buf[c] = str[i]; + ++c; + } + } + /* update zval string data */ + buf[c] = '\0'; + str_efree(Z_STRVAL_P(value)); + Z_STRVAL_P(value) = (char *)buf; + Z_STRLEN_P(value) = c; +} +/* }}} */ + +/* {{{ FILTER MAP HELPERS */ +static void filter_map_init(filter_map *map) +{ + memset(map, 0, sizeof(filter_map)); +} + +static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list) +{ + int l, i; + + l = strlen((const char*)allowed_list); + for (i = 0; i < l; ++i) { + (*map)[allowed_list[i]] = flag; + } +} + +static void filter_map_apply(zval *value, filter_map *map) +{ + unsigned char *buf, *str; + int i, c; + + str = (unsigned char *)Z_STRVAL_P(value); + buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1); + c = 0; + for (i = 0; i < Z_STRLEN_P(value); i++) { + if ((*map)[str[i]]) { + buf[c] = str[i]; + ++c; + } + } + /* update zval string data */ + buf[c] = '\0'; + str_efree(Z_STRVAL_P(value)); + Z_STRVAL_P(value) = (char *)buf; + Z_STRLEN_P(value) = c; +} +/* }}} */ + +/* {{{ php_filter_string */ +void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL) +{ + size_t new_len; + unsigned char enc[256] = {0}; + + /* strip high/strip low ( see flags )*/ + php_filter_strip(value, flags); + + if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) { + enc['\''] = enc['"'] = 1; + } + if (flags & FILTER_FLAG_ENCODE_AMP) { + enc['&'] = 1; + } + if (flags & FILTER_FLAG_ENCODE_LOW) { + memset(enc, 1, 32); + } + if (flags & FILTER_FLAG_ENCODE_HIGH) { + memset(enc + 127, 1, sizeof(enc) - 127); + } + + php_filter_encode_html(value, enc); + + /* strip tags, implicitly also removes \0 chars */ + new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1); + Z_STRLEN_P(value) = new_len; + + if (new_len == 0) { + zval_dtor(value); + if (flags & FILTER_FLAG_EMPTY_STRING_NULL) { + ZVAL_NULL(value); + } else { + ZVAL_EMPTY_STRING(value); + } + return; + } +} +/* }}} */ + +/* {{{ php_filter_encoded */ +void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL) +{ + /* apply strip_high and strip_low filters */ + php_filter_strip(value, flags); + /* urlencode */ + php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1); +} +/* }}} */ + +/* {{{ php_filter_special_chars */ +void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL) +{ + unsigned char enc[256] = {0}; + + php_filter_strip(value, flags); + + /* encodes ' " < > & \0 to numerical entities */ + enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1; + + /* if strip low is not set, then we encode them as &#xx; */ + memset(enc, 1, 32); + + if (flags & FILTER_FLAG_ENCODE_HIGH) { + memset(enc + 127, 1, sizeof(enc) - 127); + } + + php_filter_encode_html(value, enc); +} +/* }}} */ + +/* {{{ php_filter_full_special_chars */ +void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL) +{ + char *buf; + size_t len; + int quotes; + + if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) { + quotes = ENT_QUOTES; + } else { + quotes = ENT_NOQUOTES; + } + buf = php_escape_html_entities_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 1, quotes, SG(default_charset), 0 TSRMLS_CC); + str_efree(Z_STRVAL_P(value)); + Z_STRVAL_P(value) = buf; + Z_STRLEN_P(value) = len; +} +/* }}} */ + +/* {{{ php_filter_unsafe_raw */ +void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL) +{ + /* Only if no flags are set (optimization) */ + if (flags != 0 && Z_STRLEN_P(value) > 0) { + unsigned char enc[256] = {0}; + + php_filter_strip(value, flags); + + if (flags & FILTER_FLAG_ENCODE_AMP) { + enc['&'] = 1; + } + if (flags & FILTER_FLAG_ENCODE_LOW) { + memset(enc, 1, 32); + } + if (flags & FILTER_FLAG_ENCODE_HIGH) { + memset(enc + 127, 1, sizeof(enc) - 127); + } + + php_filter_encode_html(value, enc); + } else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) { + zval_dtor(value); + ZVAL_NULL(value); + } +} +/* }}} */ + + + +/* {{{ php_filter_email */ +#define SAFE "$-_.+" +#define EXTRA "!*'()," +#define NATIONAL "{}|\\^~[]`" +#define PUNCTUATION "<>#%\"" +#define RESERVED ";/?:@&=" + +void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL) +{ + /* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */ + const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]"; + filter_map map; + + filter_map_init(&map); + filter_map_update(&map, 1, allowed_list); + filter_map_apply(value, &map); +} +/* }}} */ + +/* {{{ php_filter_url */ +void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL) +{ + /* Strip all chars not part of section 5 of + * http://www.faqs.org/rfcs/rfc1738.html */ + const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED; + filter_map map; + + filter_map_init(&map); + filter_map_update(&map, 1, allowed_list); + filter_map_apply(value, &map); +} +/* }}} */ + +/* {{{ php_filter_number_int */ +void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL) +{ + /* strip everything [^0-9+-] */ + const unsigned char allowed_list[] = "+-" DIGIT; + filter_map map; + + filter_map_init(&map); + filter_map_update(&map, 1, allowed_list); + filter_map_apply(value, &map); +} +/* }}} */ + +/* {{{ php_filter_number_float */ +void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL) +{ + /* strip everything [^0-9+-] */ + const unsigned char allowed_list[] = "+-" DIGIT; + filter_map map; + + filter_map_init(&map); + filter_map_update(&map, 1, allowed_list); + + /* depending on flags, strip '.', 'e', ",", "'" */ + if (flags & FILTER_FLAG_ALLOW_FRACTION) { + filter_map_update(&map, 2, (const unsigned char *) "."); + } + if (flags & FILTER_FLAG_ALLOW_THOUSAND) { + filter_map_update(&map, 3, (const unsigned char *) ","); + } + if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) { + filter_map_update(&map, 4, (const unsigned char *) "eE"); + } + filter_map_apply(value, &map); +} +/* }}} */ + +/* {{{ php_filter_magic_quotes */ +void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL) +{ + char *buf; + int len; + + /* just call php_addslashes quotes */ + buf = php_addslashes(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 0 TSRMLS_CC); + + str_efree(Z_STRVAL_P(value)); + Z_STRVAL_P(value) = buf; + Z_STRLEN_P(value) = len; +} +/* }}} */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 fdm=marker + * vim<600: noet sw=4 ts=4 + */ |