diff options
author | Nikita Popov <nikic@php.net> | 2015-03-21 20:10:19 +0100 |
---|---|---|
committer | Nikita Popov <nikic@php.net> | 2015-04-02 16:31:17 +0200 |
commit | a8bf1c5d8f5755b53492e58040cfe88150eb57b6 (patch) | |
tree | c3b94b3c9cb7bdf5964a5428673459a78e12edc5 /ext/tokenizer | |
parent | 83a15801cc8619a1a83307e573bf1b63f0d4b53c (diff) | |
download | php-git-a8bf1c5d8f5755b53492e58040cfe88150eb57b6.tar.gz |
Throw ParseException from lexer
Primarily to avoid getting fatal errors from token_get_all().
Implemented using a magic T_ERROR token, which the lexer emits to
force a parser failure.
Diffstat (limited to 'ext/tokenizer')
-rw-r--r-- | ext/tokenizer/tests/parse_errors.phpt | 26 | ||||
-rw-r--r-- | ext/tokenizer/tokenizer.c | 2 | ||||
-rwxr-xr-x | ext/tokenizer/tokenizer_data_gen.sh | 6 |
3 files changed, 33 insertions, 1 deletion
```diff
diff --git a/ext/tokenizer/tests/parse_errors.phpt b/ext/tokenizer/tests/parse_errors.phpt
new file mode 100644
index 0000000000..3ee2cb081b
--- /dev/null
+++ b/ext/tokenizer/tests/parse_errors.phpt
@@ -0,0 +1,26 @@
+--TEST--
+Parse errors during token_get_all()
+--SKIPIF--
+<?php if (!extension_loaded("tokenizer")) print "skip"; ?>
+--FILE--
+<?php
+
+function test_parse_error($code) {
+    try {
+        var_dump(token_get_all($code));
+    } catch (ParseException $e) {
+        echo $e->getMessage(), "\n";
+    }
+}
+
+test_parse_error('<?php var_dump(078);');
+test_parse_error('<?php var_dump("\u{xyz}");');
+test_parse_error('<?php var_dump("\u{ffffff}");');
+test_parse_error('<?php var_dump(078 + 078);');
+
+?>
+--EXPECT--
+Invalid numeric literal
+Invalid UTF-8 codepoint escape sequence
+Invalid UTF-8 codepoint escape sequence: Codepoint too large
+Invalid numeric literal
diff --git a/ext/tokenizer/tokenizer.c b/ext/tokenizer/tokenizer.c
index 52095d077d..c011894441 100644
--- a/ext/tokenizer/tokenizer.c
+++ b/ext/tokenizer/tokenizer.c
@@ -123,6 +123,8 @@ static void tokenize(zval *return_value)
 				case T_DOC_COMMENT:
 					destroy = 0;
 					break;
+				case T_ERROR:
+					return;
 			}
 
 			if (token_type >= 256) {
diff --git a/ext/tokenizer/tokenizer_data_gen.sh b/ext/tokenizer/tokenizer_data_gen.sh
index 13384c8490..49ef7cbfa3 100755
--- a/ext/tokenizer/tokenizer_data_gen.sh
+++ b/ext/tokenizer/tokenizer_data_gen.sh
@@ -45,7 +45,10 @@ echo '/*
 
 echo 'void tokenizer_register_constants(INIT_FUNC_ARGS) {' >> $OUTFILE
 
-$AWK '/^#define T_/ { print " REGISTER_LONG_CONSTANT(\"" $2 "\", " $2 ", CONST_CS | CONST_PERSISTENT);" }' < $INFILE >> $OUTFILE
+$AWK '
+	/^#define T_ERROR/ { next }
+	/^#define T_/ { print " REGISTER_LONG_CONSTANT(\"" $2 "\", " $2 ", CONST_CS | CONST_PERSISTENT);" }
+' < $INFILE >> $OUTFILE
 echo ' REGISTER_LONG_CONSTANT("T_DOUBLE_COLON", T_PAAMAYIM_NEKUDOTAYIM, CONST_CS | CONST_PERSISTENT);' >> $OUTFILE
 echo '}' >> $OUTFILE
 
@@ -61,6 +64,7 @@ $AWK '
 		print " case T_PAAMAYIM_NEKUDOTAYIM: return \"T_DOUBLE_COLON\";"
 		next
 	}
+	/^#define T_ERROR/ { next }
 	/^#define T_/ {
 		print " case " $2 ": return \"" $2 "\";"
 	}
```