author     Nikita Popov <nikic@php.net>  2015-03-21 20:10:19 +0100
committer  Nikita Popov <nikic@php.net>  2015-04-02 16:31:17 +0200
commit     a8bf1c5d8f5755b53492e58040cfe88150eb57b6 (patch)
tree       c3b94b3c9cb7bdf5964a5428673459a78e12edc5 /ext/tokenizer
parent     83a15801cc8619a1a83307e573bf1b63f0d4b53c (diff)
Throw ParseException from lexer
This is done primarily to avoid fatal errors from token_get_all(). Implemented using a magic T_ERROR token, which the lexer emits to force a parser failure.
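
As a quick userland sketch (not part of the patch) of what this buys: with the magic T_ERROR token in place, a script that previously died with a fatal error can catch the failure from token_get_all() instead.

<?php
// Sketch only: assumes a build with this patch, where lexer errors surface
// as ParseException rather than a fatal error.
try {
    $tokens = token_get_all('<?php var_dump(078);');
} catch (ParseException $e) {
    echo "Caught: ", $e->getMessage(), "\n"; // Invalid numeric literal
}
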
Diffstat (limited to 'ext/tokenizer')
-rw-r--r--  ext/tokenizer/tests/parse_errors.phpt  26
-rw-r--r--  ext/tokenizer/tokenizer.c               2
-rwxr-xr-x  ext/tokenizer/tokenizer_data_gen.sh     6
3 files changed, 33 insertions, 1 deletion
diff --git a/ext/tokenizer/tests/parse_errors.phpt b/ext/tokenizer/tests/parse_errors.phpt
new file mode 100644
index 0000000000..3ee2cb081b
--- /dev/null
+++ b/ext/tokenizer/tests/parse_errors.phpt
@@ -0,0 +1,26 @@
+--TEST--
+Parse errors during token_get_all()
+--SKIPIF--
+<?php if (!extension_loaded("tokenizer")) print "skip"; ?>
+--FILE--
+<?php
+
+function test_parse_error($code) {
+    try {
+        var_dump(token_get_all($code));
+    } catch (ParseException $e) {
+        echo $e->getMessage(), "\n";
+    }
+}
+
+test_parse_error('<?php var_dump(078);');
+test_parse_error('<?php var_dump("\u{xyz}");');
+test_parse_error('<?php var_dump("\u{ffffff}");');
+test_parse_error('<?php var_dump(078 + 078);');
+
+?>
+--EXPECT--
+Invalid numeric literal
+Invalid UTF-8 codepoint escape sequence
+Invalid UTF-8 codepoint escape sequence: Codepoint too large
+Invalid numeric literal
diff --git a/ext/tokenizer/tokenizer.c b/ext/tokenizer/tokenizer.c
index 52095d077d..c011894441 100644
--- a/ext/tokenizer/tokenizer.c
+++ b/ext/tokenizer/tokenizer.c
@@ -123,6 +123,8 @@ static void tokenize(zval *return_value)
 			case T_DOC_COMMENT:
 				destroy = 0;
 				break;
+			case T_ERROR:
+				return;
 		}
 
 		if (token_type >= 256) {
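
A hedged sketch of the observable effect of the early return on T_ERROR (assuming the patched build): tokenize() stops building the token array once the lexer signals the error, so no partial result ever reaches the caller; the ParseException is the only outcome.

<?php
// Sketch only: the assignment never happens because the exception
// propagates out of token_get_all() before it returns.
try {
    $tokens = token_get_all('<?php $a = 1; $b = 078;');
} catch (ParseException $e) {
    var_dump(isset($tokens)); // bool(false)
}
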
diff --git a/ext/tokenizer/tokenizer_data_gen.sh b/ext/tokenizer/tokenizer_data_gen.sh
index 13384c8490..49ef7cbfa3 100755
--- a/ext/tokenizer/tokenizer_data_gen.sh
+++ b/ext/tokenizer/tokenizer_data_gen.sh
@@ -45,7 +45,10 @@ echo '/*
 echo 'void tokenizer_register_constants(INIT_FUNC_ARGS) {' >> $OUTFILE
-$AWK '/^#define T_/ { print " REGISTER_LONG_CONSTANT(\"" $2 "\", " $2 ", CONST_CS | CONST_PERSISTENT);" }' < $INFILE >> $OUTFILE
+$AWK '
+ /^#define T_ERROR/ { next }
+ /^#define T_/ { print " REGISTER_LONG_CONSTANT(\"" $2 "\", " $2 ", CONST_CS | CONST_PERSISTENT);" }
+' < $INFILE >> $OUTFILE
 echo ' REGISTER_LONG_CONSTANT("T_DOUBLE_COLON", T_PAAMAYIM_NEKUDOTAYIM, CONST_CS | CONST_PERSISTENT);' >> $OUTFILE
 echo '}' >> $OUTFILE
@@ -61,6 +64,7 @@ $AWK '
 print " case T_PAAMAYIM_NEKUDOTAYIM: return \"T_DOUBLE_COLON\";"
 next
 }
+/^#define T_ERROR/ { next }
 /^#define T_/ {
 print " case " $2 ": return \"" $2 "\";"
 }
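
Since the generator now skips T_ERROR both when registering constants and when emitting the token name mapping, the magic token should stay invisible to userland. A hedged sketch of what that implies for a build produced with this script:

<?php
// Sketch only: T_ERROR is internal to the lexer/parser handshake, so no
// constant is registered for it, while ordinary tokens are still exported.
var_dump(defined('T_ERROR'));        // bool(false)
var_dump(defined('T_DOUBLE_COLON')); // bool(true)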