summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Nikita Popov <nikita.ppv@gmail.com>, 2019-09-14 12:08:20 +0200
committer: Nikita Popov <nikita.ppv@gmail.com>, 2019-09-14 12:10:06 +0200
commit: 3f76f9416ff82c10174dc382285b4b91789e278b (patch)
tree: 813137b6bcdd0fd541b7f43b459b048edfd8597a
parent: ab4f725d83cbcb1172a96aaa5134f0595f3fb0fa (diff)
download: php-git-3f76f9416ff82c10174dc382285b4b91789e278b.tar.gz
Fix double-free on invalid large octal with separators
To clean up the mess here a bit, check for invalid octal digits with an explicit loop instead of mixing that check into the string-to-number conversion. Also clean up some type usage.
-rw-r--r-- Zend/zend_language_scanner.l 77
-rw-r--r-- ext/tokenizer/tests/invalid_large_octal_with_underscores.phpt 31
2 files changed, 60 insertions, 48 deletions
diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l
index 84ed669dfb..86b8bb1a70 100644
--- a/Zend/zend_language_scanner.l
+++ b/Zend/zend_language_scanner.l
@@ -120,7 +120,7 @@ do { \
BEGIN_EXTERN_C()
-static void strip_underscores(char *str, int *len)
+static void strip_underscores(char *str, size_t *len)
{
char *src = str, *dest = str;
while (*src != '\0') {
@@ -1771,8 +1771,9 @@ NEWLINE ("\r"|"\n"|"\r\n")
<ST_IN_SCRIPTING>{BNUM} {
/* The +/- 2 skips "0b" */
- int len = yyleng - 2, contains_underscores;
+ size_t len = yyleng - 2;
char *end, *bin = yytext + 2;
+ zend_bool contains_underscores;
/* Skip any leading 0s */
while (len > 0 && (*bin == '0' || *bin == '_')) {
@@ -1811,10 +1812,25 @@ NEWLINE ("\r"|"\n"|"\r\n")
}
<ST_IN_SCRIPTING>{LNUM} {
- int len = yyleng, contains_underscores;
+ size_t len = yyleng;
char *end, *lnum = yytext;
-
- contains_underscores = (memchr(lnum, '_', len) != NULL);
+ zend_bool is_octal = lnum[0] == '0';
+ zend_bool contains_underscores = (memchr(lnum, '_', len) != NULL);
+
+ /* Digits 8 and 9 are illegal in octal literals. */
+ if (is_octal) {
+ size_t i;
+ for (i = 0; i < len; i++) {
+ if (lnum[i] == '8' || lnum[i] == '9') {
+ zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
+ ZVAL_UNDEF(zendlval);
+ if (PARSER_MODE()) {
+ RETURN_TOKEN(T_ERROR);
+ }
+ RETURN_TOKEN_WITH_VAL(T_LNUMBER);
+ }
+ }
+ }
if (contains_underscores) {
lnum = estrndup(lnum, len);
@@ -1824,21 +1840,8 @@ NEWLINE ("\r"|"\n"|"\r\n")
if (len < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
errno = 0;
/* base must be passed explicitly for correct parse error on Windows */
- ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, lnum[0] == '0' ? 8 : 10));
- /* This isn't an assert, we need to ensure 019 isn't valid octal
- * Because the lexing itself doesn't do that for us
- */
- if (end != lnum + len) {
- zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
- ZVAL_UNDEF(zendlval);
- if (contains_underscores) {
- efree(lnum);
- }
- if (PARSER_MODE()) {
- RETURN_TOKEN(T_ERROR);
- }
- RETURN_TOKEN_WITH_VAL(T_LNUMBER);
- }
+ ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, is_octal ? 8 : 10));
+ ZEND_ASSERT(end == lnum + len);
} else {
errno = 0;
ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, 0));
@@ -1849,35 +1852,13 @@ NEWLINE ("\r"|"\n"|"\r\n")
} else {
ZVAL_DOUBLE(zendlval, zend_strtod(lnum, (const char **)&end));
}
- /* Also not an assert for the same reason */
- if (end != lnum + len) {
- zend_throw_exception(zend_ce_parse_error,
- "Invalid numeric literal", 0);
- ZVAL_UNDEF(zendlval);
- if (contains_underscores) {
- efree(lnum);
- }
- if (PARSER_MODE()) {
- RETURN_TOKEN(T_ERROR);
- }
- }
+ ZEND_ASSERT(end == lnum + len);
if (contains_underscores) {
efree(lnum);
}
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
}
- /* Also not an assert for the same reason */
- if (end != lnum + len) {
- zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
- ZVAL_UNDEF(zendlval);
- if (contains_underscores) {
- efree(lnum);
- }
- if (PARSER_MODE()) {
- RETURN_TOKEN(T_ERROR);
- }
- RETURN_TOKEN_WITH_VAL(T_DNUMBER);
- }
+ ZEND_ASSERT(end == lnum + len);
}
ZEND_ASSERT(!errno);
if (contains_underscores) {
@@ -1888,8 +1869,9 @@ NEWLINE ("\r"|"\n"|"\r\n")
<ST_IN_SCRIPTING>{HNUM} {
/* The +/- 2 skips "0x" */
- int len = yyleng - 2, contains_underscores;
+ size_t len = yyleng - 2;
char *end, *hex = yytext + 2;
+ zend_bool contains_underscores;
/* Skip any leading 0s */
while (len > 0 && (*hex == '0' || *hex == '_')) {
@@ -1954,10 +1936,9 @@ string:
<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
const char *end;
- int len = yyleng, contains_underscores;
+ size_t len = yyleng;
char *dnum = yytext;
-
- contains_underscores = (memchr(dnum, '_', len) != NULL);
+ zend_bool contains_underscores = (memchr(dnum, '_', len) != NULL);
if (contains_underscores) {
dnum = estrndup(dnum, len);
diff --git a/ext/tokenizer/tests/invalid_large_octal_with_underscores.phpt b/ext/tokenizer/tests/invalid_large_octal_with_underscores.phpt
new file mode 100644
index 0000000000..b6d43c1ff1
--- /dev/null
+++ b/ext/tokenizer/tests/invalid_large_octal_with_underscores.phpt
@@ -0,0 +1,31 @@
+--TEST--
+Large invalid octal number with underscores
+--FILE--
+<?php
+
+var_dump(token_get_all("<?php 0_10000000000000000000009;"));
+
+?>
+--EXPECTF--
+array(3) {
+ [0]=>
+ array(3) {
+ [0]=>
+ int(%d)
+ [1]=>
+ string(6) "<?php "
+ [2]=>
+ int(1)
+ }
+ [1]=>
+ array(3) {
+ [0]=>
+ int(%d)
+ [1]=>
+ string(25) "0_10000000000000000000009"
+ [2]=>
+ int(1)
+ }
+ [2]=>
+ string(1) ";"
+}