summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGurusamy Sarathy <gsar@cpan.org>2000-05-02 06:48:19 +0000
committerGurusamy Sarathy <gsar@cpan.org>2000-05-02 06:48:19 +0000
commitb21ed0a92b5a07dd021a85728802e72edfa03699 (patch)
tree2bca4101a44803d4b5c72468e9d4dc24f12743a5
parenta6b2f353992254a6ec5c40c60b053f7a6817c8e4 (diff)
downloadperl-b21ed0a92b5a07dd021a85728802e72edfa03699.tar.gz
change#3798 broke the meaning of "\0_7_7", tr/\0_// etc.; fix it
such that underscores are only ignored in literal numbers, "\x{...}", and hex/oct argument p4raw-link: @3798 on //depot/cfgperl: 252aa0820e6bce274b33bd342cfc65e18a59a165 p4raw-id: //depot/perl@6044
-rw-r--r--perl.c2
-rw-r--r--pp.c2
-rw-r--r--regcomp.c11
-rwxr-xr-xt/op/oct.t86
-rw-r--r--toke.c3
-rw-r--r--util.c37
6 files changed, 93 insertions, 48 deletions
diff --git a/perl.c b/perl.c
index ee71369b2a..ff851b4417 100644
--- a/perl.c
+++ b/perl.c
@@ -1984,6 +1984,7 @@ Perl_moreswitches(pTHX_ char *s)
case '0':
{
dTHR;
+ numlen = 0; /* disallow underscores */
rschar = (U32)scan_oct(s, 4, &numlen);
SvREFCNT_dec(PL_nrs);
if (rschar & ~((U8)~0))
@@ -2099,6 +2100,7 @@ Perl_moreswitches(pTHX_ char *s)
if (isDIGIT(*s)) {
PL_ors = savepv("\n");
PL_orslen = 1;
+ numlen = 0; /* disallow underscores */
*PL_ors = (char)scan_oct(s, 3 + (*s == '0'), &numlen);
s += numlen;
}
diff --git a/pp.c b/pp.c
index 17824bd090..a86be7ad3e 100644
--- a/pp.c
+++ b/pp.c
@@ -1885,6 +1885,7 @@ PP(pp_hex)
STRLEN n_a;
tmps = POPpx;
+ argtype = 1; /* allow underscores */
XPUSHn(scan_hex(tmps, 99, &argtype));
RETURN;
}
@@ -1902,6 +1903,7 @@ PP(pp_oct)
tmps++;
if (*tmps == '0')
tmps++;
+ argtype = 1; /* allow underscores */
if (*tmps == 'x')
value = scan_hex(++tmps, 99, &argtype);
else if (*tmps == 'b')
diff --git a/regcomp.c b/regcomp.c
index 7af090e882..95437105b8 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2605,8 +2605,10 @@ tryagain:
if (!e)
FAIL("Missing right brace on \\x{}");
else if (UTF) {
+ numlen = 1; /* allow underscores */
ender = (UV)scan_hex(p + 1, e - p, &numlen);
- if (numlen + len >= 127) { /* numlen is generous */
+ /* numlen is generous */
+ if (numlen + len >= 127) {
p--;
goto loopdone;
}
@@ -2616,6 +2618,7 @@ tryagain:
FAIL("Can't use \\x{} without 'use utf8' declaration");
}
else {
+ numlen = 0; /* disallow underscores */
ender = (UV)scan_hex(p, 2, &numlen);
p += numlen;
}
@@ -2629,6 +2632,7 @@ tryagain:
case '5': case '6': case '7': case '8':case '9':
if (*p == '0' ||
(isDIGIT(p[1]) && atoi(p) >= PL_regnpar) ) {
+ numlen = 0; /* disallow underscores */
ender = (UV)scan_oct(p, 3, &numlen);
p += numlen;
}
@@ -2940,6 +2944,7 @@ S_regclass(pTHX)
case 'a': value = '\057'; break;
#endif
case 'x':
+ numlen = 0; /* disallow underscores */
value = (UV)scan_hex(PL_regcomp_parse, 2, &numlen);
PL_regcomp_parse += numlen;
break;
@@ -2949,6 +2954,7 @@ S_regclass(pTHX)
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
+ numlen = 0; /* disallow underscores */
value = (UV)scan_oct(--PL_regcomp_parse, 3, &numlen);
PL_regcomp_parse += numlen;
break;
@@ -3414,12 +3420,14 @@ S_regclassutf8(pTHX)
e = strchr(PL_regcomp_parse++, '}');
if (!e)
FAIL("Missing right brace on \\x{}");
+ numlen = 1; /* allow underscores */
value = (UV)scan_hex(PL_regcomp_parse,
e - PL_regcomp_parse,
&numlen);
PL_regcomp_parse = e + 1;
}
else {
+ numlen = 0; /* disallow underscores */
value = (UV)scan_hex(PL_regcomp_parse, 2, &numlen);
PL_regcomp_parse += numlen;
}
@@ -3430,6 +3438,7 @@ S_regclassutf8(pTHX)
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
+ numlen = 0; /* disallow underscores */
value = (UV)scan_oct(--PL_regcomp_parse, 3, &numlen);
PL_regcomp_parse += numlen;
break;
diff --git a/t/op/oct.t b/t/op/oct.t
index 27ac5aa042..3a487d8173 100755
--- a/t/op/oct.t
+++ b/t/op/oct.t
@@ -1,53 +1,67 @@
#!./perl
-print "1..36\n";
+print "1..44\n";
-print +(oct('0b10101') == 0b10101) ? "ok" : "not ok", " 1\n";
-print +(oct('0b10101') == 025) ? "ok" : "not ok", " 2\n";
-print +(oct('0b10101') == 21) ? "ok" : "not ok", " 3\n";
-print +(oct('0b10101') == 0x15) ? "ok" : "not ok", " 4\n";
+print +(oct('0b1_0101') == 0b101_01) ? "ok" : "not ok", " 1\n";
+print +(oct('0b10_101') == 0_2_5) ? "ok" : "not ok", " 2\n";
+print +(oct('0b101_01') == 2_1) ? "ok" : "not ok", " 3\n";
+print +(oct('0b1010_1') == 0x1_5) ? "ok" : "not ok", " 4\n";
-print +(oct('b10101') == 0b10101) ? "ok" : "not ok", " 5\n";
-print +(oct('b10101') == 025) ? "ok" : "not ok", " 6\n";
-print +(oct('b10101') == 21) ? "ok" : "not ok", " 7\n";
-print +(oct('b10101') == 0x15) ? "ok" : "not ok", " 8\n";
+print +(oct('b1_0101') == 0b10101) ? "ok" : "not ok", " 5\n";
+print +(oct('b10_101') == 025) ? "ok" : "not ok", " 6\n";
+print +(oct('b101_01') == 21) ? "ok" : "not ok", " 7\n";
+print +(oct('b1010_1') == 0x15) ? "ok" : "not ok", " 8\n";
-print +(oct('01234') == 0b1010011100) ? "ok" : "not ok", " 9\n";
-print +(oct('01234') == 01234) ? "ok" : "not ok", " 10\n";
-print +(oct('01234') == 668) ? "ok" : "not ok", " 11\n";
+print +(oct('01_234') == 0b10_1001_1100) ? "ok" : "not ok", " 9\n";
+print +(oct('012_34') == 01234) ? "ok" : "not ok", " 10\n";
+print +(oct('0123_4') == 668) ? "ok" : "not ok", " 11\n";
print +(oct('01234') == 0x29c) ? "ok" : "not ok", " 12\n";
-print +(oct('0x1234') == 0b1001000110100) ? "ok" : "not ok", " 13\n";
-print +(oct('0x1234') == 011064) ? "ok" : "not ok", " 14\n";
-print +(oct('0x1234') == 4660) ? "ok" : "not ok", " 15\n";
-print +(oct('0x1234') == 0x1234) ? "ok" : "not ok", " 16\n";
+print +(oct('0x1_234') == 0b10010_00110100) ? "ok" : "not ok", " 13\n";
+print +(oct('0x12_34') == 01_1064) ? "ok" : "not ok", " 14\n";
+print +(oct('0x123_4') == 4660) ? "ok" : "not ok", " 15\n";
+print +(oct('0x1234') == 0x12_34) ? "ok" : "not ok", " 16\n";
-print +(oct('x1234') == 0b1001000110100) ? "ok" : "not ok", " 17\n";
-print +(oct('x1234') == 011064) ? "ok" : "not ok", " 18\n";
-print +(oct('x1234') == 4660) ? "ok" : "not ok", " 19\n";
-print +(oct('x1234') == 0x1234) ? "ok" : "not ok", " 20\n";
+print +(oct('x1_234') == 0b100100011010_0) ? "ok" : "not ok", " 17\n";
+print +(oct('x12_34') == 0_11064) ? "ok" : "not ok", " 18\n";
+print +(oct('x123_4') == 4660) ? "ok" : "not ok", " 19\n";
+print +(oct('x1234') == 0x_1234) ? "ok" : "not ok", " 20\n";
-print +(hex('01234') == 0b1001000110100) ? "ok" : "not ok", " 21\n";
-print +(hex('01234') == 011064) ? "ok" : "not ok", " 22\n";
-print +(hex('01234') == 4660) ? "ok" : "not ok", " 23\n";
-print +(hex('01234') == 0x1234) ? "ok" : "not ok", " 24\n";
+print +(hex('01_234') == 0b_1001000110100) ? "ok" : "not ok", " 21\n";
+print +(hex('012_34') == 011064) ? "ok" : "not ok", " 22\n";
+print +(hex('0123_4') == 4660) ? "ok" : "not ok", " 23\n";
+print +(hex('01234_') == 0x1234) ? "ok" : "not ok", " 24\n";
-print +(hex('0x1234') == 0b1001000110100) ? "ok" : "not ok", " 25\n";
-print +(hex('0x1234') == 011064) ? "ok" : "not ok", " 26\n";
-print +(hex('0x1234') == 4660) ? "ok" : "not ok", " 27\n";
-print +(hex('0x1234') == 0x1234) ? "ok" : "not ok", " 28\n";
+print +(hex('0x_1234') == 0b1001000110100) ? "ok" : "not ok", " 25\n";
+print +(hex('0x1_234') == 011064) ? "ok" : "not ok", " 26\n";
+print +(hex('0x12_34') == 4660) ? "ok" : "not ok", " 27\n";
+print +(hex('0x1234_') == 0x1234) ? "ok" : "not ok", " 28\n";
-print +(hex('x1234') == 0b1001000110100) ? "ok" : "not ok", " 29\n";
-print +(hex('x1234') == 011064) ? "ok" : "not ok", " 30\n";
-print +(hex('x1234') == 4660) ? "ok" : "not ok", " 31\n";
-print +(hex('x1234') == 0x1234) ? "ok" : "not ok", " 32\n";
+print +(hex('x_1234') == 0b1001000110100) ? "ok" : "not ok", " 29\n";
+print +(hex('x12_34') == 011064) ? "ok" : "not ok", " 30\n";
+print +(hex('x123_4') == 4660) ? "ok" : "not ok", " 31\n";
+print +(hex('x1234_') == 0x1234) ? "ok" : "not ok", " 32\n";
-print +(oct('0b11111111111111111111111111111111') == 4294967295) ?
+print +(oct('0b1111_1111_1111_1111_1111_1111_1111_1111') == 4294967295) ?
"ok" : "not ok", " 33\n";
-print +(oct('037777777777') == 4294967295) ?
+print +(oct('037_777_777_777') == 4294967295) ?
"ok" : "not ok", " 34\n";
-print +(oct('0xffffffff') == 4294967295) ?
+print +(oct('0xffff_ffff') == 4294967295) ?
"ok" : "not ok", " 35\n";
-print +(hex('0xffffffff') == 4294967295) ?
+print +(hex('0xff_ff_ff_ff') == 4294967295) ?
"ok" : "not ok", " 36\n";
+
+$_ = "\0_7_7";
+print length eq 5 ? "ok" : "not ok", " 37\n";
+print $_ eq "\0"."_"."7"."_"."7" ? "ok" : "not ok", " 38\n";
+chop, chop, chop, chop;
+print $_ eq "\0" ? "ok" : "not ok", " 39\n";
+print "\077_" eq "?_" ? "ok" : "not ok", " 40\n";
+
+$_ = "\x_7_7";
+print length eq 5 ? "ok" : "not ok", " 41\n";
+print $_ eq "\0"."_"."7"."_"."7" ? "ok" : "not ok", " 42\n";
+chop, chop, chop, chop;
+print $_ eq "\0" ? "ok" : "not ok", " 43\n";
+print "\x2F_" eq "/_" ? "ok" : "not ok", " 44\n";
diff --git a/toke.c b/toke.c
index 860e3c1f8c..10273a0111 100644
--- a/toke.c
+++ b/toke.c
@@ -1389,6 +1389,7 @@ S_scan_const(pTHX_ char *start)
/* \132 indicates an octal constant */
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
+ len = 0; /* disallow underscores */
uv = (UV)scan_oct(s, 3, &len);
s += len;
goto NUM_ESCAPE_INSERT;
@@ -1402,10 +1403,12 @@ S_scan_const(pTHX_ char *start)
yyerror("Missing right brace on \\x{}");
e = s;
}
+ len = 1; /* allow underscores */
uv = (UV)scan_hex(s + 1, e - s - 1, &len);
s = e + 1;
}
else {
+ len = 0; /* disallow underscores */
uv = (UV)scan_hex(s, 2, &len);
s += len;
}
diff --git a/util.c b/util.c
index 059d9a45fc..2dfbfaaaf8 100644
--- a/util.c
+++ b/util.c
@@ -2877,9 +2877,13 @@ Perl_scan_bin(pTHX_ char *start, I32 len, I32 *retlen)
for (; len-- && *s; s++) {
if (!(*s == '0' || *s == '1')) {
- if (*s == '_')
- continue; /* Note: does not check for __ and the like. */
- if (seenb == FALSE && *s == 'b' && ruv == 0) {
+ if (*s == '_' && len && *retlen
+ && (s[1] == '0' || s[1] == '1'))
+ {
+ --len;
+ ++s;
+ }
+ else if (seenb == FALSE && *s == 'b' && ruv == 0) {
/* Disallow 0bbb0b0bbb... */
seenb = TRUE;
continue;
@@ -2902,7 +2906,8 @@ Perl_scan_bin(pTHX_ char *start, I32 len, I32 *retlen)
if (ckWARN_d(WARN_OVERFLOW))
Perl_warner(aTHX_ WARN_OVERFLOW,
"Integer overflow in binary number");
- } else
+ }
+ else
ruv = xuv | (*s - '0');
}
if (overflowed) {
@@ -2942,8 +2947,12 @@ Perl_scan_oct(pTHX_ char *start, I32 len, I32 *retlen)
for (; len-- && *s; s++) {
if (!(*s >= '0' && *s <= '7')) {
- if (*s == '_')
- continue; /* Note: does not check for __ and the like. */
+ if (*s == '_' && len && *retlen
+ && (s[1] >= '0' && s[1] <= '7'))
+ {
+ --len;
+ ++s;
+ }
else {
/* Allow \octal to work the DWIM way (that is, stop scanning
* as soon as non-octal characters are seen, complain only iff
@@ -2967,7 +2976,8 @@ Perl_scan_oct(pTHX_ char *start, I32 len, I32 *retlen)
if (ckWARN_d(WARN_OVERFLOW))
Perl_warner(aTHX_ WARN_OVERFLOW,
"Integer overflow in octal number");
- } else
+ }
+ else
ruv = xuv | (*s - '0');
}
if (overflowed) {
@@ -3010,9 +3020,13 @@ Perl_scan_hex(pTHX_ char *start, I32 len, I32 *retlen)
for (; len-- && *s; s++) {
hexdigit = strchr((char *) PL_hexdigit, *s);
if (!hexdigit) {
- if (*s == '_')
- continue; /* Note: does not check for __ and the like. */
- if (seenx == FALSE && *s == 'x' && ruv == 0) {
+ if (*s == '_' && len && *retlen && s[1]
+ && (hexdigit = strchr((char *) PL_hexdigit, s[1])))
+ {
+ --len;
+ ++s;
+ }
+ else if (seenx == FALSE && *s == 'x' && ruv == 0) {
/* Disallow 0xxx0x0xxx... */
seenx = TRUE;
continue;
@@ -3035,7 +3049,8 @@ Perl_scan_hex(pTHX_ char *start, I32 len, I32 *retlen)
if (ckWARN_d(WARN_OVERFLOW))
Perl_warner(aTHX_ WARN_OVERFLOW,
"Integer overflow in hexadecimal number");
- } else
+ }
+ else
ruv = xuv | ((hexdigit - PL_hexdigit) & 15);
}
if (overflowed) {