[Merge] More UTF8 patches

These are more of Brian Fraser’s UTF8 patches from perl ticket #107008. There will probably be just one more before 5.16 (the label patch).
author: Father Chrysostomos <sprout@cpan.org> 2012-03-25 13:46:16 -0700
committer: Father Chrysostomos <sprout@cpan.org> 2012-03-25 13:46:16 -0700
commit: 70558906b0dcb94c924d98d19c32c8f22f495cde (patch)
tree: a5a5613f812e7d70575733c064ec7f989446ea15
parent: 78c28381895e365e220a83fe0515986e1d6c6ea1 (diff)
parent: 97eb901d2f2a70940d8b0cfa133eb3cdfee12f30 (diff)
download: perl-70558906b0dcb94c924d98d19c32c8f22f495cde.tar.gz
4 files changed, 190 insertions, 20 deletions
diff --git a/t/lib/warnings/op b/t/lib/warnings/op
index f2270dc01a..de74d2e360 100644
--- a/t/lib/warnings/op
+++ b/t/lib/warnings/op
@@ -857,7 +857,7 @@ use open qw( :utf8 :std );
 use warnings;
 eval "sub fòò (\$\0) {}";
 EXPECT
-Illegal character in prototype for main::fòò : $\x{0} at (eval 1) line 1.
+Illegal character in prototype for main::fòò : $\0 at (eval 1) line 1.
 ########
 # op.c
 use utf8;
@@ -865,7 +865,7 @@ use open qw( :utf8 :std );
 use warnings;
 eval "sub foo (\0) {}";
 EXPECT
-Illegal character in prototype for main::foo : \x{0} at (eval 1) line 1.
+Illegal character in prototype for main::foo : \0 at (eval 1) line 1.
 ########
 # op.c
 use utf8;
@@ -882,7 +882,21 @@ use open qw( :utf8 :std );
 use warnings;
 BEGIN { eval "sub foo (\0) {}"; }
 EXPECT
-Illegal character in prototype for main::foo : \x{0} at (eval 1) line 1.
+Illegal character in prototype for main::foo : \0 at (eval 1) line 1.
+########
+# op.c
+use warnings;
+eval "sub foo (\xAB) {}";
+EXPECT
+Illegal character in prototype for main::foo : \x{ab} at (eval 1) line 1.
+########
+# op.c
+use utf8;
+use open qw( :utf8 :std );
+use warnings;
+BEGIN { eval "sub foo (\x{30cb}) {}"; }
+EXPECT
+Illegal character in prototype for main::foo : \x{30cb} at (eval 1) line 1.
 ########
 # op.c
 use utf8;
diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke
index a6841d2d09..dd8dc3d517 100644
--- a/t/lib/warnings/toke
+++ b/t/lib/warnings/toke
@@ -616,6 +616,30 @@ EXPECT
 Bareword "FRED::" refers to nonexistent package at bar line 25.
 ########
 # toke.c
+use utf8;
+use open qw( :utf8 :std );
+use warnings 'bareword' ;
+#line 25 "bar"
+$a = FRÈD:: ;
+no warnings 'bareword' ;
+#line 25 "bar"
+$a = FRÈD:: ;
+EXPECT
+Bareword "FRÈD::" refers to nonexistent package at bar line 25.
+########
+# toke.c
+use utf8;
+use open qw( :utf8 :std );
+use warnings 'bareword' ;
+#line 25 "bar"
+$a = ϞϞϞ:: ;
+no warnings 'bareword' ;
+#line 25 "bar"
+$a = ϞϞϞ:: ;
+EXPECT
+Bareword "ϞϞϞ::" refers to nonexistent package at bar line 25.
+########
+# toke.c
 use warnings 'ambiguous' ;
 sub time {}
 my $a = time() ;
@@ -692,11 +716,77 @@ Ambiguous use of -fred resolved as -&fred() at - line 9.
 Ambiguous use of -fred resolved as -&fred() at - line 11.
 ########
 # toke.c
+use utf8;
+use open qw( :utf8 :std );
+sub frèd {};
+-frèd ;
+EXPECT
+Ambiguous use of -frèd resolved as -&frèd() at - line 5.
+########
+# toke.c
+$^W = 0 ;
+use utf8;
+use open qw( :utf8 :std );
+sub frèd {} ;
+-frèd ;
+{
+    no warnings 'ambiguous' ;
+    -frèd ;
+    use warnings 'ambiguous' ;
+    -frèd ;
+}
+-frèd ;
+EXPECT
+Ambiguous use of -frèd resolved as -&frèd() at - line 6.
+Ambiguous use of -frèd resolved as -&frèd() at - line 11.
+Ambiguous use of -frèd resolved as -&frèd() at - line 13.
+########
+# toke.c
+use utf8;
+use open qw( :utf8 :std );
+sub ᒍᒘᒊ {};
+-ᒍᒘᒊ ;
+EXPECT
+Ambiguous use of -ᒍᒘᒊ resolved as -&ᒍᒘᒊ() at - line 5.
+########
+# toke.c
+$^W = 0 ;
+use utf8;
+use open qw( :utf8 :std );
+sub ᒍᒘᒊ {} ;
+-ᒍᒘᒊ ;
+{
+    no warnings 'ambiguous' ;
+    -ᒍᒘᒊ ;
+    use warnings 'ambiguous' ;
+    -ᒍᒘᒊ ;
+}
+-ᒍᒘᒊ ;
+EXPECT
+Ambiguous use of -ᒍᒘᒊ resolved as -&ᒍᒘᒊ() at - line 6.
+Ambiguous use of -ᒍᒘᒊ resolved as -&ᒍᒘᒊ() at - line 11.
+Ambiguous use of -ᒍᒘᒊ resolved as -&ᒍᒘᒊ() at - line 13.
+########
+# toke.c
 open FOO || time;
 open local *FOO; # should be ok
 EXPECT
 Precedence problem: open FOO should be open(FOO) at - line 2.
 ########
+# toke.c
+use utf8;
+use open qw( :utf8 :std );
+open FÒÒ || time;
+EXPECT
+Precedence problem: open FÒÒ should be open(FÒÒ) at - line 4.
+########
+# toke.c
+use utf8;
+use open qw( :utf8 :std );
+open ᒍOO || time;
+EXPECT
+Precedence problem: open ᒍOO should be open(ᒍOO) at - line 4.
+########
 # toke.c (and [perl #16184])
 open FOO => "<&0"; close FOO;
 EXPECT
@@ -719,6 +809,40 @@ Precedence problem: open FOO should be open(FOO) at - line 10.
 ########
 # toke.c
 $^W = 0 ;
+use utf8;
+use open qw( :utf8 :std );
+open FÒÒ || time;
+{
+    no warnings 'precedence' ;
+    open FÒÒ || time;
+    use warnings 'precedence' ;
+    open FÒÒ || time;
+}
+open FÒÒ || time;
+EXPECT
+Precedence problem: open FÒÒ should be open(FÒÒ) at - line 5.
+Precedence problem: open FÒÒ should be open(FÒÒ) at - line 10.
+Precedence problem: open FÒÒ should be open(FÒÒ) at - line 12.
+########
+# toke.c
+use utf8;
+use open qw( :utf8 :std );
+$^W = 0 ;
+open ᒍÒÒ || time;
+{
+    no warnings 'precedence' ;
+    open ᒍÒÒ || time;
+    use warnings 'precedence' ;
+    open ᒍÒÒ || time;
+}
+open ᒍÒÒ || time;
+EXPECT
+Precedence problem: open ᒍÒÒ should be open(ᒍÒÒ) at - line 5.
+Precedence problem: open ᒍÒÒ should be open(ᒍÒÒ) at - line 10.
+Precedence problem: open ᒍÒÒ should be open(ᒍÒÒ) at - line 12.
+########
+# toke.c
+$^W = 0 ;
 *foo *foo ;
 {
     no warnings 'ambiguous' ;
@@ -1118,3 +1242,11 @@ no warnings 'ambiguous' ;
 $a = ${f렏} ;
 EXPECT
 Ambiguous use of ${f렏} resolved to $f렏 at - line 6.
+########
+# toke.c
+use utf8;
+use open qw( :utf8 :std );
+use warnings;
+CORE::렏;
+EXPECT
+CORE::렏 is not a keyword at - line 5.
diff --git a/t/uni/parser.t b/t/uni/parser.t
index 256864cb80..63c2deba68 100644
--- a/t/uni/parser.t
+++ b/t/uni/parser.t
@@ -7,7 +7,7 @@ BEGIN {
     require './test.pl';
 }
 
-plan (tests => 45);
+plan (tests => 47);
 
 use utf8;
 use open qw( :utf8 :std );
@@ -138,3 +138,9 @@ is ${"main::\345\225\217"}, undef, "..and using the encoded form doesn't";
     eval qq!print \x{30cb}, "comma""!;
     like $@, qr/No comma allowed after filehandle/, "No comma allowed after filehandle triggers correctly for UTF-8 filehandles.";
 }
+
+# tests for "Bad name"
+eval q{ Ｆｏｏ::$bar };
+like( $@, qr/Bad name after Ｆｏｏ::/, 'Bad name after Ｆｏｏ::' );
+eval q{ Ｆｏｏ''bar };
+like( $@, qr/Bad name after Ｆｏｏ'/, 'Bad name after Ｆｏｏ\'' );
diff --git a/toke.c b/toke.c
index e43bc744a8..1d7a44fcf0 100644
--- a/toke.c
+++ b/toke.c
@@ -6659,7 +6659,9 @@ Perl_yylex(pTHX)
 		    s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
 				  TRUE, &morelen);
 		    if (!morelen)
-			Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
+			Perl_croak(aTHX_ "Bad name after %"SVf"%s",
+                                        SVfARG(newSVpvn_flags(PL_tokenbuf, len,
+                                            (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
 				*s == '\'' ? "'" : "::");
 		    len += morelen;
 		    pkgname = 1;
@@ -6685,8 +6687,9 @@ Perl_yylex(pTHX)
 		    if (ckWARN(WARN_BAREWORD)
 			&& ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
 			Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
-		  	    "Bareword \"%s\" refers to nonexistent package",
-			     PL_tokenbuf);
+		  	    "Bareword \"%"SVf"\" refers to nonexistent package",
+			     SVfARG(newSVpvn_flags(PL_tokenbuf, len,
+                                        (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
 		    len -= 2;
 		    PL_tokenbuf[len] = '\0';
 		    gv = NULL;
@@ -6867,10 +6870,12 @@ Perl_yylex(pTHX)
 		/* Not a method, so call it a subroutine (if defined) */
 
 		if (cv) {
-		    if (lastchar == '-')
-			Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-					 "Ambiguous use of -%s resolved as -&%s()",
-					 PL_tokenbuf, PL_tokenbuf);
+		    if (lastchar == '-') {
+                        const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
+ 			Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
+				"Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
+				SVfARG(tmpsv), SVfARG(tmpsv));
+                    }
 		    /* Check for a constant sub */
 		    if ((sv = cv_const_sv(cv))) {
 		  its_constant:
@@ -7196,7 +7201,9 @@ Perl_yylex(pTHX)
 		d = s;
 		s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
 		if (!(tmp = keyword(PL_tokenbuf, len, 1)))
-		    Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
+		    Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
+                                    SVfARG(newSVpvn_flags(PL_tokenbuf, len,
+                                                (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
 		if (tmp < 0)
 		    tmp = -tmp;
 		else if (tmp == KEY_require || tmp == KEY_do
@@ -7715,8 +7722,14 @@ Perl_yylex(pTHX)
 	    s = SKIPSPACE1(s);
 	    if (isIDFIRST_lazy_if(s,UTF)) {
 		const char *t;
-		for (d = s; isALNUM_lazy_if(d,UTF);)
-		    d++;
+		for (d = s; isALNUM_lazy_if(d,UTF);) {
+		    d += UTF ? UTF8SKIP(d) : 1;
+                    if (UTF) {
+                        while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
+                            d += UTF ? UTF8SKIP(d) : 1;
+                        }
+                    }
+                }
 		for (t=d; isSPACE(*t);)
 		    t++;
 		if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
@@ -7725,10 +7738,11 @@ Perl_yylex(pTHX)
 		    && !(t[0] == ':' && t[1] == ':')
 		    && !keyword(s, d-s, 0)
 		) {
-		    int parms_len = (int)(d-s);
+		    SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
+                                                SVs_TEMP | (UTF ? SVf_UTF8 : 0));
 		    Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
-			   "Precedence problem: open %.*s should be open(%.*s)",
-			    parms_len, s, parms_len, s);
+			   "Precedence problem: open %"SVf" should be open(%"SVf")",
+			    SVfARG(tmpsv), SVfARG(tmpsv));
 		}
 	    }
 	    LOP(OP_OPEN,XTERM);
@@ -8217,9 +8231,13 @@ Perl_yylex(pTHX)
 				    "Illegal character %sin prototype for %"SVf" : %s",
 				    seen_underscore ? "after '_' " : "",
 				    SVfARG(PL_subname),
-                                    sv_uni_display(dsv,
-                                         newSVpvn_flags(d, tmp, SVs_TEMP | SvUTF8(PL_lex_stuff)),
-                                         tmp, UNI_DISPLAY_ISPRINT));
+                                    SvUTF8(PL_lex_stuff)
+                                        ? sv_uni_display(dsv,
+                                            newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
+                                            tmp,
+                                            UNI_DISPLAY_ISPRINT)
+                                        : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
+                                            PERL_PV_ESCAPE_NONASCII));
                     }
                     SvCUR_set(PL_lex_stuff, tmp);
 		    have_proto = TRUE;
author	Father Chrysostomos <sprout@cpan.org>	2012-03-25 13:46:16 -0700
committer	Father Chrysostomos <sprout@cpan.org>	2012-03-25 13:46:16 -0700
commit	70558906b0dcb94c924d98d19c32c8f22f495cde (patch)
tree	a5a5613f812e7d70575733c064ec7f989446ea15
parent	78c28381895e365e220a83fe0515986e1d6c6ea1 (diff)
parent	97eb901d2f2a70940d8b0cfa133eb3cdfee12f30 (diff)
download	perl-70558906b0dcb94c924d98d19c32c8f22f495cde.tar.gz