summaryrefslogtreecommitdiff
path: root/mg.c
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2014-06-03 18:39:40 -0600
committerKarl Williamson <khw@cpan.org>2014-06-05 12:23:02 -0600
commit5320b60d881861d12d3f678c90a6eafe50077814 (patch)
tree452faaa5b3c203286444ab7221dde16d04bb4ab3 /mg.c
parent2c6ee1a7a1ce7cff7755f9aa43a65b8278dd82a1 (diff)
downloadperl-5320b60d881861d12d3f678c90a6eafe50077814.tar.gz
Revert "PATCH: [perl #119499] "$!" with UTF-8 flag"
This reverts commit b17e32ea3ba5ef7362d2a3d1a433661afb897786. With this commit, the stringification of $! will have the UTF-8 flag set when the text is actually non-ASCII UTF-8. The reverted commit itself reverted code that was intended to fix bugs with this discrepancy of the UTF-8 flag, but which caused backward-compatibility problems with existing code. Several things have happened in the interim which allow us to experimentally restore the previously reverted changes. One is that this is early in the 5.21 cycle, and we have plenty of time to see what negative consequences this may cause. Two is that the returned text will only be in UTF-8 if the stringification happens within the scope of 'use locale'. This means that the negative effects won't happen for code, like ack, that is otherwise locale unaware. Three is that the 'locale' pragma has been enhanced to allow the program to only have locale awareness of LC_MESSAGES. Code that needs to continue the 5.20 and earlier behavior can do the stringification within the scopes of both 'use bytes' and 'use locale ":messages"'. No other Perl operations will be affected by locale; only $! and $^E stringification. The 'bytes' pragma causes the UTF-8 flag to not be set, just as in previous Perl releases.
Diffstat (limited to 'mg.c')
-rw-r--r--mg.c24
1 files changed, 9 insertions, 15 deletions
diff --git a/mg.c b/mg.c
index 80f5a7b004..e62e9e6c9b 100644
--- a/mg.c
+++ b/mg.c
@@ -753,24 +753,19 @@ S_fixup_errno_string(pTHX_ SV* sv)
if(strEQ(SvPVX(sv), "")) {
sv_catpv(sv, UNKNOWN_ERRNO_MSG);
}
-#if 0
- /* This is disabled to get v5.20 out the door. It means that $! behaves as
- * if in the scope of both 'use locale' and 'use bytes'. This can cause
- * mixed encodings and double utf8 upgrading, See towards the end of the
- * thread for [perl #119499] */
else {
/* In some locales the error string may come back as UTF-8, in which
* case we should turn on that flag. This didn't use to happen, and to
- * avoid any possible backward compatibility issues, we don't turn on
- * the flag unless we have to. So the flag stays off for an entirely
- * ASCII string. We assume that if the string looks like UTF-8, it
- * really is UTF-8: "text in any other encoding that uses bytes with
- * the high bit set is extremely unlikely to pass a UTF-8 validity
- * test" (http://en.wikipedia.org/wiki/Charset_detection). There is a
- * potential that we will get it wrong however, especially on short
- * error message text. (If it turns out to be necessary, we could also
- * keep track if the current LC_MESSAGES locale is UTF-8) */
+ * avoid as many backward compatibility issues as possible, we
+ * don't turn on the flag unless we have to. So the flag stays off for
+ * an entirely ASCII string. We assume that if the string looks like
+ * UTF-8, it really is UTF-8: "text in any other encoding that uses
+ * bytes with the high bit set is extremely unlikely to pass a UTF-8
+ * validity test" (http://en.wikipedia.org/wiki/Charset_detection).
+ * There is a potential that we will get it wrong however, especially
+ * on short error message text. (If it turns out to be necessary, we
+ * could also keep track if the current LC_MESSAGES locale is UTF-8) */
if (! IN_BYTES /* respect 'use bytes' */
&& ! is_ascii_string((U8*) SvPVX_const(sv), SvCUR(sv))
&& is_utf8_string((U8*) SvPVX_const(sv), SvCUR(sv)))
@@ -778,7 +773,6 @@ S_fixup_errno_string(pTHX_ SV* sv)
SvUTF8_on(sv);
}
}
-#endif
}
#ifdef VMS