summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
authorNicholas Clark <nick@ccl4.org>2007-10-06 10:23:26 +0000
committerNicholas Clark <nick@ccl4.org>2007-10-06 10:23:26 +0000
commit7fddd94457983d86b562b409f0a846c0a764f8d7 (patch)
tree58edf1082167bfbff71447af78ea3b954d188681 /regcomp.c
parenta5849ce59200ae4eedc45d2d16a7d1a3b6fc0ee2 (diff)
downloadperl-7fddd94457983d86b562b409f0a846c0a764f8d7.tar.gz
Revert one hunk of change 32034 that had the possibility of being buggy
(the sprintf "%c" code will work correctly when the SV is UTF-8). Audit all the rest for UTF-8 correctness, and force SvUTF8_off() in utf8.c to ensure correctness. (The string is reset to "", so this will not be a behaviour change.) p4raw-id: //depot/perl@32040
Diffstat (limited to 'regcomp.c')
-rw-r--r--regcomp.c12
1 files changed, 11 insertions, 1 deletions
diff --git a/regcomp.c b/regcomp.c
index b3025611eb..f649188429 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -9674,7 +9674,17 @@ clear_re(pTHX_ void *r)
STATIC void
S_put_byte(pTHX_ SV *sv, int c)
{
- if (isCNTRL(c) || c == 255 || !isPRINT(c))
+ /* Our definition of isPRINT() ignores locales, so only bytes that are
+ not part of UTF-8 are considered printable. I assume that the same
+ holds for UTF-EBCDIC.
+ Also, code point 255 is not printable in either (it's EO in EBCDIC,
+ which Wikipedia says:
+
+ EO, or Eight Ones, is an 8-bit EBCDIC character code represented as all
+ ones (binary 1111 1111, hexadecimal FF). It is similar, but not
+ identical, to the ASCII delete (DEL) or rubout control character.
+ ) So the old condition can be simplified to !isPRINT(c) */
+ if (!isPRINT(c))
Perl_sv_catpvf(aTHX_ sv, "\\%o", c);
else {
const unsigned char string = (unsigned char) c;