summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2010-12-06 12:16:24 -0700
committer: Father Chrysostomos <sprout@cpan.org> 2010-12-07 18:52:43 -0800
commit: 6bbba9040c7840209170b2ff9a1d7b03ae1cbdc1 (patch)
tree: 787e26318fb1eec1e83be40c6d370239f6b3b2f8
parent: b77393f6288f64bf00f41fef15da0fac4085bfd2 (diff)
download: perl-6bbba9040c7840209170b2ff9a1d7b03ae1cbdc1.tar.gz
regexec.c: Fix locale and \s
The code handling locale \s and \S assumes, in both cases, that the character at 0x20 on ASCII platforms is a space. This is not necessarily so. I'm guessing that the code was originally just copied and pasted from the non-locale space-handling code without thinking. That code hard-coded the space character, probably to avoid an expensive swash fetch for a common situation.
-rw-r--r-- regexec.c 8
1 file changed, 4 insertions, 4 deletions
diff --git a/regexec.c b/regexec.c
index b04beadb66..c1f1ae26ea 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1645,7 +1645,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
);
case SPACEL:
REXEC_FBC_CSCAN_TAINT(
- *s == ' ' || isSPACE_LC_utf8((U8*)s),
+ isSPACE_LC_utf8((U8*)s),
isSPACE_LC(*s)
);
case NSPACE:
@@ -1656,7 +1656,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
);
case NSPACEL:
REXEC_FBC_CSCAN_TAINT(
- !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
+ !isSPACE_LC_utf8((U8*)s),
!isSPACE_LC(*s)
);
case DIGIT:
@@ -6036,7 +6036,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
if (utf8_target) {
loceol = PL_regeol;
while (hardcount < max && scan < loceol &&
- (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
+ isSPACE_LC_utf8((U8*)scan)) {
scan += UTF8SKIP(scan);
hardcount++;
}
@@ -6071,7 +6071,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
if (utf8_target) {
loceol = PL_regeol;
while (hardcount < max && scan < loceol &&
- !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
+ !isSPACE_LC_utf8((U8*)scan)) {
scan += UTF8SKIP(scan);
hardcount++;
}