From 6bbba9040c7840209170b2ff9a1d7b03ae1cbdc1 Mon Sep 17 00:00:00 2001
From: Karl Williamson <public@khwilliamson.com>
Date: Mon, 6 Dec 2010 12:16:24 -0700
Subject: regexec.c: Fix locale and \s

The locale handling of both \s and \S assumes that the character at 0x20
on ASCII platforms is a space.  This is not necessarily so.

I'm guessing that the code was originally just copied and pasted from
the non-locale space-handling code without thinking.  That code hard-coded
the space character, probably to avoid an expensive swash fetch for a
common situation.
---
 regexec.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/regexec.c b/regexec.c
index b04beadb66..c1f1ae26ea 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1645,7 +1645,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
 	    );
 	case SPACEL:
 	    REXEC_FBC_CSCAN_TAINT(
-		*s == ' ' || isSPACE_LC_utf8((U8*)s),
+		isSPACE_LC_utf8((U8*)s),
 		isSPACE_LC(*s)
 	    );
 	case NSPACE:
@@ -1656,7 +1656,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
 	    );
 	case NSPACEL:
 	    REXEC_FBC_CSCAN_TAINT(
-		!(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
+		!isSPACE_LC_utf8((U8*)s),
 		!isSPACE_LC(*s)
 	    );
 	case DIGIT:
@@ -6036,7 +6036,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
 	if (utf8_target) {
 	    loceol = PL_regeol;
 	    while (hardcount < max && scan < loceol &&
-		   (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
+		   isSPACE_LC_utf8((U8*)scan)) {
 		scan += UTF8SKIP(scan);
 		hardcount++;
 	    }
@@ -6071,7 +6071,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
 	if (utf8_target) {
 	    loceol = PL_regeol;
 	    while (hardcount < max && scan < loceol &&
-		   !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
+		   !isSPACE_LC_utf8((U8*)scan)) {
 		scan += UTF8SKIP(scan);
 		hardcount++;
 	    }
-- 
cgit v1.2.1