From 6bbba9040c7840209170b2ff9a1d7b03ae1cbdc1 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Mon, 6 Dec 2010 12:16:24 -0700 Subject: regexec.c: Fix locale and \s The locale handling of both \s and \S assumed that, on ASCII platforms, the character at 0x20 is always a space. This is not necessarily so in every locale. I'm guessing that the code was originally just copied and pasted from the non-locale space handling code without thinking. That code hard-codes the space character, probably to avoid an expensive swash fetch for a common situation. --- regexec.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/regexec.c b/regexec.c index b04beadb66..c1f1ae26ea 100644 --- a/regexec.c +++ b/regexec.c @@ -1645,7 +1645,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, ); case SPACEL: REXEC_FBC_CSCAN_TAINT( - *s == ' ' || isSPACE_LC_utf8((U8*)s), + isSPACE_LC_utf8((U8*)s), isSPACE_LC(*s) ); case NSPACE: @@ -1656,7 +1656,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, ); case NSPACEL: REXEC_FBC_CSCAN_TAINT( - !(*s == ' ' || isSPACE_LC_utf8((U8*)s)), + !isSPACE_LC_utf8((U8*)s), !isSPACE_LC(*s) ); case DIGIT: @@ -6036,7 +6036,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth) if (utf8_target) { loceol = PL_regeol; while (hardcount < max && scan < loceol && - (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) { + isSPACE_LC_utf8((U8*)scan)) { scan += UTF8SKIP(scan); hardcount++; } @@ -6071,7 +6071,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth) if (utf8_target) { loceol = PL_regeol; while (hardcount < max && scan < loceol && - !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) { + !isSPACE_LC_utf8((U8*)scan)) { scan += UTF8SKIP(scan); hardcount++; } -- cgit v1.2.1