diff options
-rw-r--r-- | regcomp.c | 3 | ||||
-rw-r--r-- | regexec.c | 35 |
2 files changed, 18 insertions, 20 deletions
@@ -1764,7 +1764,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) r->reganch = pm->op_pmflags & PMf_COMPILETIME; /* Again? */ pm->op_pmflags = RExC_flags16; if (UTF) - r->reganch |= ROPT_UTF8; + r->reganch |= ROPT_UTF8; /* Unicode in it? */ r->regstclass = NULL; if (RExC_naughty >= 10) /* Probably an expensive pattern. */ r->reganch |= ROPT_NAUGHTY; @@ -3168,6 +3168,7 @@ tryagain: RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen); } else RExC_size += STR_SZ(newlen) - STR_SZ(oldlen); + RExC_utf8 = 1; } return(ret); @@ -2204,43 +2204,40 @@ S_regmatch(pTHX_ regnode *prog) s = STRING(scan); ln = STR_LEN(scan); if (do_utf8 != (UTF!=0)) { + /* The target and the pattern have differing "utf8ness". */ char *l = locinput; char *e = s + ln; STRLEN len; - if (do_utf8) + if (do_utf8) { + /* The target is utf8, the pattern is not utf8. */ while (s < e) { - UV uv; - if (l >= PL_regeol) - sayNO; - uv = NATIVE_TO_UNI(*(U8*)s); - if (UTF8_IS_START(uv)) { - len = UTF8SKIP(s); - if (memNE(s, l, len)) - sayNO; - l += len; - s += len; - } else { - if (uv != utf8_to_uvchr((U8*)l, &len)) - sayNO; - l += len; - s ++; - } + sayNO; + if (NATIVE_TO_UNI(*(U8*)s) != + utf8_to_uvchr((U8*)l, &len)) + sayNO; + l += len; + s ++; } - else + } + else { + /* The target is not utf8, the pattern is utf8. */ while (s < e) { if (l >= PL_regeol) sayNO; - if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len)) + if (NATIVE_TO_UNI(*((U8*)l)) != + utf8_to_uvchr((U8*)s, &len)) sayNO; s += len; l ++; } + } locinput = l; nextchr = UCHARAT(locinput); break; } + /* The target and the pattern have the same "utf8ness". */ /* Inline the first character, for speed. */ if (UCHARAT(s) != nextchr) sayNO; |