summary | refs | log | tree | commit | diff
path: root/regexec.c
diff options
context:
space:
mode:
author Jarkko Hietaniemi <jhi@iki.fi> 2001-11-06 03:05:34 +0000
committer Jarkko Hietaniemi <jhi@iki.fi> 2001-11-06 03:05:34 +0000
commit a72c75842468bcd2a7cf17032844c4040a5a31e2 (patch)
tree f1d67259d9b154926eb495b329d3239f96b9be7c /regexec.c
parent 545666dba9cc33d16d0b8341e36facdb43c44913 (diff)
download perl-a72c75842468bcd2a7cf17032844c4040a5a31e2.tar.gz
Implement the encoding pragma for regex literals.
p4raw-id: //depot/perl@12864
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 44
1 files changed, 24 insertions, 20 deletions
diff --git a/regexec.c b/regexec.c
index 198e99ead2..60d93f7ad7 100644
--- a/regexec.c
+++ b/regexec.c
@@ -408,7 +408,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
PL_reg_flags |= RF_utf8;
if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
- DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
+ DEBUG_r(PerlIO_printf(Perl_debug_log,
+ "String too short... [re_intuit_start]\n"));
goto fail;
}
strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
@@ -1474,19 +1475,10 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
}
minlen = prog->minlen;
- if (do_utf8 && !(prog->reganch & ROPT_CANY_SEEN)) {
- if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) {
- DEBUG_r(PerlIO_printf(Perl_debug_log,
- "Too short (in characters)...\n"));
- goto phooey;
- }
- }
- else {
- if (strend - startpos < minlen) {
- DEBUG_r(PerlIO_printf(Perl_debug_log,
- "Too short (in bytes)...\n"));
- goto phooey;
- }
+ if (strend - startpos < minlen) {
+ DEBUG_r(PerlIO_printf(Perl_debug_log,
+ "String too short [regexec_flags]...\n"));
+ goto phooey;
}
/* Check validity of program. */
@@ -2215,14 +2207,26 @@ S_regmatch(pTHX_ regnode *prog)
char *l = locinput;
char *e = s + ln;
STRLEN len;
+
if (do_utf8)
while (s < e) {
+ UV uv;
+
if (l >= PL_regeol)
sayNO;
- if (*((U8*)s) != utf8_to_uvchr((U8*)l, &len))
- sayNO;
- s += len;
- l += len;
+ uv = NATIVE_TO_UNI(*(U8*)s);
+ if (UTF8_IS_START(uv)) {
+ len = UTF8SKIP(s);
+ if (memNE(s, l, len))
+ sayNO;
+ l += len;
+ s += len;
+ } else {
+ if (uv != utf8_to_uvchr((U8*)l, &len))
+ sayNO;
+ l += len;
+ s ++;
+ }
}
else
while (s < e) {
@@ -2230,8 +2234,8 @@ S_regmatch(pTHX_ regnode *prog)
sayNO;
if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len))
sayNO;
- s+=len;
- l++;
+ s += len;
+ l ++;
}
locinput = l;
nextchr = UCHARAT(locinput);