From e2e799e5172c5070fc2dea2f53b2d660fbd52204 Mon Sep 17 00:00:00 2001 From: Jarkko Hietaniemi Date: Sat, 5 Jan 2002 17:21:12 +0000 Subject: Ooops. Retract the regex parts of #14090. p4raw-id: //depot/perl@14091 --- embed.fnc | 3 +-- embed.h | 4 +--- proto.h | 3 +-- regcomp.c | 37 +++++++--------------------- regcomp.h | 4 +--- regexec.c | 82 ++++++++++++++++----------------------------------------------- 6 files changed, 34 insertions(+), 99 deletions(-) diff --git a/embed.fnc b/embed.fnc index e534f52f73..da7e2cee75 100644 --- a/embed.fnc +++ b/embed.fnc @@ -584,7 +584,7 @@ Ap |void |push_scope p |OP* |ref |OP* o|I32 type p |OP* |refkids |OP* o|I32 type Ap |void |regdump |regexp* r -Ap |SV* |regclass_swash |struct regnode *n|bool doinit|SV **listsvp|SV **altsvp +Ap |SV* |regclass_swash |struct regnode *n|bool doinit|SV **initsvp Ap |I32 |pregexec |regexp* prog|char* stringarg \ |char* strend|char* strbeg|I32 minend \ |SV* screamer|U32 nosave @@ -1134,7 +1134,6 @@ s |I32 |regrepeat |regnode *p|I32 max s |I32 |regrepeat_hard |regnode *p|I32 max|I32 *lp s |I32 |regtry |regexp *prog|char *startpos s |bool |reginclass |regnode *n|U8 *p|bool do_utf8sv_is_utf8 -s |bool |reginclasslen |regnode *n|U8 *p|STRLEN *lenp|bool do_utf8sv_is_utf8 s |CHECKPOINT|regcppush |I32 parenfloor s |char*|regcppop s |char*|regcp_set_to |I32 ss diff --git a/embed.h b/embed.h index 6203634e92..8a5cc4e3c5 100644 --- a/embed.h +++ b/embed.h @@ -1049,7 +1049,6 @@ #define regrepeat_hard S_regrepeat_hard #define regtry S_regtry #define reginclass S_reginclass -#define reginclasslen S_reginclasslen #define regcppush S_regcppush #define regcppop S_regcppop #define regcp_set_to S_regcp_set_to @@ -2105,7 +2104,7 @@ #define ref(a,b) Perl_ref(aTHX_ a,b) #define refkids(a,b) Perl_refkids(aTHX_ a,b) #define regdump(a) Perl_regdump(aTHX_ a) -#define regclass_swash(a,b,c,d) Perl_regclass_swash(aTHX_ a,b,c,d) +#define regclass_swash(a,b,c) Perl_regclass_swash(aTHX_ a,b,c) #define pregexec(a,b,c,d,e,f,g) 
Perl_pregexec(aTHX_ a,b,c,d,e,f,g) #define pregfree(a) Perl_pregfree(aTHX_ a) #define pregcomp(a,b,c) Perl_pregcomp(aTHX_ a,b,c) @@ -2589,7 +2588,6 @@ #define regrepeat_hard(a,b,c) S_regrepeat_hard(aTHX_ a,b,c) #define regtry(a,b) S_regtry(aTHX_ a,b) #define reginclass(a,b,c) S_reginclass(aTHX_ a,b,c) -#define reginclasslen(a,b,c,d) S_reginclasslen(aTHX_ a,b,c,d) #define regcppush(a) S_regcppush(aTHX_ a) #define regcppop() S_regcppop(aTHX) #define regcp_set_to(a) S_regcp_set_to(aTHX_ a) diff --git a/proto.h b/proto.h index ea837ec209..52d634ee01 100644 --- a/proto.h +++ b/proto.h @@ -619,7 +619,7 @@ PERL_CALLCONV void Perl_push_scope(pTHX); PERL_CALLCONV OP* Perl_ref(pTHX_ OP* o, I32 type); PERL_CALLCONV OP* Perl_refkids(pTHX_ OP* o, I32 type); PERL_CALLCONV void Perl_regdump(pTHX_ regexp* r); -PERL_CALLCONV SV* Perl_regclass_swash(pTHX_ struct regnode *n, bool doinit, SV **listsvp, SV **altsvp); +PERL_CALLCONV SV* Perl_regclass_swash(pTHX_ struct regnode *n, bool doinit, SV **initsvp); PERL_CALLCONV I32 Perl_pregexec(pTHX_ regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, U32 nosave); PERL_CALLCONV void Perl_pregfree(pTHX_ struct regexp* r); PERL_CALLCONV regexp* Perl_pregcomp(pTHX_ char* exp, char* xend, PMOP* pm); @@ -1164,7 +1164,6 @@ STATIC I32 S_regrepeat(pTHX_ regnode *p, I32 max); STATIC I32 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp); STATIC I32 S_regtry(pTHX_ regexp *prog, char *startpos); STATIC bool S_reginclass(pTHX_ regnode *n, U8 *p, bool do_utf8sv_is_utf8); -STATIC bool S_reginclasslen(pTHX_ regnode *n, U8 *p, STRLEN *lenp, bool do_utf8sv_is_utf8); STATIC CHECKPOINT S_regcppush(pTHX_ I32 parenfloor); STATIC char* S_regcppop(pTHX); STATIC char* S_regcp_set_to(pTHX_ I32 ss); diff --git a/regcomp.c b/regcomp.c index e81bc0aa69..aacae22edb 100644 --- a/regcomp.c +++ b/regcomp.c @@ -3427,8 +3427,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) SV *listsv = Nullsv; register char *e; UV n; - bool optimize_invert 
= TRUE; - AV* unicode_alternate = 0; + bool optimize_invert = TRUE; ret = reganode(pRExC_state, ANYOF, 0); @@ -4029,35 +4028,18 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) /* If folding and foldable and a single * character, insert also the folded version * to the charclass. */ - if (f != value) { - if (foldlen == UNISKIP(f)) - Perl_sv_catpvf(aTHX_ listsv, - "%04"UVxf"\n", f); - else { - /* Any multicharacter foldings - * require the following transform: - * [ABCDEF] -> (?:[ABCabcDEFd]|pq|rst) - * where E folds into "pq" and F folds - * into "rst", all other characters - * fold to single characters. */ - SV *sv; - - if (!unicode_alternate) - unicode_alternate = newAV(); - sv = newSVpvn((char*)foldbuf, foldlen); - SvUTF8_on(sv); - av_push(unicode_alternate, sv); - } - } + if (f != value && foldlen == UNISKIP(f)) + Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", f); /* If folding and the value is one of the Greek * sigmas insert a few more sigmas to make the * folding rules of the sigmas to work right. * Note that not all the possible combinations * are handled here: some of them are handled - * by the standard folding rules, and some of - * them (literal or EXACTF cases) are handled - * during runtime in regexec.c:S_find_byclass(). */ + * by the standard folding rules, and + * some of them (literal or EXACTF cases) are + * handled during runtime in + * regexec.c:S_find_byclass(). 
*/ if (value == UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA) { Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA); @@ -4114,7 +4096,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) av_store(av, 0, listsv); av_store(av, 1, NULL); - av_store(av, 2, (SV*)unicode_alternate); rv = newRV_noinc((SV*)av); n = add_data(pRExC_state, 1, "s"); RExC_rx->data->data[n] = (void*)rv; @@ -4644,7 +4625,7 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) { SV *lv; - SV *sw = regclass_swash(o, FALSE, &lv, 0); + SV *sw = regclass_swash(o, FALSE, &lv); if (lv) { if (sw) { @@ -4798,7 +4779,7 @@ Perl_pregfree(pTHX_ struct regexp *r) new_comppad = NULL; break; case 'n': - break; + break; default: Perl_croak(aTHX_ "panic: regfree data code '%c'", r->data->what[n]); } diff --git a/regcomp.h b/regcomp.h index 9053242fb4..16cf957816 100644 --- a/regcomp.h +++ b/regcomp.h @@ -365,9 +365,7 @@ typedef struct re_scream_pos_data_s * n - Root of op tree for (?{EVAL}) item * o - Start op for (?{EVAL}) item * p - Pad for (?{EVAL} item - * s - swash for unicode-style character class, and the multicharacter - * strings resulting from casefolding the single-character entries - * in the character class + * s - swash for unicode-style character class * 20010712 mjd@plover.com * (Remember to update re_dup() and pregfree() if you add any items.) 
*/ diff --git a/regexec.c b/regexec.c index 5f2588887f..fe9ad4baca 100644 --- a/regexec.c +++ b/regexec.c @@ -2369,13 +2369,11 @@ S_regmatch(pTHX_ regnode *prog) break; case ANYOF: if (do_utf8) { - STRLEN inclasslen = PL_regeol - locinput; - - if (!reginclasslen(scan, (U8*)locinput, &inclasslen, do_utf8)) + if (!reginclass(scan, (U8*)locinput, do_utf8)) sayNO; if (locinput >= PL_regeol) sayNO; - locinput += inclasslen; + locinput += PL_utf8skip[nextchr]; nextchr = UCHARAT(locinput); } else { @@ -4109,11 +4107,10 @@ S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp) */ SV * -Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp) +Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp) { - SV *sw = NULL; - SV *si = NULL; - SV *alt = NULL; + SV *sw = NULL; + SV *si = NULL; if (PL_regdata && PL_regdata->count) { U32 n = ARG(node); @@ -4121,11 +4118,10 @@ Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV if (PL_regdata->what[n] == 's') { SV *rv = (SV*)PL_regdata->data[n]; AV *av = (AV*)SvRV((SV*)rv); - SV **a, **b; + SV **a; - si = *av_fetch(av, 0, FALSE); - a = av_fetch(av, 1, FALSE); - b = av_fetch(av, 2, FALSE); + si = *av_fetch(av, 0, FALSE); + a = av_fetch(av, 1, FALSE); if (a) sw = *a; @@ -4133,15 +4129,11 @@ Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV sw = swash_init("utf8", "", si, 1, 0); (void)av_store(av, 1, sw); } - if (b) - alt = *b; } } - if (listsvp) - *listsvp = si; - if (altsvp) - *altsvp = alt; + if (initsvp) + *initsvp = si; return sw; } @@ -4151,20 +4143,16 @@ Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV */ STATIC bool -S_reginclasslen(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8) +S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8) { char flags = ANYOF_FLAGS(n); bool match = FALSE; UV c; STRLEN len = 0; - STRLEN plen; c = do_utf8 ? 
utf8_to_uvchr(p, &len) : *p; - plen = lenp ? *lenp : UNISKIP(c); if (do_utf8 || (flags & ANYOF_UNICODE)) { - if (lenp) - *lenp = 0; if (do_utf8 && !ANYOF_RUNTIME(n)) { if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c)) match = TRUE; @@ -4172,46 +4160,24 @@ S_reginclasslen(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, registe if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256) match = TRUE; if (!match) { - AV *av; - SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av); + SV *sw = regclass_swash(n, TRUE, 0); if (sw) { if (swash_fetch(sw, p, do_utf8)) match = TRUE; else if (flags & ANYOF_FOLD) { - U8 tmpbuf[UTF8_MAXLEN_FOLD+1]; - STRLEN tmplen; - - if (!match && lenp && av) { - I32 i; - - for (i = 0; i <= av_len(av); i++) { - SV* sv = *av_fetch(av, i, FALSE); - STRLEN len; - char *s = SvPV(sv, len); - - if (len <= plen && memEQ(s, p, len)) { - *lenp = len; - match = TRUE; - break; - } - } - } - if (!match) { - to_utf8_fold(p, tmpbuf, &tmplen); - if (swash_fetch(sw, tmpbuf, do_utf8)) - match = TRUE; - } - if (!match) { - to_utf8_upper(p, tmpbuf, &tmplen); - if (swash_fetch(sw, tmpbuf, do_utf8)) - match = TRUE; - } + U8 foldbuf[UTF8_MAXLEN_FOLD+1]; + STRLEN foldlen; + + to_utf8_fold(p, foldbuf, &foldlen); + if (swash_fetch(sw, foldbuf, do_utf8)) + match = TRUE; + to_utf8_upper(p, foldbuf, &foldlen); + if (swash_fetch(sw, foldbuf, do_utf8)) + match = TRUE; } } } - if (match && lenp && *lenp == 0) - *lenp = UNISKIP(c); } if (!match && c < 256) { if (ANYOF_BITMAP_TEST(n, c)) @@ -4272,12 +4238,6 @@ S_reginclasslen(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, registe return (flags & ANYOF_INVERT) ? !match : match; } -STATIC bool -S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8) -{ - return S_reginclasslen(aTHX_ n, p, 0, do_utf8); -} - STATIC U8 * S_reghop(pTHX_ U8 *s, I32 off) { -- cgit v1.2.1