diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-08-24 14:00:22 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-08-25 23:21:29 -0600 |
commit | 83199d386f82b5fcc56cdeded547bf6bad800018 (patch) | |
tree | 37c15447f6e4fb7e10d086ce4d355f5d6636f4f3 | |
parent | de574e73f549b6438c8dfcf8623486003abaca82 (diff) | |
download | perl-83199d386f82b5fcc56cdeded547bf6bad800018.tar.gz |
utf8.c: Revise internal API of swash_init()
This revises the API for the version of swash_init() that is usable
by core Perl. The external interface is unaffected. There is now a
flags parameter to allow for future growth. And the core internal-only
function that returns if a swash has a user-defined property in it or
not has been removed. This information is now returned via the new
flags parameter upon initialization, and is unavailable afterwards.
This is to prepare for the flexibility to change the swash that is
needed in future commits.
-rw-r--r-- | embed.fnc | 7 | ||||
-rw-r--r-- | embed.h | 1 | ||||
-rw-r--r-- | proto.h | 8 | ||||
-rw-r--r-- | regcomp.c | 8 | ||||
-rw-r--r-- | regexec.c | 9 | ||||
-rw-r--r-- | utf8.c | 59 | ||||
-rw-r--r-- | utf8.h | 3 |
7 files changed, 49 insertions, 46 deletions
@@ -1418,15 +1418,14 @@ EXMpR |SV* |_add_range_to_invlist |NULLOK SV* invlist|const UV start|const UV en EXMp |void |_invlist_populate_swatch |NN SV* const invlist|const UV start|const UV end|NN U8* swatch #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C) -EXp |SV* |_core_swash_init|NN const char* pkg|NN const char* name|NN SV* listsv|I32 minbits \ - |I32 none|bool return_if_undef|NULLOK SV* invlist \ - |bool passed_in_invlist_has_user_defined_property +EXp |SV* |_core_swash_init|NN const char* pkg|NN const char* name \ + |NN SV* listsv|I32 minbits|I32 none|bool return_if_undef \ + |NULLOK SV* invlist|NULLOK U8* const flags_p EXMpR |SV* |_invlist_contents|NN SV* const invlist EiMR |UV* |_get_invlist_len_addr |NN SV* invlist EiMR |UV |_invlist_len |NN SV* const invlist EMiR |bool |_invlist_contains_cp|NN SV* const invlist|const UV cp EXpMR |IV |_invlist_search |NN SV* const invlist|const UV cp -EXMpR |bool |_is_swash_user_defined|NN SV* const swash EXMpR |SV* |_get_swash_invlist|NN SV* const swash #endif Ap |void |taint_env @@ -959,7 +959,6 @@ #define _invlist_contents(a) Perl__invlist_contents(aTHX_ a) #define _invlist_len(a) S__invlist_len(aTHX_ a) #define _invlist_search(a,b) Perl__invlist_search(aTHX_ a,b) -#define _is_swash_user_defined(a) Perl__is_swash_user_defined(aTHX_ a) # endif # if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C) #define _add_range_to_invlist(a,b,c) Perl__add_range_to_invlist(aTHX_ a,b,c) @@ -6669,7 +6669,7 @@ STATIC I32 S_study_chunk(pTHX_ struct RExC_state_t *pRExC_state, regnode **scanp #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C) -PERL_CALLCONV SV* Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV* listsv, I32 minbits, I32 none, bool return_if_undef, SV* invlist, bool passed_in_invlist_has_user_defined_property) +PERL_CALLCONV SV* Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV* listsv, I32 minbits, I32 none, bool return_if_undef, SV* invlist, U8* const flags_p) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3); @@ -6712,12 +6712,6 @@ PERL_CALLCONV IV Perl__invlist_search(pTHX_ SV* const invlist, const UV cp) #define PERL_ARGS_ASSERT__INVLIST_SEARCH \ assert(invlist) -PERL_CALLCONV bool Perl__is_swash_user_defined(pTHX_ SV* const swash) - __attribute__warn_unused_result__ - __attribute__nonnull__(pTHX_1); -#define PERL_ARGS_ASSERT__IS_SWASH_USER_DEFINED \ - assert(swash) - #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C) PERL_CALLCONV SV* Perl__add_range_to_invlist(pTHX_ SV* invlist, const UV start, const UV end) @@ -11541,6 +11541,8 @@ parseit: case 'P': { char *e; + U8 swash_init_flags = 0; + if (RExC_parse >= RExC_end) vFAIL2("Empty \\%c{}", (U8)value); if (*RExC_parse == '{') { @@ -11597,7 +11599,8 @@ parseit: 0, /* not tr/// */ TRUE, /* this routine will handle undefined properties */ - NULL, FALSE /* No inversion list */ + NULL, /* No inversion list */ + &swash_init_flags ); if ( ! swash || ! SvROK(swash) @@ -11632,7 +11635,8 @@ parseit: * the swash is from a user-defined property, then this * whole character class should be regarded as such */ has_user_defined_property = - _is_swash_user_defined(swash); + (swash_init_flags + & _CORE_SWASH_INIT_USER_DEFINED_PROPERTY); /* Invert if asking for the complement */ if (value == 'P') { @@ -6693,7 +6693,7 @@ S_core_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bo SV * const rv = MUTABLE_SV(data->data[n]); AV * const av = MUTABLE_AV(SvRV(rv)); SV **const ary = AvARRAY(av); - bool invlist_has_user_defined_property; + U8 swash_init_flags = 0; si = *ary; /* ary[0] = the string to initialize the swash with */ @@ -6702,11 +6702,12 @@ S_core_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bo * that inversion list has any user-defined properties in it. */ if (av_len(av) >= 3) { invlist = ary[3]; - invlist_has_user_defined_property = cBOOL(SvUV(ary[4])); + if (SvUV(ary[4])) { + swash_init_flags |= _CORE_SWASH_INIT_USER_DEFINED_PROPERTY; + } } else { invlist = NULL; - invlist_has_user_defined_property = FALSE; } /* Element [1] is reserved for the set-up swash. If already there, @@ -6724,7 +6725,7 @@ S_core_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bo FALSE, /* is error if can't find property */ invlist, - invlist_has_user_defined_property); + &swash_init_flags); (void)av_store(av, 1, sw); } @@ -2912,11 +2912,11 @@ Perl_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, I32 minbits * public interface, and returning a copy prevents others from doing * mischief on the original */ - return newSVsv(_core_swash_init(pkg, name, listsv, minbits, none, FALSE, NULL, FALSE)); + return newSVsv(_core_swash_init(pkg, name, listsv, minbits, none, FALSE, NULL, NULL)); } SV* -Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, I32 minbits, I32 none, bool return_if_undef, SV* invlist, bool passed_in_invlist_has_user_defined_property) +Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, I32 minbits, I32 none, bool return_if_undef, SV* invlist, U8* const flags_p) { /* Initialize and return a swash, creating it if necessary. It does this * by calling utf8_heavy.pl in the general case. @@ -2938,8 +2938,12 @@ Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, I32 m * return_if_undef is TRUE if the routine shouldn't croak if it can't find * the requested property * invlist is an inversion list to initialize the swash with (or NULL) - * has_user_defined_property is TRUE if <invlist> has some component that - * came from a user-defined property + * flags_p if non-NULL is the address of various input and output flag bits + * to the routine, as follows: ('I' means is input to the routine; + * 'O' means output from the routine. Only flags marked O are + * meaningful on return.) + * _CORE_SWASH_INIT_USER_DEFINED_PROPERTY indicates if the swash + * came from a user-defined property. (I O) * * Thus there are three possible inputs to find the swash: <name>, * <listsv>, and <invlist>. At least one must be specified. The result @@ -2950,6 +2954,7 @@ Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, I32 m dVAR; SV* retval = &PL_sv_undef; + HV* swash_hv = NULL; assert(listsv != &PL_sv_undef || strNE(name, "") || invlist); assert(! invlist || minbits == 1); @@ -3031,19 +3036,36 @@ Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, I32 m } } /* End of calling the module to find the swash */ + /* If this operation fetched a swash, and we will need it later, get it */ + if (retval != &PL_sv_undef + && (minbits == 1 || (flags_p + && ! (*flags_p + & _CORE_SWASH_INIT_USER_DEFINED_PROPERTY)))) + { + swash_hv = MUTABLE_HV(SvRV(retval)); + + /* If we don't already know that there is a user-defined component to + * this swash, and the user has indicated they wish to know if there is + * one (by passing <flags_p>), find out */ + if (flags_p && ! (*flags_p & _CORE_SWASH_INIT_USER_DEFINED_PROPERTY)) { + SV** user_defined = hv_fetchs(swash_hv, "USER_DEFINED", FALSE); + if (user_defined && SvUV(*user_defined)) { + *flags_p |= _CORE_SWASH_INIT_USER_DEFINED_PROPERTY; + } + } + } + /* Make sure there is an inversion list for binary properties */ if (minbits == 1) { SV** swash_invlistsvp = NULL; SV* swash_invlist = NULL; bool invlist_in_swash_is_valid = FALSE; - HV* swash_hv = NULL; /* If this operation fetched a swash, get its already existing - * inversion list or create one for it */ - if (retval != &PL_sv_undef) { - swash_hv = MUTABLE_HV(SvRV(retval)); + * inversion list, or create one for it */ - swash_invlistsvp = hv_fetchs(swash_hv, "INVLIST", FALSE); + if (swash_hv) { + swash_invlistsvp = hv_fetchs(swash_hv, "I", FALSE); if (swash_invlistsvp) { swash_invlist = *swash_invlistsvp; invlist_in_swash_is_valid = TRUE; @@ -3073,12 +3095,6 @@ Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, I32 m retval = newRV_inc(MUTABLE_SV(swash_hv)); swash_invlist = invlist; } - - if (passed_in_invlist_has_user_defined_property) { - if (! hv_stores(swash_hv, "USER_DEFINED", newSVuv(1))) { - Perl_croak(aTHX_ "panic: hv_store() unexpectedly failed"); - } - } } /* Here, we have computed the union of all the passed-in data. It may @@ -4121,19 +4137,6 @@ Perl__swash_to_invlist(pTHX_ SV* const swash) return invlist; } -bool -Perl__is_swash_user_defined(pTHX_ SV* const swash) -{ - SV** ptr = hv_fetchs(MUTABLE_HV(SvRV(swash)), "USER_DEFINED", FALSE); - - PERL_ARGS_ASSERT__IS_SWASH_USER_DEFINED; - - if (! ptr) { - return FALSE; - } - return cBOOL(SvUV(*ptr)); -} - SV* Perl__get_swash_invlist(pTHX_ SV* const swash) { @@ -22,6 +22,9 @@ #define FOLD_FLAGS_FULL 0x2 #define FOLD_FLAGS_NOMIX_ASCII 0x4 +/* For _core_swash_init(), internal core use only */ +#define _CORE_SWASH_INIT_USER_DEFINED_PROPERTY 0x1 + #define to_uni_fold(c, p, lenp) _to_uni_fold_flags(c, p, lenp, FOLD_FLAGS_FULL) #define to_utf8_fold(c, p, lenp) _to_utf8_fold_flags(c, p, lenp, \ FOLD_FLAGS_FULL, NULL) |