summary | refs | log | tree | commit | diff
path: root/regcomp.c
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2012-12-31 11:54:44 -0700
committer: Karl Williamson <public@khwilliamson.com> 2013-01-11 11:50:35 -0700
commit: 6b976e32dcccdff3dde7682ad9314dc5cd918e17 (patch)
tree: 4c44eacac564ebc264ea2e25bdec5a0c29d8e9d6 /regcomp.c
parent: 3c6cc85ea284b6b479dc5c19e0ca344a5e1103dc (diff)
download: perl-6b976e32dcccdff3dde7682ad9314dc5cd918e17.tar.gz
regcomp.c: Use a parameter to simplify some code
When parsing \p{} outside of a bracketed character class, code in regcomp.c has pretended that it is a bracketed character class by changing and later restoring the parsing pointers around a call to the charclass handler. This code can be simplified by instead passing a flag to the handler that tells it to parse just one item. The faking is simpler there, with no restoring necessary. Also, we can eliminate the duplicate handling of special cases. Future commits will make more extensive use of this mechanism.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- regcomp.c 35
1 files changed, 13 insertions, 22 deletions
diff --git a/regcomp.c b/regcomp.c
index 496fb8f04f..7382753384 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -10099,7 +10099,8 @@ tryagain:
case '[':
{
char * const oregcomp_parse = ++RExC_parse;
- ret = regclass(pRExC_state, flagp,depth+1);
+ ret = regclass(pRExC_state, flagp,depth+1,
+ FALSE /* means parse the whole char class */ );
if (*RExC_parse != ']') {
RExC_parse = oregcomp_parse;
vFAIL("Unmatched [");
@@ -10287,32 +10288,15 @@ tryagain:
case 'p':
case 'P':
{
- char* const oldregxend = RExC_end;
#ifdef DEBUGGING
char* parse_start = RExC_parse - 2;
#endif
- if (RExC_parse[1] == '{') {
- /* a lovely hack--pretend we saw [\pX] instead */
- RExC_end = strchr(RExC_parse, '}');
- if (!RExC_end) {
- const U8 c = (U8)*RExC_parse;
- RExC_parse += 2;
- RExC_end = oldregxend;
- vFAIL2("Missing right brace on \\%c{}", c);
- }
- RExC_end++;
- }
- else {
- RExC_end = RExC_parse + 2;
- if (RExC_end > oldregxend)
- RExC_end = oldregxend;
- }
RExC_parse--;
- ret = regclass(pRExC_state, flagp,depth+1);
+ ret = regclass(pRExC_state, flagp,depth+1,
+ TRUE /* means just parse this element */ );
- RExC_end = oldregxend;
RExC_parse--;
Set_Node_Offset(ret, parse_start + 2);
@@ -11239,7 +11223,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value, SV *free_me)
#define HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION (SvCUR(listsv) != initial_listsv_len)
STATIC regnode *
-S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
+S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1)
{
/* parse a bracketed class specification. Most of these will produce an ANYOF node;
* but something like [a] will produce an EXACT node; [aA], an EXACTFish
@@ -11283,6 +11267,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
AV * multi_char_matches = NULL; /* Code points that fold to more than one
character; used under /i */
UV n;
+ char * stop_ptr = RExC_end; /* where to stop parsing */
/* Unicode properties are stored in a swash; this holds the current one
* being parsed. If this swash is the only above-latin1 component of the
@@ -11375,12 +11360,18 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
}
}
+ /* If the caller wants us to just parse a single element, accomplish this
+ * by faking the loop ending condition */
+ if (stop_at_1 && RExC_end > RExC_parse) {
+ stop_ptr = RExC_parse + 1;
+ }
+
/* allow 1st char to be ] (allowing it to be - is dealt with later) */
if (UCHARAT(RExC_parse) == ']')
goto charclassloop;
parseit:
- while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != ']') {
+ while (RExC_parse < stop_ptr && UCHARAT(RExC_parse) != ']') {
charclassloop: