diff options
author | Karl Williamson <khw@cpan.org> | 2015-01-23 11:20:30 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2015-01-23 21:54:42 -0700 |
commit | f59fa626ecf8377ec531b277e5bd1c0f5958916d (patch) | |
tree | 2271a851475abbabdfd182176f4737ed038d6940 /regcomp.c | |
parent | c52b8b12af74c72a617744f9e9dfef7ea49a16c7 (diff) | |
download | perl-f59fa626ecf8377ec531b277e5bd1c0f5958916d.tar.gz |
regcomp.c: Minor optimizations
\d, [:digit:], and [:xdigit:] don't match anything in the upper Latin1
range. Therefore whether or not the target string is UTF-8
doesn't change what they match, hence the /d modifier acts exactly like
the /u modifier for them. At run-time /u executes fewer branches
because it doesn't have to test if the target string is in UTF-8 or not,
so treating these as if /u had instead been specified saves some
runtime.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 25 |
1 files changed, 24 insertions, 1 deletions
@@ -11791,7 +11791,15 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) /* FALLTHROUGH */ case 'd': arg = ANYOF_DIGIT; - goto join_posix; + if (! DEPENDS_SEMANTICS) { + goto join_posix; + } + + /* \d doesn't have any matches in the upper Latin1 range, hence /d + * is equivalent to /u. Changing to /u saves some branches at + * runtime */ + op = POSIXU; + goto join_posix_op_known; case 'R': ret = reg_node(pRExC_state, LNBREAK); @@ -14896,6 +14904,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, #endif goto join_posix; + /* The following don't have any matches in the upper Latin1 + * range, hence /d is equivalent to /u for them. Making it /u + * saves some branches at runtime */ + case ANYOF_DIGIT: + case ANYOF_NDIGIT: + case ANYOF_XDIGIT: + case ANYOF_NXDIGIT: + if (! DEPENDS_SEMANTICS) { + goto treat_as_default; + } + + op = POSIXU; + goto join_posix; + case ANYOF_NCASED: case ANYOF_LOWER: case ANYOF_NLOWER: @@ -14916,6 +14938,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, * We take advantage of the enum ordering of the charset * modifiers to get the exact node type, */ default: + treat_as_default: op = POSIXD + get_regex_charset(RExC_flags); if (op > POSIXA) { /* /aa is same as /a */ op = POSIXA; |