summary | refs | log | tree | commit | diff
path: root/regcomp.c
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2015-01-23 11:20:30 -0700
committer: Karl Williamson <khw@cpan.org> 2015-01-23 21:54:42 -0700
commit: f59fa626ecf8377ec531b277e5bd1c0f5958916d (patch)
tree: 2271a851475abbabdfd182176f4737ed038d6940 /regcomp.c
parent: c52b8b12af74c72a617744f9e9dfef7ea49a16c7 (diff)
download: perl-f59fa626ecf8377ec531b277e5bd1c0f5958916d.tar.gz
regcomp.c: Minor optimizations
\d, [:digit:], and [:xdigit:] don't match anything in the upper Latin1 range. Therefore whether or not the target string is UTF-8 doesn't change what they match, hence the /d modifier acts exactly like the /u modifier for them. At run-time /u executes fewer branches because it doesn't have to test whether the target string is in UTF-8, so treating these as if /u had been specified instead saves some runtime.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- regcomp.c 25
1 files changed, 24 insertions, 1 deletions
diff --git a/regcomp.c b/regcomp.c
index 02695f9448..d7c52b3188 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -11791,7 +11791,15 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
/* FALLTHROUGH */
case 'd':
arg = ANYOF_DIGIT;
- goto join_posix;
+ if (! DEPENDS_SEMANTICS) {
+ goto join_posix;
+ }
+
+ /* \d doesn't have any matches in the upper Latin1 range, hence /d
+ * is equivalent to /u. Changing to /u saves some branches at
+ * runtime */
+ op = POSIXU;
+ goto join_posix_op_known;
case 'R':
ret = reg_node(pRExC_state, LNBREAK);
@@ -14896,6 +14904,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
#endif
goto join_posix;
+ /* The following don't have any matches in the upper Latin1
+ * range, hence /d is equivalent to /u for them. Making it /u
+ * saves some branches at runtime */
+ case ANYOF_DIGIT:
+ case ANYOF_NDIGIT:
+ case ANYOF_XDIGIT:
+ case ANYOF_NXDIGIT:
+ if (! DEPENDS_SEMANTICS) {
+ goto treat_as_default;
+ }
+
+ op = POSIXU;
+ goto join_posix;
+
case ANYOF_NCASED:
case ANYOF_LOWER:
case ANYOF_NLOWER:
@@ -14916,6 +14938,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
* We take advantage of the enum ordering of the charset
* modifiers to get the exact node type, */
default:
+ treat_as_default:
op = POSIXD + get_regex_charset(RExC_flags);
if (op > POSIXA) { /* /aa is same as /a */
op = POSIXA;