summaryrefslogtreecommitdiff
path: root/regcomp.h
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2011-03-08 17:06:47 -0700
committer Karl Williamson <public@khwilliamson.com> 2011-03-08 23:22:17 -0700
commit c613755a4b4fc8e64a77639d47d7e208fee68edc (patch)
tree 79d619f3808d2f33e5d8613e59e16ebf74c3fc03 /regcomp.h
parent f0c16e54b3b5efbb4380952c7ba5e8d7626d7cae (diff)
download perl-c613755a4b4fc8e64a77639d47d7e208fee68edc.tar.gz
regex: /l in combo with others in syn start class
Now that regexes can be combinations of different charset modifiers, a synthetic start class can match both locale and non-locale. Locale should generally match only things in the bitmap for code points < 256, but a synthetic start class with a non-locale component can match such code points. This patch makes an exception for synthetic nodes, which will be resolved if the node passes and the match is then done again for real.
Diffstat (limited to 'regcomp.h')
-rw-r--r--regcomp.h22
1 files changed, 10 insertions, 12 deletions
diff --git a/regcomp.h b/regcomp.h
index 18c8f6f745..9ffca0e969 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -337,20 +337,18 @@ struct regnode_charclass_class {
#define ANYOF_LARGE ANYOF_CLASS /* Same; name retained for back compat */
/* EOS, meaning that it can match an empty string too, is used for the
- * synthetic start class (ssc) only. It looks like it could share the INVERT
- * bit, as the ssc is never inverted. But doing that caused this reges to
- * not match:
- * 'foo/file.fob' =~ m,^(?=[^\.])[^/]* /(?=[^\.])[^/]*\.fo[^/]$,;
- * (except the space between the * and the / above shouldn't be there; it was
- * inserted to make this comment continue on.)
- * Rather than try to figure out what was going on in the optimizer, I (khw)
- * found a way to save a different bit. But my original line of reasoning was
- * "The bit just needs to be turned off before regexec.c gets a hold of it so
- * that regexec.c doesn't think it's inverted, but this happens automatically,
- * as if the ssc can match an EOS, the ssc is discarded, and never passed to
- * regexec.c" */
+ * synthetic start class only. */
#define ANYOF_EOS 0x10
+/* Is this node the synthetic start class (ssc)? This bit is shared with
+ * ANYOF_EOS, as the latter is used only for the ssc, and then not used by
+ * regexec.c. And, the code is structured so that if it is set, the ssc is
+ * not used, so it is guaranteed to be 0 for the ssc by the time regexec.c
+ * gets executed, and 0 for a non-ssc ANYOF node, as it only ever gets set for
+ * a potential ssc candidate. Thus setting it to 1 after it has been
+ * determined that the ssc will be used is not ambiguous. */
+#define ANYOF_IS_SYNTHETIC ANYOF_EOS
+
/* Can match something outside the bitmap that isn't in utf8 */
#define ANYOF_NONBITMAP_NON_UTF8 0x20