diff options
author | Karl Williamson <public@khwilliamson.com> | 2011-02-25 20:10:47 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2011-02-25 21:06:10 -0700 |
commit | 137165a601b852a9679983cdfe8d35be29f0939c (patch) | |
tree | ac4901262ef0166195893dac42877cfe2a2cf33b /regcomp.h | |
parent | 273404573a60cac38df0db9b95c63fcaac37419d (diff) | |
download | perl-137165a601b852a9679983cdfe8d35be29f0939c.tar.gz |
Free up bit in ANYOF flags
This is the foundation for fixing the regression RT #82610. My earlier
analysis that two bits could be shared was wrong; they cannot be, at least
not without further work. This commit switches to a different mechanism for
passing the needed information to regexec.c, so that another bit can be
freed up and, in a later commit, the two bits can become unshared again.
The bit that is freed up is ANYOF_UTF8, which indicated that the node can
match something outside the ANYOF bitmap, and that doing so requires the
target string to be in utf8. After this change, the mere existence of
something besides the bitmap indicates this, so no flag is needed.
The flag bit ANYOF_NONBITMAP_NON_UTF8 remains to indicate that there is
something that should be matched outside the bitmap even if the target
string isn't in utf8.
Diffstat (limited to 'regcomp.h')
-rw-r--r-- | regcomp.h | 23 |
1 files changed, 16 insertions, 7 deletions
@@ -310,6 +310,22 @@ struct regnode_charclass_class { #define SIZE_ONLY (RExC_emit == &PL_regdummy) +/* If the bitmap fully represents what this ANYOF node can match, the + * ARG is set to this special value (since 0, 1, ... are legal, but will never + * reach this high). */ +#define ANYOF_NONBITMAP_EMPTY ((U32) -1) + +/* The information used to be stored as a combination of the ANYOF_UTF8 and + * ANYOF_NONBITMAP_NON_UTF8 bits in the flags field, but was moved out of there + * to free up a bit for other uses. This tries to hide the change from + * existing code as much as possible. Now, the data structure that goes in ARG + * is not allocated unless it is needed, and that is what is used to determine + * if there is something outside the bitmap. The code now assumes that if + * that structure exists, that any UTF-8 encoded string should be tried against + * it, but a non-UTF8-encoded string will be tried only if the + * ANYOF_NONBITMAP_NON_UTF8 bit is also set. */ +#define ANYOF_NONBITMAP(node) (ARG(node) != ANYOF_NONBITMAP_EMPTY) + /* Flags for node->flags of ANYOF. These are in short supply, so some games * are done to share them, as described below. If necessary, the ANYOF_LOCALE * and ANYOF_CLASS bits could be shared with a space penalty for locale nodes @@ -347,16 +363,9 @@ struct regnode_charclass_class { #define ANYOF_CLASS 0x08 #define ANYOF_LARGE ANYOF_CLASS /* Same; name retained for back compat */ -/* Can match something outside the bitmap that is expressible only in utf8 */ -#define ANYOF_UTF8 0x10 - /* Can match something outside the bitmap that isn't in utf8 */ #define ANYOF_NONBITMAP_NON_UTF8 0x20 -/* Set if the bitmap doesn't fully represent what this node can match */ -#define ANYOF_NONBITMAP (ANYOF_UTF8|ANYOF_NONBITMAP_NON_UTF8) -#define ANYOF_UNICODE ANYOF_NONBITMAP /* old name, for back compat */ - /* Matches every code point 0x100 and above*/ #define ANYOF_UNICODE_ALL 0x40 |