summary | refs | log | tree | commit | diff
path: root/handy.h
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2014-05-11 17:41:48 -0600
committer: Karl Williamson <khw@cpan.org> 2014-06-01 08:56:22 -0600
commit: f12c011824bed682b979bcf53cf4106b7e4d7f31 (patch)
tree: 0f73c8175087f68a1b52a4b5b9c4b0725870a071 /handy.h
parent: cce29a1df20fdbe37080fe3ef4982f7f53db93ba (diff)
download: perl-f12c011824bed682b979bcf53cf4106b7e4d7f31.tar.gz
regcomp.c: Skip work that is a no-op
There are a few characters in the Latin1 range that can be folded to by above-Latin1 characters. Some of these are folded to as part of a single-character fold, as when KELVIN SIGN folds to 'k'. More are folded to as part of a multi-character fold. Until this commit, there wasn't a quick way to distinguish between the two classes. A couple of places only want the single-character ones. It is more efficient to look for just those than to include the multi-char ones, which end up not doing anything. This uses a bit in l1_char_class_tab.h to indicate those characters that are in the desired class.
Diffstat (limited to 'handy.h')
-rw-r--r-- handy.h 21
1 files changed, 12 insertions, 9 deletions
diff --git a/handy.h b/handy.h
index 740ebe526d..6810ef8499 100644
--- a/handy.h
+++ b/handy.h
@@ -952,14 +952,15 @@ patched there. The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
/* The members of the third group below do not need to be coordinated with data
* structures in regcomp.[ch] and regexec.c. */
-# define _CC_IDFIRST 17
-# define _CC_CHARNAME_CONT 18
-# define _CC_NONLATIN1_FOLD 19
-# define _CC_QUOTEMETA 20
-# define _CC_NON_FINAL_FOLD 21
-# define _CC_IS_IN_SOME_FOLD 22
+# define _CC_IDFIRST 17
+# define _CC_CHARNAME_CONT 18
+# define _CC_NONLATIN1_FOLD 19
+# define _CC_NONLATIN1_SIMPLE_FOLD 20
+# define _CC_QUOTEMETA 21
+# define _CC_NON_FINAL_FOLD 22
+# define _CC_IS_IN_SOME_FOLD 23
# define _CC_BACKSLASH_FOO_LBRACE_IS_META 31 /* temp, see mk_PL_charclass.pl */
-/* Unused: 23-30
+/* Unused: 24-30
* If more bits are needed, one could add a second word for non-64bit
* QUAD_IS_INT systems, using some #ifdefs to distinguish between having a 2nd
* word or not. The IS_IN_SOME_FOLD bit is the most easily expendable, as it
@@ -1081,8 +1082,10 @@ EXTCONST U32 PL_charclass[];
# define isWORDCHAR_L1(c) _generic_isCC(c, _CC_WORDCHAR)
# define isIDFIRST_L1(c) _generic_isCC(c, _CC_IDFIRST)
- /* Either participates in a fold with a character above 255, or is a
- * multi-char fold */
+ /* Participates in a single-character fold with a character above 255 */
+# define _HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_SIMPLE_FOLD)))
+
+ /* Like the above, but also can be part of a multi-char fold */
# define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_FOLD)))
# define _isQUOTEMETA(c) _generic_isCC(c, _CC_QUOTEMETA)