summaryrefslogtreecommitdiff
path: root/handy.h
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2012-12-12 09:17:50 -0700
committerKarl Williamson <public@khwilliamson.com>2012-12-22 11:11:27 -0700
commit922e8cb4d0c8566afd151f6ffc58369c567e6407 (patch)
treebc5574211a6bc33cbf5940b74f865fdb3e200fab /handy.h
parent15861f948fe52aa0c72233cd9dfc0190bfa3fbb1 (diff)
downloadperl-922e8cb4d0c8566afd151f6ffc58369c567e6407.tar.gz
Add generic _is_(uni|utf8)_FOO() function
This function uses table lookup to replace 9 more specific functions, which can be deprecated. They should not have been exposed to the public API in the first place.
Diffstat (limited to 'handy.h')
-rw-r--r--handy.h63
1 files changed, 45 insertions, 18 deletions
diff --git a/handy.h b/handy.h
index 077952c42f..f4e978caeb 100644
--- a/handy.h
+++ b/handy.h
@@ -802,6 +802,26 @@ typedef enum {
#define POSIX_SWASH_COUNT _FIRST_NON_SWASH_CC
+#if defined(PERL_IN_UTF8_C)
+# if _CC_WORDCHAR != 0 || _CC_DIGIT != 1 || _CC_ALPHA != 2 || _CC_LOWER != 3 \
+ || _CC_UPPER != 4 || _CC_PUNCT != 5 || _CC_PRINT != 6 \
+ || _CC_ALPHANUMERIC != 7 || _CC_GRAPH != 8
+ #error Need to adjust order of swash_property_names[]
+# endif
+
+static const char* const swash_property_names[] = {
+ "XPosixWord",
+ "XPosixDigit",
+ "XPosixAlpha",
+ "XPosixLower",
+ "XPosixUpper",
+ "XPosixPunct",
+ "XPosixPrint",
+ "XPosixAlnum",
+ "XPosixGraph"
+};
+#endif
+
#define PL_utf8_alnum PL_utf8_swash_ptrs[_CC_WORDCHAR]
#define PL_utf8_alnumc PL_utf8_swash_ptrs[_CC_ALPHANUMERIC]
#define PL_utf8_alpha PL_utf8_swash_ptrs[_CC_ALPHA]
@@ -1107,26 +1127,29 @@ EXTCONST U32 PL_charclass[];
#define _generic_uni(classnum, function, c) ((c) < 256 \
? _generic_isCC(c, classnum) \
: function(c))
+#define _generic_uni_swash(classnum, c) ((c) < 256 \
+ ? _generic_isCC(c, classnum) \
+ : _is_uni_FOO(classnum, c))
#define isALNUM_uni(c) isWORDCHAR_uni(c)
-#define isALPHA_uni(c) _generic_uni(_CC_ALPHA, is_uni_alpha, c)
-#define isALPHANUMERIC_uni(c) _generic_uni(_CC_ALPHANUMERIC, is_uni_alnumc, c)
+#define isALPHA_uni(c) _generic_uni_swash(_CC_ALPHA, c)
+#define isALPHANUMERIC_uni(c) _generic_uni_swash(_CC_ALPHANUMERIC, c)
#define isASCII_uni(c) isASCII(c)
#define isBLANK_uni(c) _generic_uni(_CC_BLANK, is_HORIZWS_cp_high, c)
#define isCNTRL_uni(c) isCNTRL_L1(c) /* All controls are in Latin1 */
-#define isDIGIT_uni(c) _generic_uni(_CC_DIGIT, is_uni_digit, c)
-#define isGRAPH_uni(c) _generic_uni(_CC_GRAPH, is_uni_graph, c)
+#define isDIGIT_uni(c) _generic_uni_swash(_CC_DIGIT, c)
+#define isGRAPH_uni(c) _generic_uni_swash(_CC_GRAPH, c)
#define isIDFIRST_uni(c) _generic_uni(_CC_IDFIRST, _is_uni_perl_idstart, c)
-#define isLOWER_uni(c) _generic_uni(_CC_LOWER, is_uni_lower, c)
-#define isPRINT_uni(c) _generic_uni(_CC_PRINT, is_uni_print, c)
+#define isLOWER_uni(c) _generic_uni_swash(_CC_LOWER, c)
+#define isPRINT_uni(c) _generic_uni_swash(_CC_PRINT, c)
/* Posix and regular space are identical above Latin1 */
#define isPSXSPC_uni(c) _generic_uni(_CC_PSXSPC, is_XPERLSPACE_cp_high, c)
-#define isPUNCT_uni(c) _generic_uni(_CC_PUNCT, is_uni_punct, c)
+#define isPUNCT_uni(c) _generic_uni_swash(_CC_PUNCT, c)
#define isSPACE_uni(c) _generic_uni(_CC_SPACE, is_XPERLSPACE_cp_high, c)
-#define isUPPER_uni(c) _generic_uni(_CC_UPPER, is_uni_upper, c)
+#define isUPPER_uni(c) _generic_uni_swash(_CC_UPPER, c)
#define isVERTWS_uni(c) _generic_uni(_CC_VERTSPACE, is_VERTWS_cp_high, c)
-#define isWORDCHAR_uni(c) _generic_uni(_CC_WORDCHAR, is_uni_alnum, c)
+#define isWORDCHAR_uni(c) _generic_uni_swash(_CC_WORDCHAR, c)
#define isXDIGIT_uni(c) _generic_uni(_CC_XDIGIT, is_XDIGIT_cp_high, c)
#define toFOLD_uni(c,s,l) to_uni_fold(c,s,l)
@@ -1180,6 +1203,11 @@ EXTCONST U32 PL_charclass[];
#define _generic_utf8(classnum, function, p) \
_generic_utf8_utf8(classnum, p, function(p))
+/* Like the above, but passes classnum to _is_utf8_FOO() */
+#define _generic_swash_utf8(classnum, p) \
+ _generic_utf8_utf8(classnum, p, _is_utf8_FOO(classnum, p))
+
+
/* Like the above, but should be used only when it is known that there are no
* characters in the range 128-255 which the class is TRUE for. Hence it can
* skip the tests for this range */
@@ -1199,9 +1227,8 @@ EXTCONST U32 PL_charclass[];
* "if-else-if-else ..." */
#define isALNUM_utf8(p) isWORDCHAR_utf8(p) /* back compat */
-#define isALPHA_utf8(p) _generic_utf8(_CC_ALPHA, is_utf8_alpha, p)
-#define isALPHANUMERIC_utf8(p) _generic_utf8(_CC_ALPHANUMERIC, \
- is_utf8_alnumc, p)
+#define isALPHA_utf8(p) _generic_swash_utf8(_CC_ALPHA, p)
+#define isALPHANUMERIC_utf8(p) _generic_swash_utf8(_CC_ALPHANUMERIC, p)
#define isASCII_utf8(p) isASCII(*p) /* Because ASCII is invariant under
utf8, the non-utf8 macro works
*/
@@ -1209,7 +1236,7 @@ EXTCONST U32 PL_charclass[];
#define isCNTRL_utf8(p) _generic_utf8_utf8(_CC_CNTRL, p, 0)
#define isDIGIT_utf8(p) _generic_utf8_no_upper_latin1(_CC_DIGIT, \
is_utf8_digit, p)
-#define isGRAPH_utf8(p) _generic_utf8(_CC_GRAPH, is_utf8_graph, p)
+#define isGRAPH_utf8(p) _generic_swash_utf8(_CC_GRAPH, p)
#define isIDCONT_utf8(p) _generic_utf8(_CC_WORDCHAR, is_utf8_xidcont, p)
/* To prevent S_scan_word in toke.c from hanging, we have to make sure that
@@ -1221,17 +1248,17 @@ EXTCONST U32 PL_charclass[];
#define isIDFIRST_utf8(p) _generic_utf8(_CC_IDFIRST, \
_is_utf8_perl_idstart, p)
-#define isLOWER_utf8(p) _generic_utf8(_CC_LOWER, is_utf8_lower, p)
-#define isPRINT_utf8(p) _generic_utf8(_CC_PRINT, is_utf8_print, p)
+#define isLOWER_utf8(p) _generic_swash_utf8(_CC_LOWER, p)
+#define isPRINT_utf8(p) _generic_swash_utf8(_CC_PRINT, p)
/* Posix and regular space are identical above Latin1 */
#define isPSXSPC_utf8(p) _generic_utf8(_CC_PSXSPC, is_XPERLSPACE_high, p)
-#define isPUNCT_utf8(p) _generic_utf8(_CC_PUNCT, is_utf8_punct, p)
+#define isPUNCT_utf8(p) _generic_swash_utf8(_CC_PUNCT, p)
#define isSPACE_utf8(p) _generic_utf8(_CC_SPACE, is_XPERLSPACE_high, p)
-#define isUPPER_utf8(p) _generic_utf8(_CC_UPPER, is_utf8_upper, p)
+#define isUPPER_utf8(p) _generic_swash_utf8(_CC_UPPER, p)
#define isVERTWS_utf8(p) _generic_utf8(_CC_VERTSPACE, is_VERTWS_high, p)
-#define isWORDCHAR_utf8(p) _generic_utf8(_CC_WORDCHAR, is_utf8_alnum, p)
+#define isWORDCHAR_utf8(p) _generic_swash_utf8(_CC_WORDCHAR, p)
#define isXDIGIT_utf8(p) _generic_utf8_no_upper_latin1(_CC_XDIGIT, \
is_XDIGIT_high, p)