Change some UTF-EBCDIC macro handling definitions

This commit changes the definitions of some macros for UTF-8 handling on EBCDIC platforms. The previous definitions transformed the bytes into I8 and did tests on the transformed values. The change is to use previously unused bits in l1_char_class_tab.h so that the transform isn't needed, and generally only one branch is needed. These macros are called from the inner loops of, for example, regex backtracking.
author: Karl Williamson <khw@cpan.org> 2015-09-04 09:57:11 -0600
committer: Karl Williamson <khw@cpan.org> 2015-09-04 10:21:17 -0600
commit: e4fd731240d0d51e9cda61101a7d593dd9660e22 (patch)
tree: f650ee0ba38884ef21d804b61ead185af721db64 /utfebcdic.h
parent: 0f1913794d9137557b4ae7771a8a24ab8b5ee247 (diff)
download: perl-e4fd731240d0d51e9cda61101a7d593dd9660e22.tar.gz
1 files changed, 19 insertions, 14 deletions
diff --git a/utfebcdic.h b/utfebcdic.h
index d9e1402ce2..1df7b3827f 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -172,23 +172,28 @@ END_EXTERN_C
 
 #define UNI_IS_INVARIANT(c)		(((UV)(c)) <  0xA0)
 
-/* UTF-EBCDIC semantic macros - transform back into I8 and then compare
+/* UTF-EBCDIC semantic macros - We used to transform back into I8 and then
+ * compare, but now only have to do a single lookup by using a bit in
+ * l1_char_class_tab.h.
  * Comments as to the meaning of each are given at their corresponding utf8.h
  * definitions. */
 
-#define UTF8_IS_START(c)		(NATIVE_UTF8_TO_I8(c) >= 0xC5     \
-                                         && NATIVE_UTF8_TO_I8(c) != 0xE0)
-#define UTF8_IS_CONTINUATION(c)		((NATIVE_UTF8_TO_I8(c) & 0xE0) == 0xA0)
-#define UTF8_IS_CONTINUED(c) 		(NATIVE_UTF8_TO_I8(c) >= 0xA0)
-
-#define UTF8_IS_DOWNGRADEABLE_START(c)	(NATIVE_UTF8_TO_I8(c) >= 0xC5     \
-                                         && NATIVE_UTF8_TO_I8(c) <= 0xC7)
-/* Saying it this way adds a runtime test, but removes 2 run-time lookups */
-/*#define UTF8_IS_DOWNGRADEABLE_START(c)  ((c) == I8_TO_NATIVE_UTF8(0xC5)     \
-                                         || (c) == I8_TO_NATIVE_UTF8(0xC6)  \
-                                         || (c) == I8_TO_NATIVE_UTF8(0xC7))
-*/
-#define UTF8_IS_ABOVE_LATIN1(c)	(NATIVE_UTF8_TO_I8(c) >= 0xC8)
+#define UTF8_IS_START(c)		_generic_isCC(c, _CC_UTF8_IS_START)
+#define UTF8_IS_CONTINUATION(c)		_generic_isCC(c, _CC_UTF8_IS_CONTINUATION)
+
+/* Equivalent to ! UVCHR_IS_INVARIANT(c) */
+#define UTF8_IS_CONTINUED(c) 		cBOOL(FITS_IN_8_BITS(c)                 \
+   && ! (PL_charclass[(U8) (c)] & (_CC_mask(_CC_ASCII) | _CC_mask(_CC_CNTRL))))
+
+#define UTF8_IS_DOWNGRADEABLE_START(c)   _generic_isCC(c,                       \
+                                              _CC_UTF8_IS_DOWNGRADEABLE_START)
+
+/* Equivalent to (UTF8_IS_START(c) && ! UTF8_IS_DOWNGRADEABLE_START(c))
+ * Makes sure that the START bit is set and the DOWNGRADEABLE bit isn't */
+#define UTF8_IS_ABOVE_LATIN1(c) cBOOL(FITS_IN_8_BITS(c)                         \
+  && ((PL_charclass[(U8) (c)] & ( _CC_mask(_CC_UTF8_IS_START)                   \
+                                 |_CC_mask(_CC_UTF8_IS_DOWNGRADEABLE_START)))   \
+                        == _CC_mask(_CC_UTF8_IS_START)))
 
 /* Can't exceed 7 on EBCDIC platforms */
 #define UTF_START_MARK(len) (0xFF & (0xFE << (7-(len))))
author	Karl Williamson <khw@cpan.org>	2015-09-04 09:57:11 -0600
committer	Karl Williamson <khw@cpan.org>	2015-09-04 10:21:17 -0600
commit	e4fd731240d0d51e9cda61101a7d593dd9660e22 (patch)
tree	f650ee0ba38884ef21d804b61ead185af721db64 /utfebcdic.h
parent	0f1913794d9137557b4ae7771a8a24ab8b5ee247 (diff)
download	perl-e4fd731240d0d51e9cda61101a7d593dd9660e22.tar.gz