summaryrefslogtreecommitdiff
path: root/regcharclass.h
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2012-09-03 16:59:09 -0600
committer Karl Williamson <public@khwilliamson.com> 2012-09-13 21:14:03 -0600
commit 685289b5657b776e8a3871de68a57785e6ccd797 (patch)
tree effb3627422467ccc329131b12c3de913ff5b8a4 /regcharclass.h
parent b1af8fefbdf1c044271e0b9d8898e2d808ab7879 (diff)
download perl-685289b5657b776e8a3871de68a57785e6ccd797.tar.gz
Use macro not swash for utf8 quotemeta
The rules for determining whether an above-Latin1 code point should be quoted are now saved in a macro generated from a trie by regen/regcharclass.pl, and this macro is now used by pp.c to test these cases. This allows removal of a wrapper subroutine, and there is also no need for dynamic loading at run-time into a swash. This macro is about as big as I'm comfortable compiling in, but it saves the building of a hash that can grow over time, and removes a subroutine and interpreter variables. Indeed, performance benchmarks show that it is about the same speed as a hash, but it does not require having to load the rules in from disk the first time it is used.
Diffstat (limited to 'regcharclass.h')
-rw-r--r-- regcharclass.h 65
1 files changed, 65 insertions, 0 deletions
diff --git a/regcharclass.h b/regcharclass.h
index 91ab67806d..a7e79ada56 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -460,6 +460,71 @@
: 0 ) \
: 0 )
+/*
+ QUOTEMETA: Meta-characters that \Q should quote
+ is_QUOTEMETA_high(s) examines the bytes at s (viewed as U8) and yields the length in bytes (2, 3 or 4) of the sequence starting there when it matches one of the patterns below, else 0. It takes no length/end argument: a later byte is read only after the earlier ones matched, so the caller must ensure those bytes are readable (NOTE(review): presumably guaranteed by well-formed, NUL-terminated UTF-8 input -- confirm in the caller). s is expanded many times and without parentheses, so pass a simple, side-effect-free pointer expression.
+ \p{_Perl_Quotemeta}
+*/
+/*** GENERATED CODE ***/
+#define is_QUOTEMETA_high(s) \
+( ( 0xCD == ((U8*)s)[0] ) ? /* compares s[0..1]; a match yields 2 */ \
+ ( ( 0x8F == ((U8*)s)[1] ) ? 2 : 0 ) \
+: ( 0xE1 == ((U8*)s)[0] ) ? /* compares s[0..2]; a match yields 3 */ \
+ ( ( 0x85 == ((U8*)s)[1] ) ? \
+ ( ( 0x9F == ((U8*)s)[2] || 0xA0 == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( 0x9A == ((U8*)s)[1] ) ? \
+ ( ( 0x80 == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( 0x9E == ((U8*)s)[1] ) ? \
+ ( ( 0xB4 == ((U8*)s)[2] || 0xB5 == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( 0xA0 == ((U8*)s)[1] ) ? \
+ ( ( 0x8B <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x8E ) ? 3 : 0 ) \
+ : 0 ) \
+: ( 0xE2 == ((U8*)s)[0] ) ? /* compares s[0..2]; a match yields 3 */ \
+ ( ( 0x80 == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBE ) ? 3 : 0 ) \
+ : ( 0x81 == ((U8*)s)[1] ) ? \
+ ( ( ( 0x81 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x93 ) || ( 0x95 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xAF ) ) ? 3 : 0 )\
+ : ( 0x86 == ((U8*)s)[1] ) ? \
+ ( ( 0x90 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x87 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x90 ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x91 == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x9F ) ? 3 : 0 ) \
+ : ( 0x94 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x9C ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x9D == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xB5 ) ? 3 : 0 ) \
+ : ( 0x9E == ((U8*)s)[1] ) ? \
+ ( ( 0x94 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( ( 0x9F <= ((U8*)s)[1] && ((U8*)s)[1] <= 0xAF ) || 0xB8 == ((U8*)s)[1] || 0xB9 == ((U8*)s)[1] ) ?\
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : 0 ) \
+: ( 0xE3 == ((U8*)s)[0] ) ? /* compares s[0..2]; a match yields 3 */ \
+ ( ( 0x80 == ((U8*)s)[1] ) ? \
+ ( ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x83 ) || ( 0x88 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xA0 ) || 0xB0 == ((U8*)s)[2] ) ? 3 : 0 )\
+ : ( 0x85 == ((U8*)s)[1] ) ? \
+ ( ( 0xA4 == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : 0 ) \
+: ( 0xEF == ((U8*)s)[0] ) ? /* compares s[0..2]; a match yields 3 */ \
+ ( ( 0xB4 == ((U8*)s)[1] ) ? \
+ ( ( 0xBE == ((U8*)s)[2] || 0xBF == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( 0xB8 == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x8F ) ? 3 : 0 ) \
+ : ( 0xB9 == ((U8*)s)[1] ) ? \
+ ( ( 0x85 == ((U8*)s)[2] || 0x86 == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( 0xBB == ((U8*)s)[1] ) ? \
+ ( ( 0xBF == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( 0xBE == ((U8*)s)[1] ) ? \
+ ( ( 0xA0 == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( 0xBF == ((U8*)s)[1] ) ? \
+ ( ( 0xB0 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xB8 ) ? 3 : 0 ) \
+ : 0 ) \
+: ( 0xF0 == ((U8*)s)[0] ) ? /* compares s[0..3]; a match yields 4 */ \
+ ( ( ( ( 0x9D == ((U8*)s)[1] ) && ( 0x85 == ((U8*)s)[2] ) ) && ( 0xB3 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0xBA ) ) ? 4 : 0 )\
+: ( 0xF3 == ((U8*)s)[0] ) ? /* compares s[0..3]; a match yields 4 */ \
+ ( ( ( ( 0xA0 == ((U8*)s)[1] ) && ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ) && ( 0x80 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0xBF ) ) ? 4 : 0 )\
+: 0 ) /* any other first byte: no match */
+
#endif /* H_REGCHARCLASS */