/*
 * regcharclass.h
 *
 * Byte-sequence and code-point classifiers for three character classes:
 * line breaks (LNBREAK), horizontal whitespace (HORIZWS) and vertical
 * whitespace (VERTWS).
 *
 * Provenance (taken from the patch this text was extracted from):
 *   commit:     e1d1eefb8c88e0dcaf2bb9e6c04d7f6192be966f
 *   From:       Yves Orton
 *   Date:       Mon, 23 Apr 2007 01:34:55 +0200
 *   Subject:    Change meaning of \v, \V, and add \h, \H to match Perl6,
 *               add \R to match PCRE and unicode tr18
 *   Message-ID: <9b18b3110704221434g43457742p28cab00289f83639@mail.gmail.com>
 *   p4raw-id:   //depot/perl@31026
 *   diffstat:   regcharclass.h | 250 insertions(+), new file mode 100644,
 *               index 0000000000..3fc92d7bce
 *
 * ****** WARNING ******
 * This file was originally autogenerated ("Autogenerated code, do not
 * modify!").  The macro-hygiene fixes below were applied by hand; a
 * regeneration will overwrite them unless the same changes are made in the
 * generator (not visible from this file).
 *
 * Common contract for the string macros
 * -------------------------------------
 *   s        pointer to the first byte to test (any byte-pointer type; it is
 *            cast to const U8 *).  U8 must be declared by the includer.
 *   e        (only "_safe" forms) pointer one past the last readable byte.
 *   is_utf8  non-zero if the buffer is UTF-8 encoded, zero for Latin-1.
 *
 *   Result: the number of bytes occupied by the matched character (1, 2 or
 *   3; CR LF counts as one 2-byte line break for LNBREAK), or 0 if there is
 *   no match at s.
 *
 *   The plain forms may read s[1] and s[2] once the preceding bytes have
 *   matched, so the caller must guarantee those bytes are readable.  The
 *   "_safe" forms never read at or beyond e.
 *
 *   All of these are macros: s, e and is_utf8 may be evaluated several
 *   times, so do not pass expressions with side effects.
 */

#ifndef REGCHARCLASS_H_
#define REGCHARCLASS_H_

/* ---- private helpers (trailing underscore: not part of the API) ---- */

/* Byte i of s, read as an unsigned octet. */
#define REGCC_BYTE_(s, i)  ( ((const U8 *)(s))[(i)] )

/* True if the first byte is one of LF, VT, FF, CR (0x0A..0x0D). */
#define REGCC_ASCII_VERT_(s) \
( 0x0A <= REGCC_BYTE_(s, 0) && REGCC_BYTE_(s, 0) <= 0x0D )

/* True if the first byte is TAB (0x09) or SPACE (0x20). */
#define REGCC_ASCII_HORIZ_(s) \
( REGCC_BYTE_(s, 0) == 0x09 || REGCC_BYTE_(s, 0) == 0x20 )

/* Length of a multi-byte UTF-8 vertical-whitespace character at s, else 0:
 *   C2 85      U+0085
 *   E2 80 A8   U+2028
 *   E2 80 A9   U+2029
 */
#define REGCC_VERTWS_MB_(s) \
( ( REGCC_BYTE_(s, 0) == 0xC2 ) ? \
      ( ( REGCC_BYTE_(s, 1) == 0x85 ) ? 2 : 0 ) : \
  ( REGCC_BYTE_(s, 0) == 0xE2 && REGCC_BYTE_(s, 1) == 0x80 && \
    ( REGCC_BYTE_(s, 2) == 0xA8 || REGCC_BYTE_(s, 2) == 0xA9 ) ) ? 3 : 0 )

/* Length of a multi-byte UTF-8 horizontal-whitespace character at s, else 0:
 *   C2 A0          U+00A0
 *   E1 9A 80       U+1680
 *   E1 A0 8E       U+180E
 *   E2 80 80..8A   U+2000..U+200A
 *   E2 80 AF       U+202F
 *   E2 81 9F       U+205F
 *   E3 80 80       U+3000
 */
#define REGCC_HORIZWS_MB_(s) \
( ( REGCC_BYTE_(s, 0) == 0xC2 ) ? \
      ( ( REGCC_BYTE_(s, 1) == 0xA0 ) ? 2 : 0 ) : \
  ( REGCC_BYTE_(s, 0) == 0xE1 ) ? \
      ( ( ( REGCC_BYTE_(s, 1) == 0xA0 && REGCC_BYTE_(s, 2) == 0x8E ) || \
          ( REGCC_BYTE_(s, 1) == 0x9A && REGCC_BYTE_(s, 2) == 0x80 ) ) ? 3 : 0 ) : \
  ( REGCC_BYTE_(s, 0) == 0xE2 ) ? \
      ( ( ( REGCC_BYTE_(s, 1) == 0x81 && REGCC_BYTE_(s, 2) == 0x9F ) || \
          ( REGCC_BYTE_(s, 1) == 0x80 && \
            ( ( 0x80 <= REGCC_BYTE_(s, 2) && REGCC_BYTE_(s, 2) <= 0x8A ) || \
              REGCC_BYTE_(s, 2) == 0xAF ) ) ) ? 3 : 0 ) : \
  ( REGCC_BYTE_(s, 0) == 0xE3 && REGCC_BYTE_(s, 1) == 0x80 && \
    REGCC_BYTE_(s, 2) == 0x80 ) ? 3 : 0 )

/* LNBREAK  Line Break: \R
 *   (the generated text said "\j \J"; the commit subject above names \R as
 *   the escape added for line breaks)
 * Codepoints: 0x0A, 0x0B, 0x0C, 0x0D, 0x0D.0x0A, 0x85, 0x2028, 0x2029
 */

/* Encoding chosen at run time; may read up to s[2]. */
#define is_LNBREAK(s,is_utf8) /*** Line Break: \R ***/ \
( ( REGCC_BYTE_(s, 0) == 0x0D ) ? \
      ( ( REGCC_BYTE_(s, 1) == 0x0A ) ? 2 : 1 ) : \
  ( 0x0A <= REGCC_BYTE_(s, 0) && REGCC_BYTE_(s, 0) <= 0x0C ) ? 1 : \
  (is_utf8) ? REGCC_VERTWS_MB_(s) : \
  ( REGCC_BYTE_(s, 0) == 0x85 ) )

/* As is_LNBREAK, but only matches characters that fit entirely before e. */
#define is_LNBREAK_safe(s,e,is_utf8) /*** Line Break: \R ***/ \
( ( (e) - (s) > 2 ) ? is_LNBREAK(s, is_utf8) : \
  ( (e) - (s) > 1 ) ? \
      ( ( REGCC_BYTE_(s, 0) == 0x0D ) ? \
            ( ( REGCC_BYTE_(s, 1) == 0x0A ) ? 2 : 1 ) : \
        ( 0x0A <= REGCC_BYTE_(s, 0) && REGCC_BYTE_(s, 0) <= 0x0C ) ? 1 : \
        (is_utf8) ? \
            ( ( REGCC_BYTE_(s, 0) == 0xC2 && REGCC_BYTE_(s, 1) == 0x85 ) ? 2 : 0 ) : \
        ( REGCC_BYTE_(s, 0) == 0x85 ) ) : \
  ( (e) - (s) > 0 ) ? \
      ( REGCC_ASCII_VERT_(s) ? 1 : \
        /* argument parenthesised so that e.g. "a && b" negates as a whole */ \
        ( !(is_utf8) ) ? ( REGCC_BYTE_(s, 0) == 0x85 ) : 0 ) : \
  0 )

/* Buffer is known to be UTF-8; may read up to s[2]. */
#define is_LNBREAK_utf8(s) /*** Line Break: \R ***/ \
( ( REGCC_BYTE_(s, 0) == 0x0D ) ? \
      ( ( REGCC_BYTE_(s, 1) == 0x0A ) ? 2 : 1 ) : \
  ( 0x0A <= REGCC_BYTE_(s, 0) && REGCC_BYTE_(s, 0) <= 0x0C ) ? 1 : \
  REGCC_VERTWS_MB_(s) )

#define is_LNBREAK_utf8_safe(s,e) /*** Line Break: \R ***/ \
( ( (e) - (s) > 2 ) ? is_LNBREAK_utf8(s) : \
  ( (e) - (s) > 1 ) ? \
      ( ( REGCC_BYTE_(s, 0) == 0xC2 ) ? \
            ( ( REGCC_BYTE_(s, 1) == 0x85 ) ? 2 : 0 ) : \
        ( REGCC_BYTE_(s, 0) == 0x0D ) ? \
            ( ( REGCC_BYTE_(s, 1) == 0x0A ) ? 2 : 1 ) : \
        ( 0x0A <= REGCC_BYTE_(s, 0) && REGCC_BYTE_(s, 0) <= 0x0C ) ) : \
  ( (e) - (s) > 0 ) ? REGCC_ASCII_VERT_(s) : \
  0 )

/* Buffer is known to be Latin-1; may read s[1] after a CR. */
#define is_LNBREAK_latin1(s) /*** Line Break: \R ***/ \
( ( REGCC_BYTE_(s, 0) == 0x0D ) ? \
      ( ( REGCC_BYTE_(s, 1) == 0x0A ) ? 2 : 1 ) : \
  ( ( 0x0A <= REGCC_BYTE_(s, 0) && REGCC_BYTE_(s, 0) <= 0x0C ) || \
    REGCC_BYTE_(s, 0) == 0x85 ) )

#define is_LNBREAK_latin1_safe(s,e) /*** Line Break: \R ***/ \
( ( (e) - (s) > 1 ) ? is_LNBREAK_latin1(s) : \
  ( (e) - (s) > 0 ) ? \
      ( REGCC_ASCII_VERT_(s) || REGCC_BYTE_(s, 0) == 0x85 ) : \
  0 )

/* Code-point test (single code points only, so CR LF is not representable).
 * Yields 1 or 0. */
#define is_LNBREAK_cp(cp) /*** Line Break: \R ***/ \
( ( 0x0A <= (cp) && (cp) <= 0x0D ) || (cp) == 0x85 || \
  (cp) == 0x2028 || (cp) == 0x2029 )

/* HORIZWS  Horizontal Whitespace: \h \H
 * Codepoints: 0x09, 0x20, 0xA0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002,
 *   0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202F,
 *   0x205F, 0x3000
 */

/* Encoding chosen at run time; may read up to s[2]. */
#define is_HORIZWS(s,is_utf8) /*** Horizontal Whitespace: \h \H ***/ \
( REGCC_ASCII_HORIZ_(s) ? 1 : \
  (is_utf8) ? REGCC_HORIZWS_MB_(s) : \
  ( REGCC_BYTE_(s, 0) == 0xA0 ) )

/* As is_HORIZWS, but only matches characters that fit entirely before e. */
#define is_HORIZWS_safe(s,e,is_utf8) /*** Horizontal Whitespace: \h \H ***/ \
( ( (e) - (s) > 2 ) ? is_HORIZWS(s, is_utf8) : \
  ( (e) - (s) > 1 ) ? \
      ( REGCC_ASCII_HORIZ_(s) ? 1 : \
        (is_utf8) ? \
            ( ( REGCC_BYTE_(s, 0) == 0xC2 && REGCC_BYTE_(s, 1) == 0xA0 ) ? 2 : 0 ) : \
        ( REGCC_BYTE_(s, 0) == 0xA0 ) ) : \
  ( (e) - (s) > 0 ) ? \
      ( REGCC_ASCII_HORIZ_(s) ? 1 : \
        /* argument parenthesised so that e.g. "a && b" negates as a whole */ \
        ( !(is_utf8) ) ? ( REGCC_BYTE_(s, 0) == 0xA0 ) : 0 ) : \
  0 )

/* Buffer is known to be UTF-8; may read up to s[2]. */
#define is_HORIZWS_utf8(s) /*** Horizontal Whitespace: \h \H ***/ \
( REGCC_ASCII_HORIZ_(s) ? 1 : REGCC_HORIZWS_MB_(s) )

#define is_HORIZWS_utf8_safe(s,e) /*** Horizontal Whitespace: \h \H ***/ \
( ( (e) - (s) > 2 ) ? is_HORIZWS_utf8(s) : \
  ( (e) - (s) > 1 ) ? \
      ( ( REGCC_BYTE_(s, 0) == 0xC2 ) ? \
            ( ( REGCC_BYTE_(s, 1) == 0xA0 ) ? 2 : 0 ) : \
        REGCC_ASCII_HORIZ_(s) ) : \
  ( (e) - (s) > 0 ) ? REGCC_ASCII_HORIZ_(s) : \
  0 )

/* Buffer is known to be Latin-1; reads s[0] only. */
#define is_HORIZWS_latin1(s) /*** Horizontal Whitespace: \h \H ***/ \
( REGCC_ASCII_HORIZ_(s) || REGCC_BYTE_(s, 0) == 0xA0 )

#define is_HORIZWS_latin1_safe(s,e) /*** Horizontal Whitespace: \h \H ***/ \
( ( (e) - (s) > 0 ) ? is_HORIZWS_latin1(s) : 0 )

/* Code-point test.  Yields 1 or 0. */
#define is_HORIZWS_cp(cp) /*** Horizontal Whitespace: \h \H ***/ \
( (cp) == 0x09 || (cp) == 0x20 || (cp) == 0xA0 || \
  (cp) == 0x1680 || (cp) == 0x180E || \
  ( 0x2000 <= (cp) && (cp) <= 0x200A ) || \
  (cp) == 0x202F || (cp) == 0x205F || (cp) == 0x3000 )

/* VERTWS  Vertical Whitespace: \v \V
 * Codepoints: 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029
 */

/* Encoding chosen at run time; may read up to s[2]. */
#define is_VERTWS(s,is_utf8) /*** Vertical Whitespace: \v \V ***/ \
( REGCC_ASCII_VERT_(s) ? 1 : \
  (is_utf8) ? REGCC_VERTWS_MB_(s) : \
  ( REGCC_BYTE_(s, 0) == 0x85 ) )

/* As is_VERTWS, but only matches characters that fit entirely before e. */
#define is_VERTWS_safe(s,e,is_utf8) /*** Vertical Whitespace: \v \V ***/ \
( ( (e) - (s) > 2 ) ? is_VERTWS(s, is_utf8) : \
  ( (e) - (s) > 1 ) ? \
      ( REGCC_ASCII_VERT_(s) ? 1 : \
        (is_utf8) ? \
            ( ( REGCC_BYTE_(s, 0) == 0xC2 && REGCC_BYTE_(s, 1) == 0x85 ) ? 2 : 0 ) : \
        ( REGCC_BYTE_(s, 0) == 0x85 ) ) : \
  ( (e) - (s) > 0 ) ? \
      ( REGCC_ASCII_VERT_(s) ? 1 : \
        /* argument parenthesised so that e.g. "a && b" negates as a whole */ \
        ( !(is_utf8) ) ? ( REGCC_BYTE_(s, 0) == 0x85 ) : 0 ) : \
  0 )

/* Buffer is known to be UTF-8; may read up to s[2]. */
#define is_VERTWS_utf8(s) /*** Vertical Whitespace: \v \V ***/ \
( REGCC_ASCII_VERT_(s) ? 1 : REGCC_VERTWS_MB_(s) )

#define is_VERTWS_utf8_safe(s,e) /*** Vertical Whitespace: \v \V ***/ \
( ( (e) - (s) > 2 ) ? is_VERTWS_utf8(s) : \
  ( (e) - (s) > 1 ) ? \
      ( ( REGCC_BYTE_(s, 0) == 0xC2 ) ? \
            ( ( REGCC_BYTE_(s, 1) == 0x85 ) ? 2 : 0 ) : \
        REGCC_ASCII_VERT_(s) ) : \
  ( (e) - (s) > 0 ) ? REGCC_ASCII_VERT_(s) : \
  0 )

/* Buffer is known to be Latin-1; reads s[0] only. */
#define is_VERTWS_latin1(s) /*** Vertical Whitespace: \v \V ***/ \
( REGCC_ASCII_VERT_(s) || REGCC_BYTE_(s, 0) == 0x85 )

#define is_VERTWS_latin1_safe(s,e) /*** Vertical Whitespace: \v \V ***/ \
( ( (e) - (s) > 0 ) ? is_VERTWS_latin1(s) : 0 )

/* Code-point test.  Yields 1 or 0. */
#define is_VERTWS_cp(cp) /*** Vertical Whitespace: \v \V ***/ \
( ( 0x0A <= (cp) && (cp) <= 0x0D ) || (cp) == 0x85 || \
  (cp) == 0x2028 || (cp) == 0x2029 )

#endif /* REGCHARCLASS_H_ */