diff options
author | Christian Persch <chpe@gnome.org> | 2012-02-12 21:20:33 +0100 |
---|---|---|
committer | Matthias Clasen <mclasen@redhat.com> | 2012-02-26 22:09:05 -0500 |
commit | 706b72db21d25ec26ed7411e5221550ffb08ff21 (patch) | |
tree | f2444bba58afacffa213619971cfd29a14146426 /glib/pcre | |
parent | 75dffb99b3d11f4ae9d8a72a055641482a727b75 (diff) | |
download | glib-706b72db21d25ec26ed7411e5221550ffb08ff21.tar.gz |
regex: Use glib for unicode data
Use g_unichar_type() and g_unichar_get_script() instead of pcre tables.
Diffstat (limited to 'glib/pcre')
-rw-r--r-- | glib/pcre/Makefile.am | 1 | ||||
-rw-r--r-- | glib/pcre/pcre_compile.c | 26 | ||||
-rw-r--r-- | glib/pcre/pcre_dfa_exec.c | 96 | ||||
-rw-r--r-- | glib/pcre/pcre_exec.c | 26 | ||||
-rw-r--r-- | glib/pcre/pcre_internal.h | 11 | ||||
-rw-r--r-- | glib/pcre/pcre_tables.c | 16 | ||||
-rw-r--r-- | glib/pcre/pcre_xclass.c | 24 | ||||
-rw-r--r-- | glib/pcre/ucp.h | 265 |
8 files changed, 239 insertions, 226 deletions
diff --git a/glib/pcre/Makefile.am b/glib/pcre/Makefile.am index 21da5c5aa..1981953ea 100644 --- a/glib/pcre/Makefile.am +++ b/glib/pcre/Makefile.am @@ -51,7 +51,6 @@ libpcre_la_SOURCES = \ pcre_string_utils.c \ pcre_study.c \ pcre_tables.c \ - pcre_ucd.c \ pcre_valid_utf8.c \ pcre_version.c \ pcre_xclass.c \ diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c index eb985dfdf..b44055aac 100644 --- a/glib/pcre/pcre_compile.c +++ b/glib/pcre/pcre_compile.c @@ -2890,43 +2890,43 @@ Returns: TRUE if auto-possessifying is OK static BOOL check_char_prop(int c, int ptype, int pdata, BOOL negated) { -const ucd_record *prop = GET_UCD(c); +const pcre_uint8 chartype = UCD_CHARTYPE(c); switch(ptype) { case PT_LAMP: - return (prop->chartype == ucp_Lu || - prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt) == negated; + return (chartype == ucp_Lu || + chartype == ucp_Ll || + chartype == ucp_Lt) == negated; case PT_GC: - return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated; + return (pdata == PRIV(ucp_gentype)[chartype]) == negated; case PT_PC: - return (pdata == prop->chartype) == negated; + return (pdata == chartype) == negated; case PT_SC: - return (pdata == prop->script) == negated; + return (pdata == UCD_SCRIPT(c)) == negated; /* These are specials */ case PT_ALNUM: - return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated; + return (PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N) == negated; case PT_SPACE: /* Perl space */ - return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z || + return (PRIV(ucp_gentype)[chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) == negated; case PT_PXSPACE: /* POSIX space */ - return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z || + return (PRIV(ucp_gentype)[chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) == negated; case PT_WORD: - return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N || + return (PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE) == negated; } return FALSE; diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c index 21d7be693..41ff65bea 100644 --- a/glib/pcre/pcre_dfa_exec.c +++ b/glib/pcre/pcre_dfa_exec.c @@ -1015,7 +1015,7 @@ for (;;) if (clen > 0) { BOOL OK; - const ucd_record * prop = GET_UCD(c); + const pcre_uint8 chartype = UCD_CHARTYPE(c); switch(code[1]) { case PT_ANY: @@ -1023,43 +1023,43 @@ for (;;) break; case PT_LAMP: - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt; + OK = chartype == ucp_Lu || chartype == ucp_Ll || + chartype == ucp_Lt; break; case PT_GC: - OK = PRIV(ucp_gentype)[prop->chartype] == code[2]; + OK = PRIV(ucp_gentype)[chartype] == code[2]; break; case PT_PC: - OK = prop->chartype == code[2]; + OK = chartype == code[2]; break; case PT_SC: - OK = prop->script == code[2]; + OK = UCD_SCRIPT(c) == code[2]; break; /* These are specials for combination cases. */ case PT_ALNUM: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N; break; case PT_SPACE: /* Perl space */ - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || + OK = PRIV(ucp_gentype)[chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; break; case PT_PXSPACE: /* POSIX space */ - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || + OK = PRIV(ucp_gentype)[chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_FF || c == CHAR_CR; break; case PT_WORD: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N || + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE; break; @@ -1209,7 +1209,7 @@ for (;;) if (clen > 0) { BOOL OK; - const ucd_record * prop = GET_UCD(c); + const pcre_uint8 chartype = UCD_CHARTYPE(c); switch(code[2]) { case PT_ANY: @@ -1217,43 +1217,43 @@ for (;;) break; case PT_LAMP: - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt; + OK = chartype == ucp_Lu || chartype == ucp_Ll || + chartype == ucp_Lt; break; case PT_GC: - OK = PRIV(ucp_gentype)[prop->chartype] == code[3]; + OK = PRIV(ucp_gentype)[chartype] == code[3]; break; case PT_PC: - OK = prop->chartype == code[3]; + OK = chartype == code[3]; break; case PT_SC: - OK = prop->script == code[3]; + OK = UCD_SCRIPT(c) == code[3]; break; /* These are specials for combination cases. */ case PT_ALNUM: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N; break; case PT_SPACE: /* Perl space */ - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || + OK = PRIV(ucp_gentype)[chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; break; case PT_PXSPACE: /* POSIX space */ - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || + OK = PRIV(ucp_gentype)[chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_FF || c == CHAR_CR; break; case PT_WORD: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N || + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE; break; @@ -1456,7 +1456,7 @@ for (;;) if (clen > 0) { BOOL OK; - const ucd_record * prop = GET_UCD(c); + const pcre_uint8 chartype = UCD_CHARTYPE(c); switch(code[2]) { case PT_ANY: @@ -1464,43 +1464,43 @@ for (;;) break; case PT_LAMP: - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt; + OK = chartype == ucp_Lu || chartype == ucp_Ll || + chartype == ucp_Lt; break; case PT_GC: - OK = PRIV(ucp_gentype)[prop->chartype] == code[3]; + OK = PRIV(ucp_gentype)[chartype] == code[3]; break; case PT_PC: - OK = prop->chartype == code[3]; + OK = chartype == code[3]; break; case PT_SC: - OK = prop->script == code[3]; + OK = UCD_SCRIPT(c) == code[3]; break; /* These are specials for combination cases. */ case PT_ALNUM: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N; break; case PT_SPACE: /* Perl space */ - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || + OK = PRIV(ucp_gentype)[chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; break; case PT_PXSPACE: /* POSIX space */ - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || + OK = PRIV(ucp_gentype)[chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_FF || c == CHAR_CR; break; case PT_WORD: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N || + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE; break; @@ -1728,7 +1728,7 @@ for (;;) if (clen > 0) { BOOL OK; - const ucd_record * prop = GET_UCD(c); + const pcre_uint8 chartype = UCD_CHARTYPE(c); switch(code[1 + IMM2_SIZE + 1]) { case PT_ANY: @@ -1736,43 +1736,43 @@ for (;;) break; case PT_LAMP: - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt; + OK = chartype == ucp_Lu || chartype == ucp_Ll || + chartype == ucp_Lt; break; case PT_GC: - OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2]; + OK = PRIV(ucp_gentype)[chartype] == code[1 + IMM2_SIZE + 2]; break; case PT_PC: - OK = prop->chartype == code[1 + IMM2_SIZE + 2]; + OK = chartype == code[1 + IMM2_SIZE + 2]; break; case PT_SC: - OK = prop->script == code[1 + IMM2_SIZE + 2]; + OK = UCD_SCRIPT(c) == code[1 + IMM2_SIZE + 2]; break; /* These are specials for combination cases. */ case PT_ALNUM: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N; break; case PT_SPACE: /* Perl space */ - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || + OK = PRIV(ucp_gentype)[chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; break; case PT_PXSPACE: /* POSIX space */ - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || + OK = PRIV(ucp_gentype)[chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_FF || c == CHAR_CR; break; case PT_WORD: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N || + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE; break; diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c index b71535355..8eb3162b2 100644 --- a/glib/pcre/pcre_exec.c +++ b/glib/pcre/pcre_exec.c @@ -2507,7 +2507,7 @@ for (;;) } GETCHARINCTEST(c, eptr); { - const ucd_record *prop = GET_UCD(c); + const pcre_uint8 chartype = UCD_CHARTYPE(c); switch(ecode[1]) { @@ -2516,44 +2516,44 @@ for (;;) break; case PT_LAMP: - if ((prop->chartype == ucp_Lu || - prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt) == (op == OP_NOTPROP)) + if ((chartype == ucp_Lu || + chartype == ucp_Ll || + chartype == ucp_Lt) == (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); break; case PT_GC: - if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP)) + if ((ecode[2] != PRIV(ucp_gentype)[chartype]) == (op == OP_PROP)) RRETURN(MATCH_NOMATCH); break; case PT_PC: - if ((ecode[2] != prop->chartype) == (op == OP_PROP)) + if ((ecode[2] != chartype) == (op == OP_PROP)) RRETURN(MATCH_NOMATCH); break; case PT_SC: - if ((ecode[2] != prop->script) == (op == OP_PROP)) + if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP)) RRETURN(MATCH_NOMATCH); break; /* These are specials */ case PT_ALNUM: - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP)) + if ((PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N) == (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); break; case PT_SPACE: /* Perl space */ - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || + if ((PRIV(ucp_gentype)[chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) == (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); break; case PT_PXSPACE: /* POSIX space */ - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || + if ((PRIV(ucp_gentype)[chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) == (op == OP_NOTPROP)) @@ -2561,8 +2561,8 @@ for (;;) break; case PT_WORD: - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N || + if ((PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE) == (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); break; diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h index e5a4b6a52..41c7ee3f5 100644 --- a/glib/pcre/pcre_internal.h +++ b/glib/pcre/pcre_internal.h @@ -2315,15 +2315,12 @@ extern const int PRIV(ucp_typerange)[]; #ifdef SUPPORT_UCP /* UCD access macros */ -#define UCD_BLOCK_SIZE 128 -#define GET_UCD(ch) (PRIV(ucd_records) + \ - PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \ - UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE]) +unsigned int _pcre_ucp_othercase(const unsigned int c); -#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype -#define UCD_SCRIPT(ch) GET_UCD(ch)->script +#define UCD_CHARTYPE(ch) (pcre_uint8)g_unichar_type((gunichar)(ch)) +#define UCD_SCRIPT(ch) (pcre_uint8)g_unichar_get_script((gunichar)(ch)) #define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)] -#define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case) +#define UCD_OTHERCASE(ch) (_pcre_ucp_othercase(ch)) #endif /* SUPPORT_UCP */ diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c index c8134ec31..47becc7e6 100644 --- a/glib/pcre/pcre_tables.c +++ b/glib/pcre/pcre_tables.c @@ -563,6 +563,22 @@ const ucp_type_table PRIV(utt)[] = { const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); +unsigned int +_pcre_ucp_othercase(const unsigned int c) +{ + int other_case = NOTACHAR; + + if (g_unichar_islower(c)) + other_case = g_unichar_toupper(c); + else if (g_unichar_isupper(c)) + other_case = g_unichar_tolower(c); + + if (other_case == c) + other_case = NOTACHAR; + + return other_case; +} + #endif /* SUPPORT_UTF */ /* End of pcre_tables.c */ diff --git a/glib/pcre/pcre_xclass.c b/glib/pcre/pcre_xclass.c index dca7a3994..e5a55d7bc 100644 --- a/glib/pcre/pcre_xclass.c +++ b/glib/pcre/pcre_xclass.c @@ -127,7 +127,7 @@ while ((t = *data++) != XCL_END) #ifdef SUPPORT_UCP else /* XCL_PROP & XCL_NOTPROP */ { - const ucd_record *prop = GET_UCD(c); + const pcre_uint8 chartype = UCD_CHARTYPE(c); switch(*data) { @@ -136,46 +136,46 @@ while ((t = *data++) != XCL_END) break; case PT_LAMP: - if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated; + if ((chartype == ucp_Lu || chartype == ucp_Ll || + chartype == ucp_Lt) == (t == XCL_PROP)) return !negated; break; case PT_GC: - if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP)) + if ((data[1] == PRIV(ucp_gentype)[chartype]) == (t == XCL_PROP)) return !negated; break; case PT_PC: - if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated; + if ((data[1] == chartype) == (t == XCL_PROP)) return !negated; break; case PT_SC: - if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated; + if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated; break; case PT_ALNUM: - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP)) + if ((PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N) == (t == XCL_PROP)) return !negated; break; case PT_SPACE: /* Perl space */ - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || + if ((PRIV(ucp_gentype)[chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP)) return !negated; break; case PT_PXSPACE: /* POSIX space */ - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || + if ((PRIV(ucp_gentype)[chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP)) return !negated; break; case PT_WORD: - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE) + if ((PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE) == (t == XCL_PROP)) return !negated; break; diff --git a/glib/pcre/ucp.h b/glib/pcre/ucp.h index 59c3bec3d..53a48c915 100644 --- a/glib/pcre/ucp.h +++ b/glib/pcre/ucp.h @@ -10,6 +10,7 @@ the UCD access macros. New values that are added for new releases of Unicode should always be at the end of each enum, for backwards compatibility. */ /* These are the general character categories. */ +#include "gunicode.h" enum { ucp_C, /* Other */ @@ -24,148 +25,148 @@ enum { /* These are the particular character types. */ enum { - ucp_Cc, /* Control */ - ucp_Cf, /* Format */ - ucp_Cn, /* Unassigned */ - ucp_Co, /* Private use */ - ucp_Cs, /* Surrogate */ - ucp_Ll, /* Lower case letter */ - ucp_Lm, /* Modifier letter */ - ucp_Lo, /* Other letter */ - ucp_Lt, /* Title case letter */ - ucp_Lu, /* Upper case letter */ - ucp_Mc, /* Spacing mark */ - ucp_Me, /* Enclosing mark */ - ucp_Mn, /* Non-spacing mark */ - ucp_Nd, /* Decimal number */ - ucp_Nl, /* Letter number */ - ucp_No, /* Other number */ - ucp_Pc, /* Connector punctuation */ - ucp_Pd, /* Dash punctuation */ - ucp_Pe, /* Close punctuation */ - ucp_Pf, /* Final punctuation */ - ucp_Pi, /* Initial punctuation */ - ucp_Po, /* Other punctuation */ - ucp_Ps, /* Open punctuation */ - ucp_Sc, /* Currency symbol */ - ucp_Sk, /* Modifier symbol */ - ucp_Sm, /* Mathematical symbol */ - ucp_So, /* Other symbol */ - ucp_Zl, /* Line separator */ - ucp_Zp, /* Paragraph separator */ - ucp_Zs /* Space separator */ + ucp_Cc = G_UNICODE_CONTROL, /* Control */ + ucp_Cf = G_UNICODE_FORMAT, /* Format */ + ucp_Cn = G_UNICODE_UNASSIGNED, /* Unassigned */ + ucp_Co = G_UNICODE_PRIVATE_USE, /* Private use */ + ucp_Cs = G_UNICODE_SURROGATE, /* Surrogate */ + ucp_Ll = G_UNICODE_LOWERCASE_LETTER, /* Lower case letter */ + ucp_Lm = G_UNICODE_MODIFIER_LETTER, /* Modifier letter */ + ucp_Lo = G_UNICODE_OTHER_LETTER, /* Other letter */ + ucp_Lt = G_UNICODE_TITLECASE_LETTER, /* Title case letter */ + ucp_Lu = G_UNICODE_UPPERCASE_LETTER, /* Upper case letter */ + ucp_Mc = G_UNICODE_SPACING_MARK, /* Spacing mark */ + ucp_Me = G_UNICODE_ENCLOSING_MARK, /* Enclosing mark */ + ucp_Mn = G_UNICODE_NON_SPACING_MARK, /* Non-spacing mark */ + ucp_Nd = G_UNICODE_DECIMAL_NUMBER, /* Decimal number */ + ucp_Nl = G_UNICODE_LETTER_NUMBER, /* Letter number */ + ucp_No = G_UNICODE_OTHER_NUMBER, /* Other number */ + ucp_Pc = G_UNICODE_CONNECT_PUNCTUATION, /* Connector punctuation */ + ucp_Pd = G_UNICODE_DASH_PUNCTUATION, /* Dash punctuation */ + ucp_Pe = G_UNICODE_CLOSE_PUNCTUATION, /* Close punctuation */ + ucp_Pf = G_UNICODE_FINAL_PUNCTUATION, /* Final punctuation */ + ucp_Pi = G_UNICODE_INITIAL_PUNCTUATION, /* Initial punctuation */ + ucp_Po = G_UNICODE_OTHER_PUNCTUATION, /* Other punctuation */ + ucp_Ps = G_UNICODE_OPEN_PUNCTUATION, /* Open punctuation */ + ucp_Sc = G_UNICODE_CURRENCY_SYMBOL, /* Currency symbol */ + ucp_Sk = G_UNICODE_MODIFIER_SYMBOL, /* Modifier symbol */ + ucp_Sm = G_UNICODE_MATH_SYMBOL, /* Mathematical symbol */ + ucp_So = G_UNICODE_OTHER_SYMBOL, /* Other symbol */ + ucp_Zl = G_UNICODE_LINE_SEPARATOR, /* Line separator */ + ucp_Zp = G_UNICODE_PARAGRAPH_SEPARATOR, /* Paragraph separator */ + ucp_Zs = G_UNICODE_SPACE_SEPARATOR /* Space separator */ }; /* These are the script identifications. */ enum { - ucp_Arabic, - ucp_Armenian, - ucp_Bengali, - ucp_Bopomofo, - ucp_Braille, - ucp_Buginese, - ucp_Buhid, - ucp_Canadian_Aboriginal, - ucp_Cherokee, - ucp_Common, - ucp_Coptic, - ucp_Cypriot, - ucp_Cyrillic, - ucp_Deseret, - ucp_Devanagari, - ucp_Ethiopic, - ucp_Georgian, - ucp_Glagolitic, - ucp_Gothic, - ucp_Greek, - ucp_Gujarati, - ucp_Gurmukhi, - ucp_Han, - ucp_Hangul, - ucp_Hanunoo, - ucp_Hebrew, - ucp_Hiragana, - ucp_Inherited, - ucp_Kannada, - ucp_Katakana, - ucp_Kharoshthi, - ucp_Khmer, - ucp_Lao, - ucp_Latin, - ucp_Limbu, - ucp_Linear_B, - ucp_Malayalam, - ucp_Mongolian, - ucp_Myanmar, - ucp_New_Tai_Lue, - ucp_Ogham, - ucp_Old_Italic, - ucp_Old_Persian, - ucp_Oriya, - ucp_Osmanya, - ucp_Runic, - ucp_Shavian, - ucp_Sinhala, - ucp_Syloti_Nagri, - ucp_Syriac, - ucp_Tagalog, - ucp_Tagbanwa, - ucp_Tai_Le, - ucp_Tamil, - ucp_Telugu, - ucp_Thaana, - ucp_Thai, - ucp_Tibetan, - ucp_Tifinagh, - ucp_Ugaritic, - ucp_Yi, + ucp_Arabic = G_UNICODE_SCRIPT_ARABIC, + ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN, + ucp_Bengali = G_UNICODE_SCRIPT_BENGALI, + ucp_Bopomofo = G_UNICODE_SCRIPT_BOPOMOFO, + ucp_Braille = G_UNICODE_SCRIPT_BRAILLE, + ucp_Buginese = G_UNICODE_SCRIPT_BUGINESE, + ucp_Buhid = G_UNICODE_SCRIPT_BUHID, + ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, + ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE, + ucp_Common = G_UNICODE_SCRIPT_COMMON, + ucp_Coptic = G_UNICODE_SCRIPT_COPTIC, + ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT, + ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC, + ucp_Deseret = G_UNICODE_SCRIPT_DESERET, + ucp_Devanagari = G_UNICODE_SCRIPT_DEVANAGARI, + ucp_Ethiopic = G_UNICODE_SCRIPT_ETHIOPIC, + ucp_Georgian = G_UNICODE_SCRIPT_GEORGIAN, + ucp_Glagolitic = G_UNICODE_SCRIPT_GLAGOLITIC, + ucp_Gothic = G_UNICODE_SCRIPT_GOTHIC, + ucp_Greek = G_UNICODE_SCRIPT_GREEK, + ucp_Gujarati = G_UNICODE_SCRIPT_GUJARATI, + ucp_Gurmukhi = G_UNICODE_SCRIPT_GURMUKHI, + ucp_Han = G_UNICODE_SCRIPT_HAN, + ucp_Hangul = G_UNICODE_SCRIPT_HANGUL, + ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO, + ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW, + ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA, + ucp_Inherited = G_UNICODE_SCRIPT_INHERITED, + ucp_Kannada = G_UNICODE_SCRIPT_KANNADA, + ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA, + ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI, + ucp_Khmer = G_UNICODE_SCRIPT_KHMER, + ucp_Lao = G_UNICODE_SCRIPT_LAO, + ucp_Latin = G_UNICODE_SCRIPT_LATIN, + ucp_Limbu = G_UNICODE_SCRIPT_LIMBU, + ucp_Linear_B = G_UNICODE_SCRIPT_LINEAR_B, + ucp_Malayalam = G_UNICODE_SCRIPT_MALAYALAM, + ucp_Mongolian = G_UNICODE_SCRIPT_MONGOLIAN, + ucp_Myanmar = G_UNICODE_SCRIPT_MYANMAR, + ucp_New_Tai_Lue = G_UNICODE_SCRIPT_NEW_TAI_LUE, + ucp_Ogham = G_UNICODE_SCRIPT_OGHAM, + ucp_Old_Italic = G_UNICODE_SCRIPT_OLD_ITALIC, + ucp_Old_Persian = G_UNICODE_SCRIPT_OLD_PERSIAN, + ucp_Oriya = G_UNICODE_SCRIPT_ORIYA, + ucp_Osmanya = G_UNICODE_SCRIPT_OSMANYA, + ucp_Runic = G_UNICODE_SCRIPT_RUNIC, + ucp_Shavian = G_UNICODE_SCRIPT_SHAVIAN, + ucp_Sinhala = G_UNICODE_SCRIPT_SINHALA, + ucp_Syloti_Nagri = G_UNICODE_SCRIPT_SYLOTI_NAGRI, + ucp_Syriac = G_UNICODE_SCRIPT_SYRIAC, + ucp_Tagalog = G_UNICODE_SCRIPT_TAGALOG, + ucp_Tagbanwa = G_UNICODE_SCRIPT_TAGBANWA, + ucp_Tai_Le = G_UNICODE_SCRIPT_TAI_LE, + ucp_Tamil = G_UNICODE_SCRIPT_TAMIL, + ucp_Telugu = G_UNICODE_SCRIPT_TELUGU, + ucp_Thaana = G_UNICODE_SCRIPT_THAANA, + ucp_Thai = G_UNICODE_SCRIPT_THAI, + ucp_Tibetan = G_UNICODE_SCRIPT_TIBETAN, + ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH, + ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC, + ucp_Yi = G_UNICODE_SCRIPT_YI, /* New for Unicode 5.0: */ - ucp_Balinese, - ucp_Cuneiform, - ucp_Nko, - ucp_Phags_Pa, - ucp_Phoenician, + ucp_Balinese = G_UNICODE_SCRIPT_BALINESE, + ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM, + ucp_Nko = G_UNICODE_SCRIPT_NKO, + ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA, + ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN, /* New for Unicode 5.1: */ - ucp_Carian, - ucp_Cham, - ucp_Kayah_Li, - ucp_Lepcha, - ucp_Lycian, - ucp_Lydian, - ucp_Ol_Chiki, - ucp_Rejang, - ucp_Saurashtra, - ucp_Sundanese, - ucp_Vai, + ucp_Carian = G_UNICODE_SCRIPT_CARIAN, + ucp_Cham = G_UNICODE_SCRIPT_CHAM, + ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI, + ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA, + ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN, + ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN, + ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI, + ucp_Rejang = G_UNICODE_SCRIPT_REJANG, + ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA, + ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE, + ucp_Vai = G_UNICODE_SCRIPT_VAI, /* New for Unicode 5.2: */ - ucp_Avestan, - ucp_Bamum, - ucp_Egyptian_Hieroglyphs, - ucp_Imperial_Aramaic, - ucp_Inscriptional_Pahlavi, - ucp_Inscriptional_Parthian, - ucp_Javanese, - ucp_Kaithi, - ucp_Lisu, - ucp_Meetei_Mayek, - ucp_Old_South_Arabian, - ucp_Old_Turkic, - ucp_Samaritan, - ucp_Tai_Tham, - ucp_Tai_Viet, + ucp_Avestan = G_UNICODE_SCRIPT_AVESTAN, + ucp_Bamum = G_UNICODE_SCRIPT_BAMUM, + ucp_Egyptian_Hieroglyphs = G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS, + ucp_Imperial_Aramaic = G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC, + ucp_Inscriptional_Pahlavi = G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI, + ucp_Inscriptional_Parthian = G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN, + ucp_Javanese = G_UNICODE_SCRIPT_JAVANESE, + ucp_Kaithi = G_UNICODE_SCRIPT_KAITHI, + ucp_Lisu = G_UNICODE_SCRIPT_LISU, + ucp_Meetei_Mayek = G_UNICODE_SCRIPT_MEETEI_MAYEK, + ucp_Old_South_Arabian = G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN, + ucp_Old_Turkic = G_UNICODE_SCRIPT_OLD_TURKIC, + ucp_Samaritan = G_UNICODE_SCRIPT_SAMARITAN, + ucp_Tai_Tham = G_UNICODE_SCRIPT_TAI_THAM, + ucp_Tai_Viet = G_UNICODE_SCRIPT_TAI_VIET, /* New for Unicode 6.0.0: */ - ucp_Batak, - ucp_Brahmi, - ucp_Mandaic, + ucp_Batak = G_UNICODE_SCRIPT_BATAK, + ucp_Brahmi = G_UNICODE_SCRIPT_BRAHMI, + ucp_Mandaic = G_UNICODE_SCRIPT_MANDAIC, /* New for Unicode 6.1.0: */ - ucp_Chakma, - ucp_Meroitic_Cursive, - ucp_Meroitic_Hieroglyphs, - ucp_Miao, - ucp_Sharada, - ucp_Sora_Sompeng, - ucp_Takri + ucp_Chakma = G_UNICODE_SCRIPT_CHAKMA, + ucp_Meroitic_Cursive = G_UNICODE_SCRIPT_MEROITIC_CURSIVE, + ucp_Meroitic_Hieroglyphs = G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS, + ucp_Miao = G_UNICODE_SCRIPT_MIAO, + ucp_Sharada = G_UNICODE_SCRIPT_SHARADA, + ucp_Sora_Sompeng = G_UNICODE_SCRIPT_SORA_SOMPENG, + ucp_Takri = G_UNICODE_SCRIPT_TAKRI, }; #endif |