diff options
author | bar@gw.udmsearch.izhnet.ru <> | 2002-06-17 22:43:40 +0500 |
---|---|---|
committer | bar@gw.udmsearch.izhnet.ru <> | 2002-06-17 22:43:40 +0500 |
commit | f4f4962d4d64b9b8013aa982778db13ad5934d89 (patch) | |
tree | 0d23f2ed3374c600690f394ae85990763c2869b7 | |
parent | eeb81610721eec0b6e692f39e60396b2c28d8569 (diff) | |
download | mariadb-git-f4f4962d4d64b9b8013aa982778db13ad5934d89.tar.gz |
UCS2 charset has been added
-rw-r--r-- | acconfig.h | 1 |
-rw-r--r-- | configure.in | 8 |
-rw-r--r-- | include/m_ctype.h | 34 |
-rw-r--r-- | sql/share/charsets/Index | 1 |
-rw-r--r-- | strings/ctype-utf8.c | 309 |
-rw-r--r-- | strings/ctype.c | 33 |
6 files changed, 384 insertions, 2 deletions
diff --git a/acconfig.h b/acconfig.h index bcf07015534..cf276d0b848 100644 --- a/acconfig.h +++ b/acconfig.h @@ -94,6 +94,7 @@ #undef HAVE_CHARSET_sjis #undef HAVE_CHARSET_swe7 #undef HAVE_CHARSET_tis620 +#undef HAVE_CHARSET_ucs2 #undef HAVE_CHARSET_ujis #undef HAVE_CHARSET_usa7 #undef HAVE_CHARSET_utf8 diff --git a/configure.in b/configure.in index 53c235fd9e1..c8b3c7af13f 100644 --- a/configure.in +++ b/configure.in @@ -1942,10 +1942,10 @@ AC_DIVERT_PUSH(0) CHARSETS_AVAILABLE="armscii8 big5 cp1251 cp1257 croat czech danish dec8 dos estonia euc_kr gb2312 gbk german1 greek hebrew hp8 hungarian koi8_ru koi8_ukr - latin1 latin1_de latin2 latin5 sjis swe7 tis620 ujis + latin1 latin1_de latin2 latin5 sjis swe7 tis620 ucs2 ujis usa7 utf8 win1250 win1250ch win1251ukr" CHARSETS_DEPRECATED="win1251" -CHARSETS_COMPLEX="big5 czech euc_kr gb2312 gbk latin1_de sjis tis620 ujis utf8 win1250ch" +CHARSETS_COMPLEX="big5 czech euc_kr gb2312 gbk latin1_de sjis tis620 ucs2 ujis utf8 win1250ch" DEFAULT_CHARSET=latin1 AC_DIVERT_POP @@ -2067,6 +2067,10 @@ do tis620) AC_DEFINE(HAVE_CHARSET_tis620) ;; + ucs2) + AC_DEFINE(HAVE_CHARSET_ucs2) + use_mb="yes" + ;; ujis) AC_DEFINE(HAVE_CHARSET_ujis) use_mb="yes" diff --git a/include/m_ctype.h b/include/m_ctype.h index d8593e20fd1..3715402b4f4 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -287,6 +287,40 @@ void my_hash_sort_utf8(struct charset_info_st *cs, const uchar *key, uint len, u #endif + +#ifdef HAVE_CHARSET_ucs2 + +extern uchar ctype_ucs2[]; +extern uchar to_lower_ucs2[]; +extern uchar to_upper_ucs2[]; + +int my_strnncoll_ucs2(CHARSET_INFO *cs, + const uchar *s, uint s_len, const uchar *t, uint t_len); + +int my_strnxfrm_ucs2(CHARSET_INFO *cs, + uchar *dest, uint destlen, const uchar *src, uint srclen); + +int my_ismbchar_ucs2(CHARSET_INFO *cs, const char *b, const char *e); +my_bool my_ismbhead_ucs2(CHARSET_INFO * cs, uint ch); +int my_mbcharlen_ucs2(CHARSET_INFO *cs, uint c); + +void my_caseup_str_ucs2(CHARSET_INFO * cs, 
char * s); +void my_casedn_str_ucs2(CHARSET_INFO *cs, char * s); +void my_caseup_ucs2(CHARSET_INFO *cs, char *s, uint len); +void my_casedn_ucs2(CHARSET_INFO *cs, char *s, uint len); + +int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t); +int my_strncasecmp_ucs2(CHARSET_INFO *cs, const char *s,const char *t,uint l); + +int my_ucs2_uni (CHARSET_INFO *cs, my_wc_t *p, const uchar *s, const uchar *e); +int my_uni_ucs2 (CHARSET_INFO *cs, my_wc_t wc, uchar *b, uchar *e); + +uint my_hash_caseup_ucs2(struct charset_info_st *cs, const byte *key, uint len); +void my_hash_sort_ucs2(struct charset_info_st *cs, const uchar *key, uint len, ulong *nr1, ulong *nr2); + +#endif + + #define _U 01 /* Upper case */ #define _L 02 /* Lower case */ #define _NMR 04 /* Numeral (digit) */ diff --git a/sql/share/charsets/Index b/sql/share/charsets/Index index 075cdc9872b..52cb6b99705 100644 --- a/sql/share/charsets/Index +++ b/sql/share/charsets/Index @@ -39,3 +39,4 @@ latin1_de 31 armscii8 32 utf8 33 win1250ch 34 +ucs2 35 diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index a4485e1ef50..1cce8819619 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -25,6 +25,10 @@ #define HAVE_UNIDATA #endif +#ifdef HAVE_CHARSET_ucs2 +#define HAVE_UNIDATA +#endif + #ifdef HAVE_UNIDATA static MY_UNICASE_INFO plane00[]={ @@ -1999,4 +2003,309 @@ int main() #endif +#endif /* HAVE_CHARSET_UTF8 */ + + + +#ifdef HAVE_CHARSET_ucs2 + +uchar ctype_ucs2[] = { + 0, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16, + 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16, + 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +uchar to_lower_ucs2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95, + 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, + 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 +}; + +uchar to_upper_ucs2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, + 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 
90,123,124,125,126,127, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, + 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, + 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, + 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, + 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, + 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, + 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 +}; + + +int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)) , + my_wc_t * pwc, const uchar *s, const uchar *e) +{ + if (s+2 > e) /* Need 2 characters */ + return MY_CS_ILSEQ; + + *pwc= ((unsigned char)s[0]) * 256 + ((unsigned char)s[1]); + return 2; +} + +int my_uni_ucs2 (CHARSET_INFO *cs __attribute__((unused)) , + my_wc_t wc, uchar *r, uchar *e) +{ + if ( r+2 > e ) + return MY_CS_TOOSMALL; + + r[0]=wc >> 8; + r[1]=wc & 0xFF; + return 2; +} + + +void my_caseup_ucs2(CHARSET_INFO *cs, char *s, uint slen) +{ + my_wc_t wc; + int res; + char *e=s+slen; + + while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 ) + { + int plane = (wc>>8) & 0xFF; + wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc; + if (res != my_uni_ucs2(cs,wc,(uchar*)s,(uchar*)e)) + break; + s+=res; + } +} + +uint my_hash_caseup_ucs2(CHARSET_INFO *cs, const byte *s, uint slen) +{ + my_wc_t wc; + register uint nr=1, nr2=4; + int res; + const char *e=s+slen; + + while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 ) + { + int plane = (wc>>8) & 0xFF; + wc = uni_plane[plane] ? 
uni_plane[plane][wc & 0xFF].toupper : wc; + nr^= (((nr & 63)+nr2)*(wc & 0xFF))+ (nr << 8); + nr2+=3; + nr^= (((nr & 63)+nr2)*(wc >> 8))+ (nr << 8); + nr2+=3; + + s+=res; + } + + return nr; +} + + +void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, ulong *n1, ulong *n2) +{ + my_wc_t wc; + int res; + const uchar *e=s+slen; + + while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 ) + { + int plane = (wc>>8) & 0xFF; + wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc; + n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8); + n2[0]+=3; + n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8); + n2[0]+=3; + s+=res; + } +} + + +void my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)), + char * s __attribute__((unused))) +{ +} + + + +void my_casedn_ucs2(CHARSET_INFO *cs, char *s, uint slen) +{ + my_wc_t wc; + int res; + char *e=s+slen; + + while ((s < e) && (res=my_ucs2_uni(cs, &wc, (uchar*)s, (uchar*)e))>0) + { + int plane = (wc>>8) & 0xFF; + wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].tolower : wc; + if (res != my_uni_ucs2(cs, wc, (uchar*)s, (uchar*)e)) + { + break; + } + s+=res; + } +} + +void my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)), + char * s __attribute__((unused))) +{ +} + + +int my_strnncoll_ucs2(CHARSET_INFO *cs, + const uchar *s, uint slen, const uchar *t, uint tlen) +{ + int s_res,t_res; + my_wc_t s_wc,t_wc; + const uchar *se=s+slen; + const uchar *te=t+tlen; + + while ( s < se && t < te ) + { + int plane; + s_res=my_ucs2_uni(cs,&s_wc, s, se); + t_res=my_ucs2_uni(cs,&t_wc, t, te); + + if ( s_res <= 0 || t_res <= 0 ) + { + /* Incorrect string, compare by char value */ + return ((int)s[0]-(int)t[0]); + } + + plane=(s_wc>>8) & 0xFF; + s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc; + plane=(t_wc>>8) & 0xFF; + t_wc = uni_plane[plane] ? 
uni_plane[plane][t_wc & 0xFF].sort : t_wc; + if ( s_wc != t_wc ) + { + return ((int) s_wc) - ((int) t_wc); + } + + s+=s_res; + t+=t_res; + } + return ( (se-s) - (te-t) ); +} + +int my_strncasecmp_ucs2(CHARSET_INFO *cs, + const char *s, const char *t, uint len) +{ + int s_res,t_res; + my_wc_t s_wc,t_wc; + const char *se=s+len; + const char *te=t+len; + + while ( s < se && t < te ) + { + int plane; + + s_res=my_ucs2_uni(cs,&s_wc, (const uchar*)s, (const uchar*)se); + t_res=my_ucs2_uni(cs,&t_wc, (const uchar*)t, (const uchar*)te); + + if ( s_res <= 0 || t_res <= 0 ) + { + /* Incorrect string, compare by char value */ + return ((int)s[0]-(int)t[0]); + } + + plane=(s_wc>>8) & 0xFF; + s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].tolower : s_wc; + + plane=(t_wc>>8) & 0xFF; + t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].tolower : t_wc; + + if ( s_wc != t_wc ) + return ((int) s_wc) - ((int) t_wc); + + s+=s_res; + t+=t_res; + } + return ( (se-s) - (te-t) ); +} + +int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t) +{ + uint s_len=strlen(s); + uint t_len=strlen(t); + uint len = (s_len > t_len) ? s_len : t_len; + return my_strncasecmp_ucs2(cs, s, t, len); +} + +int my_strnxfrm_ucs2(CHARSET_INFO *cs, + uchar *dst, uint dstlen, const uchar *src, uint srclen) +{ + my_wc_t wc; + int res; + int plane; + uchar *de = dst + dstlen; + const uchar *se = src + srclen; + const uchar *dst_orig = dst; + + while( src < se && dst < de ) + { + if ((res=my_ucs2_uni(cs,&wc, src, se))<0) + { + break; + } + src+=res; + srclen-=res; + + plane=(wc>>8) & 0xFF; + wc = uni_plane[plane] ? 
uni_plane[plane][wc & 0xFF].sort : wc; + + if ((res=my_uni_ucs2(cs,wc,dst,de)) <0) + { + break; + } + dst+=res; + } + return dst - dst_orig; +} + +int my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)), + const char *b __attribute__((unused)), + const char *e __attribute__((unused))) +{ + return 2; +} + +my_bool my_ismbhead_ucs2(CHARSET_INFO *cs __attribute__((unused)) , + uint ch __attribute__((unused))) +{ + return 1; +} + +int my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) , + uint c __attribute__((unused))) +{ + return 2; +} + #endif diff --git a/strings/ctype.c b/strings/ctype.c index 0c20db35da2..7c37eeff986 100644 --- a/strings/ctype.c +++ b/strings/ctype.c @@ -3635,6 +3635,39 @@ CHARSET_INFO compiled_charsets[] = { }, #endif +#ifdef HAVE_CHARSET_ucs2 + { + 35, /* number */ + "ucs2", /* name */ + ctype_ucs2, /* ctype */ + to_lower_ucs2, /* to_lower */ + to_upper_ucs2, /* to_upper */ + to_upper_ucs2, /* sort_order */ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + 1, /* strxfrm_multiply */ + my_strnncoll_ucs2, /* strnncoll */ + my_strnxfrm_ucs2, /* strnxfrm */ + NULL, /* like_range */ + 2, /* mbmaxlen */ + my_ismbchar_ucs2, /* ismbchar */ + my_ismbhead_ucs2, /* ismbhead */ + my_mbcharlen_ucs2, /* mbcharlen */ + my_ucs2_uni, /* mb_wc */ + my_uni_ucs2, /* wc_mb */ + my_caseup_str_ucs2, + my_casedn_str_ucs2, + my_caseup_ucs2, + my_casedn_ucs2, + my_strcasecmp_ucs2, + my_strncasecmp_ucs2, + my_hash_caseup_ucs2,/* hash_caseup */ + my_hash_sort_ucs2, /* hash_sort */ + 0 + }, +#endif + + #ifdef HAVE_CHARSET_ujis { 12, /* number */ |