summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbar@gw.udmsearch.izhnet.ru <>2002-06-17 22:43:40 +0500
committerbar@gw.udmsearch.izhnet.ru <>2002-06-17 22:43:40 +0500
commitf4f4962d4d64b9b8013aa982778db13ad5934d89 (patch)
tree0d23f2ed3374c600690f394ae85990763c2869b7
parenteeb81610721eec0b6e692f39e60396b2c28d8569 (diff)
downloadmariadb-git-f4f4962d4d64b9b8013aa982778db13ad5934d89.tar.gz
UCS2 charset has been added
-rw-r--r--acconfig.h1
-rw-r--r--configure.in8
-rw-r--r--include/m_ctype.h34
-rw-r--r--sql/share/charsets/Index1
-rw-r--r--strings/ctype-utf8.c309
-rw-r--r--strings/ctype.c33
6 files changed, 384 insertions, 2 deletions
diff --git a/acconfig.h b/acconfig.h
index bcf07015534..cf276d0b848 100644
--- a/acconfig.h
+++ b/acconfig.h
@@ -94,6 +94,7 @@
#undef HAVE_CHARSET_sjis
#undef HAVE_CHARSET_swe7
#undef HAVE_CHARSET_tis620
+#undef HAVE_CHARSET_ucs2
#undef HAVE_CHARSET_ujis
#undef HAVE_CHARSET_usa7
#undef HAVE_CHARSET_utf8
diff --git a/configure.in b/configure.in
index 53c235fd9e1..c8b3c7af13f 100644
--- a/configure.in
+++ b/configure.in
@@ -1942,10 +1942,10 @@ AC_DIVERT_PUSH(0)
CHARSETS_AVAILABLE="armscii8 big5 cp1251 cp1257
croat czech danish dec8 dos estonia euc_kr gb2312 gbk
german1 greek hebrew hp8 hungarian koi8_ru koi8_ukr
- latin1 latin1_de latin2 latin5 sjis swe7 tis620 ujis
+ latin1 latin1_de latin2 latin5 sjis swe7 tis620 ucs2 ujis
usa7 utf8 win1250 win1250ch win1251ukr"
CHARSETS_DEPRECATED="win1251"
-CHARSETS_COMPLEX="big5 czech euc_kr gb2312 gbk latin1_de sjis tis620 ujis utf8 win1250ch"
+CHARSETS_COMPLEX="big5 czech euc_kr gb2312 gbk latin1_de sjis tis620 ucs2 ujis utf8 win1250ch"
DEFAULT_CHARSET=latin1
AC_DIVERT_POP
@@ -2067,6 +2067,10 @@ do
tis620)
AC_DEFINE(HAVE_CHARSET_tis620)
;;
+ ucs2)
+ AC_DEFINE(HAVE_CHARSET_ucs2)
+ use_mb="yes"
+ ;;
ujis)
AC_DEFINE(HAVE_CHARSET_ujis)
use_mb="yes"
diff --git a/include/m_ctype.h b/include/m_ctype.h
index d8593e20fd1..3715402b4f4 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -287,6 +287,40 @@ void my_hash_sort_utf8(struct charset_info_st *cs, const uchar *key, uint len, u
#endif
+
+#ifdef HAVE_CHARSET_ucs2
+/* Byte-indexed classification and case tables; defined in strings/ctype-utf8.c. */
+extern uchar ctype_ucs2[];
+extern uchar to_lower_ucs2[];
+extern uchar to_upper_ucs2[];
+/* Collation: compares two length-delimited byte strings via the per-character sort mapping. */
+int my_strnncoll_ucs2(CHARSET_INFO *cs,
+ const uchar *s, uint s_len, const uchar *t, uint t_len);
+/* Writes the sort-mapped form of src into dest; the definition returns the number of bytes written. */
+int my_strnxfrm_ucs2(CHARSET_INFO *cs,
+ uchar *dest, uint destlen, const uchar *src, uint srclen);
+/* Multi-byte probes; the definitions in this change ignore their arguments and return 2, 1 and 2. */
+int my_ismbchar_ucs2(CHARSET_INFO *cs, const char *b, const char *e);
+my_bool my_ismbhead_ucs2(CHARSET_INFO * cs, uint ch);
+int my_mbcharlen_ucs2(CHARSET_INFO *cs, uint c);
+/* In-place case conversion; the NUL-terminated *_str variants are empty stubs in this change. */
+void my_caseup_str_ucs2(CHARSET_INFO * cs, char * s);
+void my_casedn_str_ucs2(CHARSET_INFO *cs, char * s);
+void my_caseup_ucs2(CHARSET_INFO *cs, char *s, uint len);
+void my_casedn_ucs2(CHARSET_INFO *cs, char *s, uint len);
+/* Case-insensitive comparison (characters are lower-cased before being compared). */
+int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t);
+int my_strncasecmp_ucs2(CHARSET_INFO *cs, const char *s,const char *t,uint l);
+/* Converters between a two-byte sequence (high byte first) and a my_wc_t value. */
+int my_ucs2_uni (CHARSET_INFO *cs, my_wc_t *p, const uchar *s, const uchar *e);
+int my_uni_ucs2 (CHARSET_INFO *cs, my_wc_t wc, uchar *b, uchar *e);
+/* Hashing: hash_caseup folds through toupper, hash_sort through the sort mapping. */
+uint my_hash_caseup_ucs2(struct charset_info_st *cs, const byte *key, uint len);
+void my_hash_sort_ucs2(struct charset_info_st *cs, const uchar *key, uint len, ulong *nr1, ulong *nr2);
+
+#endif
+
+
#define _U 01 /* Upper case */
#define _L 02 /* Lower case */
#define _NMR 04 /* Numeral (digit) */
diff --git a/sql/share/charsets/Index b/sql/share/charsets/Index
index 075cdc9872b..52cb6b99705 100644
--- a/sql/share/charsets/Index
+++ b/sql/share/charsets/Index
@@ -39,3 +39,4 @@ latin1_de 31
armscii8 32
utf8 33
win1250ch 34
+ucs2 35
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index a4485e1ef50..1cce8819619 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -25,6 +25,10 @@
#define HAVE_UNIDATA
#endif
+#ifdef HAVE_CHARSET_ucs2
+#define HAVE_UNIDATA
+#endif
+
#ifdef HAVE_UNIDATA
static MY_UNICASE_INFO plane00[]={
@@ -1999,4 +2003,309 @@ int main()
#endif
+#endif /* HAVE_CHARSET_UTF8 */
+
+
+
+#ifdef HAVE_CHARSET_ucs2
+/*
+  Character-class table for ucs2: one leading 0 followed by 256 entries,
+  one per byte value.  Entries are bit masks built from the flags in
+  m_ctype.h (_U=01, _L=02, _NMR=04, ...); byte values 128..255 carry no
+  flags.  NOTE(review): the extra leading slot presumably serves the
+  "index + 1" lookup convention of the ctype macros - confirm.
+*/
+uchar ctype_ucs2[] = {
+ 0,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
+ 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
+ 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+/*
+  Byte-indexed lower-case map (256 entries): 65..90 ('A'..'Z') map to
+  97..122 ('a'..'z'); every other byte value maps to itself.
+*/
+uchar to_lower_ucs2[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
+ 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
+ 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
+ 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
+ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
+ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
+ 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
+};
+/*
+  Byte-indexed upper-case map (256 entries): 97..122 ('a'..'z') map to
+  65..90 ('A'..'Z'); every other byte value maps to itself.
+*/
+uchar to_upper_ucs2[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+ 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
+ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
+ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
+ 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
+};
+
+
+/*
+  Decode one two-byte sequence starting at s: s[0] is the high byte and
+  s[1] the low byte of the resulting value.
+
+  Returns 2 (bytes consumed) with the value stored in *pwc, or
+  MY_CS_ILSEQ when fewer than two bytes remain before e (*pwc is then
+  left untouched).
+*/
+int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
+                my_wc_t *pwc, const uchar *s, const uchar *e)
+{
+  if (e < s + 2)
+    return MY_CS_ILSEQ;
+
+  *pwc= (((unsigned char) s[0]) << 8) | ((unsigned char) s[1]);
+  return 2;
+}
+
+/*
+  Encode wc as two bytes at r, high byte first.
+
+  Returns 2 (bytes written), or MY_CS_TOOSMALL when fewer than two bytes
+  are available before e.  Values that do not fit in 16 bits are not
+  rejected here; the stores simply narrow to uchar.
+*/
+int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+                my_wc_t wc, uchar *r, uchar *e)
+{
+  if (e < r + 2)
+    return MY_CS_TOOSMALL;
+
+  r[1]= wc & 0xFF;
+  r[0]= wc >> 8;
+  return 2;
+}
+
+
+/*
+  Upper-case slen bytes at s in place, one two-byte character at a time.
+
+  Each decoded character is replaced by the toupper entry of its
+  uni_plane page; characters whose page pointer is NULL are written back
+  unchanged.  Processing stops at the first character that cannot be
+  decoded or re-encoded in the same number of bytes.
+*/
+void my_caseup_ucs2(CHARSET_INFO *cs, char *s, uint slen)
+{
+  char *end= s + slen;
+
+  while (s < end)
+  {
+    my_wc_t wc;
+    int page;
+    int nbytes= my_ucs2_uni(cs, &wc, (uchar*) s, (uchar*) end);
+
+    if (nbytes <= 0)
+      break;
+
+    page= (wc >> 8) & 0xFF;
+    if (uni_plane[page])
+      wc= uni_plane[page][wc & 0xFF].toupper;
+
+    if (my_uni_ucs2(cs, wc, (uchar*) s, (uchar*) end) != nbytes)
+      break;
+    s+= nbytes;
+  }
+}
+
+/*
+  Case-insensitive hash of slen bytes at s.
+
+  Every decoded character is mapped through the toupper entry of its
+  uni_plane page (when that page exists); its low byte and then its
+  remaining high bits are mixed into the hash.  Decoding stops at the
+  first position where my_ucs2_uni() returns a non-positive value.
+
+  Returns the accumulated hash (initial value 1).
+*/
+uint my_hash_caseup_ucs2(CHARSET_INFO *cs, const byte *s, uint slen)
+{
+  uint hash= 1;
+  uint step= 4;
+  const char *end= s + slen;
+
+  while (s < end)
+  {
+    my_wc_t wc;
+    int page;
+    int nbytes= my_ucs2_uni(cs, &wc, (uchar*) s, (uchar*) end);
+
+    if (nbytes <= 0)
+      break;
+
+    page= (wc >> 8) & 0xFF;
+    if (uni_plane[page])
+      wc= uni_plane[page][wc & 0xFF].toupper;
+
+    /* Low byte first, then the high part; the step grows by 3 each time. */
+    hash^= (((hash & 63) + step) * (wc & 0xFF)) + (hash << 8);
+    step+= 3;
+    hash^= (((hash & 63) + step) * (wc >> 8)) + (hash << 8);
+    step+= 3;
+
+    s+= nbytes;
+  }
+
+  return hash;
+}
+
+
+/*
+  Mix slen bytes at s into the running hash state *n1 / *n2.
+
+  Every decoded character is mapped through the sort entry of its
+  uni_plane page (when that page exists); its low byte and then its
+  remaining high bits are folded into *n1, with *n2 advanced by 3 after
+  each fold.  Decoding stops at the first position where my_ucs2_uni()
+  returns a non-positive value.
+*/
+void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, ulong *n1, ulong *n2)
+{
+  const uchar *end= s + slen;
+
+  while (s < end)
+  {
+    my_wc_t wc;
+    int page;
+    int nbytes= my_ucs2_uni(cs, &wc, (uchar*) s, (uchar*) end);
+
+    if (nbytes <= 0)
+      break;
+
+    page= (wc >> 8) & 0xFF;
+    if (uni_plane[page])
+      wc= uni_plane[page][wc & 0xFF].sort;
+
+    *n1^= (((*n1 & 63) + *n2) * (wc & 0xFF)) + (*n1 << 8);
+    *n2+= 3;
+    *n1^= (((*n1 & 63) + *n2) * (wc >> 8)) + (*n1 << 8);
+    *n2+= 3;
+
+    s+= nbytes;
+  }
+}
+
+
+/*
+  Upper-case a NUL-terminated string: deliberately a no-op for ucs2.
+  NOTE(review): presumably because two-byte text may contain zero bytes,
+  so a NUL-terminated interface cannot delimit it - confirm.
+*/
+void my_caseup_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+                        char *s __attribute__((unused)))
+{
+  /* nothing to do */
+}
+
+
+
+/*
+  Lower-case slen bytes at s in place, one two-byte character at a time.
+
+  Each decoded character is replaced by the tolower entry of its
+  uni_plane page; characters whose page pointer is NULL are written back
+  unchanged.  Processing stops at the first character that cannot be
+  decoded or re-encoded in the same number of bytes.
+*/
+void my_casedn_ucs2(CHARSET_INFO *cs, char *s, uint slen)
+{
+  char *end= s + slen;
+
+  while (s < end)
+  {
+    my_wc_t wc;
+    int page;
+    int nbytes= my_ucs2_uni(cs, &wc, (uchar*) s, (uchar*) end);
+
+    if (nbytes <= 0)
+      break;
+
+    page= (wc >> 8) & 0xFF;
+    if (uni_plane[page])
+      wc= uni_plane[page][wc & 0xFF].tolower;
+
+    if (my_uni_ucs2(cs, wc, (uchar*) s, (uchar*) end) != nbytes)
+      break;
+    s+= nbytes;
+  }
+}
+
+/*
+  Lower-case a NUL-terminated string: deliberately a no-op for ucs2.
+  NOTE(review): presumably because two-byte text may contain zero bytes,
+  so a NUL-terminated interface cannot delimit it - confirm.
+*/
+void my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+                        char *s __attribute__((unused)))
+{
+  /* nothing to do */
+}
+
+
+/*
+  Compare two length-delimited byte strings character by character.
+
+  Each decoded character is mapped through the sort entry of its
+  uni_plane page (when that page exists) before being compared.
+
+  Returns the difference of the first pair of mapped characters that
+  differ.  If either side cannot be decoded at the current position, the
+  difference of the two current bytes is returned instead.  When one
+  string runs out first, the result is the difference of the remaining
+  byte counts (s minus t).
+*/
+int my_strnncoll_ucs2(CHARSET_INFO *cs,
+                      const uchar *s, uint slen, const uchar *t, uint tlen)
+{
+  const uchar *s_end= s + slen;
+  const uchar *t_end= t + tlen;
+
+  while (s < s_end && t < t_end)
+  {
+    my_wc_t a, b;
+    int page;
+    int a_len= my_ucs2_uni(cs, &a, s, s_end);
+    int b_len= my_ucs2_uni(cs, &b, t, t_end);
+
+    /* Incorrect string, compare by char value */
+    if (a_len <= 0 || b_len <= 0)
+      return ((int) s[0] - (int) t[0]);
+
+    page= (a >> 8) & 0xFF;
+    if (uni_plane[page])
+      a= uni_plane[page][a & 0xFF].sort;
+
+    page= (b >> 8) & 0xFF;
+    if (uni_plane[page])
+      b= uni_plane[page][b & 0xFF].sort;
+
+    if (a != b)
+      return ((int) a) - ((int) b);
+
+    s+= a_len;
+    t+= b_len;
+  }
+  return ((s_end - s) - (t_end - t));
+}
+
+/*
+  Case-insensitive comparison of the first len bytes of s and t.
+
+  Each decoded character is mapped through the tolower entry of its
+  uni_plane page (when that page exists) before being compared.
+
+  Returns the difference of the first pair of mapped characters that
+  differ.  If either side cannot be decoded at the current position, the
+  difference of the two current bytes is returned instead.  Both strings
+  share the same length, so a full match yields 0.
+*/
+int my_strncasecmp_ucs2(CHARSET_INFO *cs,
+                        const char *s, const char *t, uint len)
+{
+  const char *s_end= s + len;
+  const char *t_end= t + len;
+
+  while (s < s_end && t < t_end)
+  {
+    my_wc_t a, b;
+    int page;
+    int a_len= my_ucs2_uni(cs, &a, (const uchar*) s, (const uchar*) s_end);
+    int b_len= my_ucs2_uni(cs, &b, (const uchar*) t, (const uchar*) t_end);
+
+    /* Incorrect string, compare by char value */
+    if (a_len <= 0 || b_len <= 0)
+      return ((int) s[0] - (int) t[0]);
+
+    page= (a >> 8) & 0xFF;
+    if (uni_plane[page])
+      a= uni_plane[page][a & 0xFF].tolower;
+
+    page= (b >> 8) & 0xFF;
+    if (uni_plane[page])
+      b= uni_plane[page][b & 0xFF].tolower;
+
+    if (a != b)
+      return ((int) a) - ((int) b);
+
+    s+= a_len;
+    t+= b_len;
+  }
+  return ((s_end - s) - (t_end - t));
+}
+
+/*
+  Case-insensitive comparison of two NUL-terminated byte strings.
+
+  Lengths are taken with strlen(), i.e. up to the first zero byte.
+  NOTE(review): two-byte text normally contains zero bytes, so callers
+  holding real ucs2 data should prefer my_strncasecmp_ucs2() with an
+  explicit length.
+
+  Only the bytes both strings actually have are compared; passing the
+  longer length for both (as was done before) lets the two-byte decoder
+  read past the terminator of the shorter string.  If the shared prefix
+  compares equal, the shorter string sorts first.
+
+  Returns <0, 0 or >0.
+*/
+int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t)
+{
+  uint s_len= strlen(s);
+  uint t_len= strlen(t);
+  uint common= (s_len < t_len) ? s_len : t_len;
+  int cmp= my_strncasecmp_ucs2(cs, s, t, common);
+
+  if (cmp)
+    return cmp;
+  return (s_len > t_len) - (s_len < t_len);
+}
+
+/*
+  Write the sort-mapped form of src (srclen bytes) into dst (at most
+  dstlen bytes).
+
+  Each decoded character is mapped through the sort entry of its
+  uni_plane page (when that page exists) and re-encoded as two bytes.
+  Conversion stops when the source is exhausted, when a character cannot
+  be decoded, or when the destination has no room for another character.
+
+  Returns the number of bytes written to dst.
+*/
+int my_strnxfrm_ucs2(CHARSET_INFO *cs,
+                     uchar *dst, uint dstlen, const uchar *src, uint srclen)
+{
+  my_wc_t wc;
+  int res;
+  int plane;
+  uchar *de= dst + dstlen;
+  const uchar *se= src + srclen;
+  const uchar *dst_orig= dst;
+
+  while (src < se && dst < de)
+  {
+    /*
+      Stop on any non-positive result, like the other loops in this file.
+      Continuing on 0 would use wc unset and never advance src, filling
+      dst with garbage.
+    */
+    if ((res= my_ucs2_uni(cs, &wc, src, se)) <= 0)
+      break;
+    src+= res;
+
+    plane= (wc >> 8) & 0xFF;
+    wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc;
+
+    if ((res= my_uni_ucs2(cs, wc, dst, de)) <= 0)
+      break;
+    dst+= res;
+  }
+  return dst - dst_orig;
+}
+
+/*
+  Multi-byte character probe: always reports 2.
+  Neither b nor e is inspected, so no bounds check is performed here.
+*/
+int my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+                     const char *b __attribute__((unused)),
+                     const char *e __attribute__((unused)))
+{
+  return 2;
+}
+
+/*
+  Lead-byte probe: always returns 1, whatever the value of ch.
+*/
+my_bool my_ismbhead_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+                         uint ch __attribute__((unused)))
+{
+  return 1;
+}
+
+/*
+  Character length probe: always returns 2, whatever the value of c.
+*/
+int my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+                      uint c __attribute__((unused)))
+{
+  return 2;
+}
+
#endif
diff --git a/strings/ctype.c b/strings/ctype.c
index 0c20db35da2..7c37eeff986 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -3635,6 +3635,39 @@ CHARSET_INFO compiled_charsets[] = {
},
#endif
+#ifdef HAVE_CHARSET_ucs2
+ {
+ 35, /* number */
+ "ucs2", /* name */
+ ctype_ucs2, /* ctype */
+ to_lower_ucs2, /* to_lower */
+ to_upper_ucs2, /* to_upper */
+ to_upper_ucs2, /* sort_order */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ 1, /* strxfrm_multiply */
+ my_strnncoll_ucs2, /* strnncoll */
+ my_strnxfrm_ucs2, /* strnxfrm */
+ NULL, /* like_range */
+ 2, /* mbmaxlen */
+ my_ismbchar_ucs2, /* ismbchar */
+ my_ismbhead_ucs2, /* ismbhead */
+ my_mbcharlen_ucs2, /* mbcharlen */
+ my_ucs2_uni, /* mb_wc */
+ my_uni_ucs2, /* wc_mb */
+ my_caseup_str_ucs2, /* presumably caseup_str - confirm against CHARSET_INFO */
+ my_casedn_str_ucs2, /* presumably casedn_str - confirm against CHARSET_INFO */
+ my_caseup_ucs2, /* presumably caseup - confirm against CHARSET_INFO */
+ my_casedn_ucs2, /* presumably casedn - confirm against CHARSET_INFO */
+ my_strcasecmp_ucs2, /* presumably strcasecmp - confirm against CHARSET_INFO */
+ my_strncasecmp_ucs2, /* presumably strncasecmp - confirm against CHARSET_INFO */
+ my_hash_caseup_ucs2,/* hash_caseup */
+ my_hash_sort_ucs2, /* hash_sort */
+ 0 /* NOTE(review): unlabeled trailing field - confirm its meaning against CHARSET_INFO */
+ },
+#endif
+
+
#ifdef HAVE_CHARSET_ujis
{
12, /* number */