From af0a3afe054cc2be3600180cfdeef28761be2f7e Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 7 Jun 2007 17:55:55 +0500 Subject: Bug#28916 LDML doesn't work for utf8 and is not described in the manual - Adding missing initialization for utf8 collations - Minor code clean-ups: renaming variables, moving code into a new separate function. - Adding test, to check that both ucs2 and utf8 user defined collations work (ucs2_test_ci and utf8_test_ci) - Adding Vietnamese collation as a complex user defined collation example. include/m_ctype.h: Renaming variable names to match collation names (for convenience). mysys/charset-def.c: - Removing redundant declarations for variables declared in m_ctype.h - Renaming variable names to match collation names (for convenience). mysys/charset.c: - Renaming "new" to "newcs", to avoid using a C++ reserved word as a variable name - Moving UCA initialization code into a separate function - The bug fix itself: adding initialization of utf8 collations strings/ctype-uca.c: Renaming variable names to match collation names (for convenience). strings/ctype.c: Increasing buffer size to fit tailoring for languages with complex rules (e.g. Vietnamese). mysql-test/r/ctype_ldml.result: Adding test case mysql-test/std_data/Index.xml: Adding Index.xml example with user defined collations. mysql-test/t/ctype_ldml-master.opt: Adding OPT file for the test case, to use the example Index.xml file. 
mysql-test/t/ctype_ldml.test: Adding test case --- mysys/charset-def.c | 6 ++---- mysys/charset.c | 36 +++++++++++++++++++++++++----------- 2 files changed, 27 insertions(+), 15 deletions(-) (limited to 'mysys') diff --git a/mysys/charset-def.c b/mysys/charset-def.c index 394fe6de1f3..36ff4139d9c 100644 --- a/mysys/charset-def.c +++ b/mysys/charset-def.c @@ -24,7 +24,6 @@ #ifdef HAVE_UCA_COLLATIONS #ifdef HAVE_CHARSET_ucs2 -extern CHARSET_INFO my_charset_ucs2_general_uca; extern CHARSET_INFO my_charset_ucs2_icelandic_uca_ci; extern CHARSET_INFO my_charset_ucs2_latvian_uca_ci; extern CHARSET_INFO my_charset_ucs2_romanian_uca_ci; @@ -46,7 +45,6 @@ extern CHARSET_INFO my_charset_ucs2_hungarian_uca_ci; #endif #ifdef HAVE_CHARSET_utf8 -extern CHARSET_INFO my_charset_utf8_general_uca_ci; extern CHARSET_INFO my_charset_utf8_icelandic_uca_ci; extern CHARSET_INFO my_charset_utf8_latvian_uca_ci; extern CHARSET_INFO my_charset_utf8_romanian_uca_ci; @@ -134,7 +132,7 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused))) add_compiled_collation(&my_charset_ucs2_general_ci); add_compiled_collation(&my_charset_ucs2_bin); #ifdef HAVE_UCA_COLLATIONS - add_compiled_collation(&my_charset_ucs2_general_uca); + add_compiled_collation(&my_charset_ucs2_unicode_ci); add_compiled_collation(&my_charset_ucs2_icelandic_uca_ci); add_compiled_collation(&my_charset_ucs2_latvian_uca_ci); add_compiled_collation(&my_charset_ucs2_romanian_uca_ci); @@ -168,7 +166,7 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused))) add_compiled_collation(&my_charset_utf8_general_cs); #endif #ifdef HAVE_UCA_COLLATIONS - add_compiled_collation(&my_charset_utf8_general_uca_ci); + add_compiled_collation(&my_charset_utf8_unicode_ci); add_compiled_collation(&my_charset_utf8_icelandic_uca_ci); add_compiled_collation(&my_charset_utf8_latvian_uca_ci); add_compiled_collation(&my_charset_utf8_romanian_uca_ci); diff --git a/mysys/charset.c b/mysys/charset.c index cce97677b14..9ea17c6515c 100644 --- 
a/mysys/charset.c +++ b/mysys/charset.c @@ -202,6 +202,19 @@ static my_bool simple_cs_is_full(CHARSET_INFO *cs) } +static void +copy_uca_collation(CHARSET_INFO *to, CHARSET_INFO *from) +{ + to->cset= from->cset; + to->coll= from->coll; + to->strxfrm_multiply= from->strxfrm_multiply; + to->min_sort_char= from->min_sort_char; + to->max_sort_char= from->max_sort_char; + to->mbminlen= from->mbminlen; + to->mbmaxlen= from->mbmaxlen; +} + + static int add_collation(CHARSET_INFO *cs) { if (cs->name && (cs->number || @@ -225,29 +238,30 @@ static int add_collation(CHARSET_INFO *cs) if (!(all_charsets[cs->number]->state & MY_CS_COMPILED)) { - CHARSET_INFO *new= all_charsets[cs->number]; + CHARSET_INFO *newcs= all_charsets[cs->number]; if (cs_copy_data(all_charsets[cs->number],cs)) return MY_XML_ERROR; if (!strcmp(cs->csname,"ucs2") ) { #if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS) - new->cset= my_charset_ucs2_general_uca.cset; - new->coll= my_charset_ucs2_general_uca.coll; - new->strxfrm_multiply= my_charset_ucs2_general_uca.strxfrm_multiply; - new->min_sort_char= my_charset_ucs2_general_uca.min_sort_char; - new->max_sort_char= my_charset_ucs2_general_uca.max_sort_char; - new->mbminlen= 2; - new->mbmaxlen= 2; - new->state |= MY_CS_AVAILABLE | MY_CS_LOADED; + copy_uca_collation(newcs, &my_charset_ucs2_unicode_ci); + newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED; #endif + } + else if (!strcmp(cs->csname, "utf8")) + { +#if defined (HAVE_CHARSET_utf8) && defined(HAVE_UCA_COLLATIONS) + copy_uca_collation(newcs, &my_charset_utf8_unicode_ci); + newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED; +#endif } else { uchar *sort_order= all_charsets[cs->number]->sort_order; simple_cs_init_functions(all_charsets[cs->number]); - new->mbminlen= 1; - new->mbmaxlen= 1; + newcs->mbminlen= 1; + newcs->mbmaxlen= 1; if (simple_cs_is_full(all_charsets[cs->number])) { all_charsets[cs->number]->state |= MY_CS_LOADED; -- cgit v1.2.1