summaryrefslogtreecommitdiff
path: root/mysys
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mysql.com>2010-02-24 13:15:34 +0400
committerAlexander Barkov <bar@mysql.com>2010-02-24 13:15:34 +0400
commit8994fad85db18b4ab31fc67e2f8e15f1203d0b1a (patch)
tree469ee723904a6610a4436b55056ed5e2c19d9972 /mysys
parentd2af6c43c0f7b62a6051d0c5f74c66f98dbefe7a (diff)
downloadmariadb-git-8994fad85db18b4ab31fc67e2f8e15f1203d0b1a.tar.gz
Backporting WL#1213
config/ac-macros/character_sets.m4: - Adding configure definitions for utf8mb4, utf16, utf32 include/config-win.h: - Enabling utf8mb4, utf16, utf32 in Windows build include/m_ctype.h: - Adding new flags - Adding new shared function prototypes mysql-test/include/ctype_datetime.inc: - Adding test to check that datetime functions work with "real" multibyte character sets. mysql-test/include/ctype_like.inc: - Adding LIKE tests mysql-test/include/have_utf16.inc: New file mysql-test/include/have_utf32.inc: New file mysql-test/include/have_utf8mb4.inc: New file mysql-test/r/ctype_ldml.result: - Adding tests for utf8mb4, utf16, utf32 mysql-test/r/ctype_many.result: - Adding tests to check superset/subset relations between all Unicode character sets. mysql-test/r/ctype_utf16.result: New file mysql-test/r/ctype_utf16_uca.result: New file mysql-test/r/ctype_utf32.result: New file mysql-test/r/ctype_utf32_uca.result: New file mysql-test/r/ctype_utf8.result: - Adding tests for utf8mb3 alias mysql-test/r/ctype_utf8mb4.result: - Adding tests for utf8mb4 mysql-test/r/have_utf16.require: New file mysql-test/r/have_utf32.require: New file mysql-test/r/have_utf8mb4.require: New file mysql-test/std_data/Index.xml: - Adding tests for loadable utf8mb4, utf16, utf32 collations mysql-test/suite/sys_vars/r/character_set_client_basic.result: - Adding tests for utf16, utf32. 
- Fixing new number of character sets mysql-test/suite/sys_vars/r/character_set_connection_basic.result: - Fixing new number of character sets mysql-test/suite/sys_vars/r/character_set_database_basic.result: - Fixing new number of character sets mysql-test/suite/sys_vars/r/character_set_filesystem_basic.result: - Fixing new number of character sets mysql-test/suite/sys_vars/r/character_set_results_basic.result: - Fixing new number of character sets mysql-test/suite/sys_vars/t/character_set_client_basic.test: - Adding tests for new character sets mysql-test/suite/sys_vars/t/character_set_connection_basic.test: - Adding dependency on utf8mb4, utf16, utf32 mysql-test/suite/sys_vars/t/character_set_database_basic.test: - Adding dependency on utf8mb4, utf16, utf32 mysql-test/suite/sys_vars/t/character_set_filesystem_basic.test: - Adding dependency on utf8mb4, utf16, utf32 mysql-test/suite/sys_vars/t/character_set_results_basic.test: - Adding dependency on utf8mb4, utf16, utf32 mysql-test/t/ctype_ldml.test: - Adding tests for dynamic utf8mb4, utf16, utf32 collations mysql-test/t/ctype_many.test: - Adding tests to check superset/subset relations between all Unicode character sets mysql-test/t/ctype_utf16.test: New file mysql-test/t/ctype_utf16_uca.test: New file mysql-test/t/ctype_utf32.test: New file mysql-test/t/ctype_utf32_uca.test: New file mysql-test/t/ctype_utf8.test: - Adding tests for utf8mb3 alias mysql-test/t/ctype_utf8mb4.test: New file mysys/charset-def.c: - Adding initialization of utf8mb4, utf16, utf32 built-in collations mysys/charset.c: - Adding initialization of utf8mb4, utf16, utf32 dynamic collations sql/field.cc: - Fixing "truncated" error with datetime functions: Force conversion in case of non-ascii character sets. 
sql/item.cc: - Adding superset/subset relation check for utf8mb4/utf8 sql/item_strfunc.cc: - Fixing a problem with CHAR(x USING utf32) sql/sql_string.cc: - Fixing problems with zero padding for UTF32 sql/sql_table.cc: - Fixing buffer size, to make utf32 comma fit. strings/ctype-mb.c: - Making handlers for multi-byte binary collations public strings/ctype-uca.c: - Adding definitions for utf8mb4, utf16, utf32 UCA collations strings/ctype-ucs2.c: - Adding functions which are shared between ucs2, utf16, utf32 - Adding utf16 implementation - Adding utf32 implementation strings/ctype-utf8.c: - Adding functions shared between utf8 and utf8mb4 - Adding implementation of utf8mb4
Diffstat (limited to 'mysys')
-rw-r--r--mysys/charset-def.c155
-rw-r--r--mysys/charset.c69
2 files changed, 219 insertions, 5 deletions
diff --git a/mysys/charset-def.c b/mysys/charset-def.c
index bf2576621ce..9089347f002 100644
--- a/mysys/charset-def.c
+++ b/mysys/charset-def.c
@@ -45,6 +45,53 @@ extern CHARSET_INFO my_charset_ucs2_hungarian_uca_ci;
extern CHARSET_INFO my_charset_ucs2_sinhala_uca_ci;
#endif
+
+#ifdef HAVE_CHARSET_utf32
+extern CHARSET_INFO my_charset_utf32_icelandic_uca_ci;
+extern CHARSET_INFO my_charset_utf32_latvian_uca_ci;
+extern CHARSET_INFO my_charset_utf32_romanian_uca_ci;
+extern CHARSET_INFO my_charset_utf32_slovenian_uca_ci;
+extern CHARSET_INFO my_charset_utf32_polish_uca_ci;
+extern CHARSET_INFO my_charset_utf32_estonian_uca_ci;
+extern CHARSET_INFO my_charset_utf32_spanish_uca_ci;
+extern CHARSET_INFO my_charset_utf32_swedish_uca_ci;
+extern CHARSET_INFO my_charset_utf32_turkish_uca_ci;
+extern CHARSET_INFO my_charset_utf32_czech_uca_ci;
+extern CHARSET_INFO my_charset_utf32_danish_uca_ci;
+extern CHARSET_INFO my_charset_utf32_lithuanian_uca_ci;
+extern CHARSET_INFO my_charset_utf32_slovak_uca_ci;
+extern CHARSET_INFO my_charset_utf32_spanish2_uca_ci;
+extern CHARSET_INFO my_charset_utf32_roman_uca_ci;
+extern CHARSET_INFO my_charset_utf32_persian_uca_ci;
+extern CHARSET_INFO my_charset_utf32_esperanto_uca_ci;
+extern CHARSET_INFO my_charset_utf32_hungarian_uca_ci;
+extern CHARSET_INFO my_charset_utf32_sinhala_uca_ci;
+#endif /* HAVE_CHARSET_utf32 */
+
+
+#ifdef HAVE_CHARSET_utf16
+extern CHARSET_INFO my_charset_utf16_icelandic_uca_ci;
+extern CHARSET_INFO my_charset_utf16_latvian_uca_ci;
+extern CHARSET_INFO my_charset_utf16_romanian_uca_ci;
+extern CHARSET_INFO my_charset_utf16_slovenian_uca_ci;
+extern CHARSET_INFO my_charset_utf16_polish_uca_ci;
+extern CHARSET_INFO my_charset_utf16_estonian_uca_ci;
+extern CHARSET_INFO my_charset_utf16_spanish_uca_ci;
+extern CHARSET_INFO my_charset_utf16_swedish_uca_ci;
+extern CHARSET_INFO my_charset_utf16_turkish_uca_ci;
+extern CHARSET_INFO my_charset_utf16_czech_uca_ci;
+extern CHARSET_INFO my_charset_utf16_danish_uca_ci;
+extern CHARSET_INFO my_charset_utf16_lithuanian_uca_ci;
+extern CHARSET_INFO my_charset_utf16_slovak_uca_ci;
+extern CHARSET_INFO my_charset_utf16_spanish2_uca_ci;
+extern CHARSET_INFO my_charset_utf16_roman_uca_ci;
+extern CHARSET_INFO my_charset_utf16_persian_uca_ci;
+extern CHARSET_INFO my_charset_utf16_esperanto_uca_ci;
+extern CHARSET_INFO my_charset_utf16_hungarian_uca_ci;
+extern CHARSET_INFO my_charset_utf16_sinhala_uca_ci;
+#endif /* HAVE_CHARSET_utf16 */
+
+
#ifdef HAVE_CHARSET_utf8
extern CHARSET_INFO my_charset_utf8_icelandic_uca_ci;
extern CHARSET_INFO my_charset_utf8_latvian_uca_ci;
@@ -70,6 +117,28 @@ extern CHARSET_INFO my_charset_utf8_general_cs;
#endif
#endif
+#ifdef HAVE_CHARSET_utf8mb4
+extern CHARSET_INFO my_charset_utf8mb4_icelandic_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_latvian_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_romanian_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_slovenian_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_polish_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_estonian_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_spanish_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_swedish_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_turkish_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_czech_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_danish_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_lithuanian_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_slovak_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_spanish2_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_roman_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_persian_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_esperanto_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_hungarian_uca_ci;
+extern CHARSET_INFO my_charset_utf8mb4_sinhala_uca_ci;
+#endif /* HAVE_CHARSET_utf8mb4 */
+
#endif /* HAVE_UCA_COLLATIONS */
my_bool init_compiled_charsets(myf flags __attribute__((unused)))
@@ -191,7 +260,91 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
add_compiled_collation(&my_charset_utf8_hungarian_uca_ci);
add_compiled_collation(&my_charset_utf8_sinhala_uca_ci);
#endif
-#endif
+#endif /* HAVE_CHARSET_utf8 */
+
+
+#ifdef HAVE_CHARSET_utf8mb4
+ add_compiled_collation(&my_charset_utf8mb4_general_ci);
+ add_compiled_collation(&my_charset_utf8mb4_bin);
+#ifdef HAVE_UCA_COLLATIONS
+ add_compiled_collation(&my_charset_utf8mb4_unicode_ci);
+ add_compiled_collation(&my_charset_utf8mb4_icelandic_uca_ci);
+ add_compiled_collation(&my_charset_utf8mb4_latvian_uca_ci);
+ add_compiled_collation(&my_charset_utf8mb4_romanian_uca_ci);
+ add_compiled_collation(&my_charset_utf8mb4_slovenian_uca_ci);
+ add_compiled_collation(&my_charset_utf8mb4_polish_uca_ci);
+ add_compiled_collation(&my_charset_utf8mb4_estonian_uca_ci);
+ add_compiled_collation(&my_charset_utf8mb4_spanish_uca_ci);
+ add_compiled_collation(&my_charset_utf8mb4_swedish_uca_ci);
+ add_compiled_collation(&my_charset_utf8mb4_turkish_uca_ci);
+ add_compiled_collation(&my_charset_utf8mb4_czech_uca_ci);
+ add_compiled_collation(&my_charset_utf8mb4_danish_uca_ci);
+ add_compiled_collation(&my_charset_utf8mb4_lithuanian_uca_ci);
+ add_compiled_collation(&my_charset_utf8mb4_slovak_uca_ci);
+ add_compiled_collation(&my_charset_utf8mb4_spanish2_uca_ci);
+ add_compiled_collation(&my_charset_utf8mb4_roman_uca_ci);
+ add_compiled_collation(&my_charset_utf8mb4_persian_uca_ci);
+ add_compiled_collation(&my_charset_utf8mb4_esperanto_uca_ci);
+ add_compiled_collation(&my_charset_utf8mb4_hungarian_uca_ci);
+ add_compiled_collation(&my_charset_utf8mb4_sinhala_uca_ci);
+#endif /* HAVE_UCA_COLLATIONS */
+#endif /* HAVE_CHARSET_utf8mb4 */
+
+
+#ifdef HAVE_CHARSET_utf16
+ add_compiled_collation(&my_charset_utf16_general_ci);
+ add_compiled_collation(&my_charset_utf16_bin);
+#ifdef HAVE_UCA_COLLATIONS
+ add_compiled_collation(&my_charset_utf16_unicode_ci);
+ add_compiled_collation(&my_charset_utf16_icelandic_uca_ci);
+ add_compiled_collation(&my_charset_utf16_latvian_uca_ci);
+ add_compiled_collation(&my_charset_utf16_romanian_uca_ci);
+ add_compiled_collation(&my_charset_utf16_slovenian_uca_ci);
+ add_compiled_collation(&my_charset_utf16_polish_uca_ci);
+ add_compiled_collation(&my_charset_utf16_estonian_uca_ci);
+ add_compiled_collation(&my_charset_utf16_spanish_uca_ci);
+ add_compiled_collation(&my_charset_utf16_swedish_uca_ci);
+ add_compiled_collation(&my_charset_utf16_turkish_uca_ci);
+ add_compiled_collation(&my_charset_utf16_czech_uca_ci);
+ add_compiled_collation(&my_charset_utf16_danish_uca_ci);
+ add_compiled_collation(&my_charset_utf16_lithuanian_uca_ci);
+ add_compiled_collation(&my_charset_utf16_slovak_uca_ci);
+ add_compiled_collation(&my_charset_utf16_spanish2_uca_ci);
+ add_compiled_collation(&my_charset_utf16_roman_uca_ci);
+ add_compiled_collation(&my_charset_utf16_persian_uca_ci);
+ add_compiled_collation(&my_charset_utf16_esperanto_uca_ci);
+ add_compiled_collation(&my_charset_utf16_hungarian_uca_ci);
+ add_compiled_collation(&my_charset_utf16_sinhala_uca_ci);
+#endif /* HAVE_UCA_COLLATIONS */
+#endif /* HAVE_CHARSET_utf16 */
+
+
+#ifdef HAVE_CHARSET_utf32
+ add_compiled_collation(&my_charset_utf32_general_ci);
+ add_compiled_collation(&my_charset_utf32_bin);
+#ifdef HAVE_UCA_COLLATIONS
+ add_compiled_collation(&my_charset_utf32_unicode_ci);
+ add_compiled_collation(&my_charset_utf32_icelandic_uca_ci);
+ add_compiled_collation(&my_charset_utf32_latvian_uca_ci);
+ add_compiled_collation(&my_charset_utf32_romanian_uca_ci);
+ add_compiled_collation(&my_charset_utf32_slovenian_uca_ci);
+ add_compiled_collation(&my_charset_utf32_polish_uca_ci);
+ add_compiled_collation(&my_charset_utf32_estonian_uca_ci);
+ add_compiled_collation(&my_charset_utf32_spanish_uca_ci);
+ add_compiled_collation(&my_charset_utf32_swedish_uca_ci);
+ add_compiled_collation(&my_charset_utf32_turkish_uca_ci);
+ add_compiled_collation(&my_charset_utf32_czech_uca_ci);
+ add_compiled_collation(&my_charset_utf32_danish_uca_ci);
+ add_compiled_collation(&my_charset_utf32_lithuanian_uca_ci);
+ add_compiled_collation(&my_charset_utf32_slovak_uca_ci);
+ add_compiled_collation(&my_charset_utf32_spanish2_uca_ci);
+ add_compiled_collation(&my_charset_utf32_roman_uca_ci);
+ add_compiled_collation(&my_charset_utf32_persian_uca_ci);
+ add_compiled_collation(&my_charset_utf32_esperanto_uca_ci);
+ add_compiled_collation(&my_charset_utf32_hungarian_uca_ci);
+ add_compiled_collation(&my_charset_utf32_sinhala_uca_ci);
+#endif /* HAVE_UCA_COLLATIONS */
+#endif /* HAVE_CHARSET_utf32 */
/* Copy compiled charsets */
for (cs=compiled_charsets; cs->name; cs++)
diff --git a/mysys/charset.c b/mysys/charset.c
index 0cd4fcff56c..b4991555263 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -252,7 +252,7 @@ static int add_collation(CHARSET_INFO *cs)
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
}
- else if (!strcmp(cs->csname, "utf8"))
+ else if (!strcmp(cs->csname, "utf8") || !strcmp(cs->csname, "utf8mb3"))
{
#if defined (HAVE_CHARSET_utf8) && defined(HAVE_UCA_COLLATIONS)
copy_uca_collation(newcs, &my_charset_utf8_unicode_ci);
@@ -261,6 +261,28 @@ static int add_collation(CHARSET_INFO *cs)
return MY_XML_ERROR;
#endif
}
+ else if (!strcmp(cs->csname, "utf8mb4"))
+ {
+#if defined (HAVE_CHARSET_utf8mb4) && defined(HAVE_UCA_COLLATIONS)
+ copy_uca_collation(newcs, &my_charset_utf8mb4_unicode_ci);
+ newcs->ctype= my_charset_utf8mb4_unicode_ci.ctype;
+ newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
+#endif
+ }
+ else if (!strcmp(cs->csname, "utf16"))
+ {
+#if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS)
+ copy_uca_collation(newcs, &my_charset_utf16_unicode_ci);
+ newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
+#endif
+ }
+ else if (!strcmp(cs->csname, "utf32"))
+ {
+#if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS)
+ copy_uca_collation(newcs, &my_charset_utf32_unicode_ci);
+ newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
+#endif
+ }
else
{
uchar *sort_order= all_charsets[cs->number]->sort_order;
@@ -433,17 +455,35 @@ static void init_available_charsets(void)
}
+static const char*
+get_collation_name_alias(const char *name, char *buf, size_t bufsize)
+{
+ if (!strncasecmp(name, "utf8mb3_", 8))
+ {
+ my_snprintf(buf, bufsize, "utf8_%s", name + 8);
+ return buf;
+ }
+ return NULL;
+}
+
+
uint get_collation_number(const char *name)
{
+ uint id;
+ char alias[64];
my_pthread_once(&charsets_initialized, init_available_charsets);
- return get_collation_number_internal(name);
+ if ((id= get_collation_number_internal(name)))
+ return id;
+ if ((name= get_collation_name_alias(name, alias, sizeof(alias))))
+ return get_collation_number_internal(name);
+ return 0;
}
-uint get_charset_number(const char *charset_name, uint cs_flags)
+static uint
+get_charset_number_internal(const char *charset_name, uint cs_flags)
{
CHARSET_INFO **cs;
- my_pthread_once(&charsets_initialized, init_available_charsets);
for (cs= all_charsets;
cs < all_charsets + array_elements(all_charsets);
@@ -457,6 +497,27 @@ uint get_charset_number(const char *charset_name, uint cs_flags)
}
+static const char*
+get_charset_name_alias(const char *name)
+{
+ if (!my_strcasecmp(&my_charset_latin1, name, "utf8mb3"))
+ return "utf8";
+ return NULL;
+}
+
+
+uint get_charset_number(const char *charset_name, uint cs_flags)
+{
+ uint id;
+ my_pthread_once(&charsets_initialized, init_available_charsets);
+ if ((id= get_charset_number_internal(charset_name, cs_flags)))
+ return id;
+ if ((charset_name= get_charset_name_alias(charset_name)))
+ return get_charset_number_internal(charset_name, cs_flags);
+ return 0;
+}
+
+
const char *get_charset_name(uint charset_number)
{
CHARSET_INFO *cs;