diff options
Diffstat (limited to 'mysys/charset.c')
-rw-r--r-- | mysys/charset.c | 69 |
1 files changed, 44 insertions, 25 deletions
diff --git a/mysys/charset.c b/mysys/charset.c index 32cfeb56e2d..dbb2749d217 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -722,25 +722,26 @@ void free_charsets(void) static const char* -get_collation_name_alias(const char *name, char *buf, size_t bufsize) +get_collation_name_alias(const char *name, char *buf, size_t bufsize, myf flags) { - if (!strncasecmp(name, "utf8mb3_", 8)) + if (!strncasecmp(name, "utf8_", 5)) { - my_snprintf(buf, bufsize, "utf8_%s", name + 8); + my_snprintf(buf, bufsize, "utf8mb%c_%s", + flags & MY_UTF8_IS_UTF8MB3 ? '3' : '4', name + 5); return buf; } return NULL; } -uint get_collation_number(const char *name) +uint get_collation_number(const char *name, myf flags) { uint id; char alias[64]; my_pthread_once(&charsets_initialized, init_available_charsets); if ((id= get_collation_number_internal(name))) return id; - if ((name= get_collation_name_alias(name, alias, sizeof(alias)))) + if ((name= get_collation_name_alias(name, alias, sizeof(alias),flags))) return get_collation_number_internal(name); return 0; } @@ -763,22 +764,13 @@ get_charset_number_internal(const char *charset_name, uint cs_flags) } -static const char* -get_charset_name_alias(const char *name) -{ - if (!my_strcasecmp(&my_charset_latin1, name, "utf8mb3")) - return "utf8"; - return NULL; -} - - uint get_charset_number(const char *charset_name, uint cs_flags) { uint id; my_pthread_once(&charsets_initialized, init_available_charsets); if ((id= get_charset_number_internal(charset_name, cs_flags))) return id; - if ((charset_name= get_charset_name_alias(charset_name))) + if ((charset_name= !my_strcasecmp(&my_charset_latin1, charset_name, "utf8") ? "utf8mb3" : NULL)) return get_charset_number_internal(charset_name, cs_flags); return 0; } @@ -820,7 +812,7 @@ static CHARSET_INFO *find_collation_data_inheritance_source(CHARSET_INFO *cs) char name[MY_CS_NAME_SIZE + 1]; memcpy(name, beg, end - beg); name[end - beg]= '\0'; - return inheritance_source_by_id(cs, get_collation_number(name)); + return inheritance_source_by_id(cs, get_collation_number(name,MYF(0))); } return NULL; } @@ -944,7 +936,7 @@ my_collation_get_by_name(MY_CHARSET_LOADER *loader, CHARSET_INFO *cs; my_pthread_once(&charsets_initialized, init_available_charsets); - cs_number= get_collation_number(name); + cs_number= get_collation_number(name,flags); my_charset_loader_init_mysys(loader); cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL; @@ -961,7 +953,28 @@ my_collation_get_by_name(MY_CHARSET_LOADER *loader, CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags) { MY_CHARSET_LOADER loader; + my_bool utf8_is_utf8mb3= flags & MY_UTF8_IS_UTF8MB3 ? 1 : 0; + char *copy_of_name= (char*)cs_name; + char start[6], result[64]; + char *temp_cs_name; + my_charset_loader_init_mysys(&loader); + + if (!strcasecmp("utf8",copy_of_name)) + cs_name = (const char*)(utf8_is_utf8mb3 ? "utf8mb3" : "utf8mb4"); + + strncpy(start, cs_name, 5); + temp_cs_name= (char *)(utf8_is_utf8mb3 ? "utf8mb3_":"utf8mb4_"); + + if (!strncasecmp("utf8_", start,5)) + { + copy_of_name+= 5; + result[63]='\0'; + strcpy(result, temp_cs_name); + strcat(result, copy_of_name); + result[strlen(copy_of_name)+strlen(temp_cs_name)]='\0'; + cs_name= (const char *) result; + } return my_collation_get_by_name(&loader, cs_name, flags); } @@ -1005,12 +1018,16 @@ get_charset_by_csname(const char *cs_name, uint cs_flags, myf flags) { MY_CHARSET_LOADER loader; my_charset_loader_init_mysys(&loader); + + if (!strcasecmp("utf8",cs_name)) + cs_name= (const char*)(flags & MY_UTF8_IS_UTF8MB3 ? "utf8mb3" : "utf8mb4"); + return my_charset_get_by_name(&loader, cs_name, cs_flags, flags); } /** - Resolve character set by the character set name (utf8, latin1, ...). + Resolve character set by the character set name (utf8mb3, latin1, ...). The function tries to resolve character set by the specified name. If there is character set with the given name, it is assigned to the "cs" @@ -1027,9 +1044,10 @@ get_charset_by_csname(const char *cs_name, uint cs_flags, myf flags) my_bool resolve_charset(const char *cs_name, CHARSET_INFO *default_cs, - CHARSET_INFO **cs) + CHARSET_INFO **cs, + myf flags) { - *cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, MYF(0)); + *cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, flags); if (*cs == NULL) { @@ -1059,9 +1077,10 @@ my_bool resolve_charset(const char *cs_name, my_bool resolve_collation(const char *cl_name, CHARSET_INFO *default_cl, - CHARSET_INFO **cl) + CHARSET_INFO **cl, + myf my_flags) { - *cl= get_charset_by_name(cl_name, MYF(0)); + *cl= get_charset_by_name(cl_name, my_flags); if (*cl == NULL) { @@ -1369,7 +1388,7 @@ static const MY_CSET_OS_NAME charsets[] = #ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE {"cp54936", "gb18030", my_cs_exact}, #endif - {"cp65001", "utf8", my_cs_exact}, + {"cp65001", "utf8mb3", my_cs_exact}, #else /* not Windows */ @@ -1453,8 +1472,8 @@ static const MY_CSET_OS_NAME charsets[] = {"US-ASCII", "latin1", my_cs_approx}, - {"utf8", "utf8", my_cs_exact}, - {"utf-8", "utf8", my_cs_exact}, + {"utf8mb3", "utf8mb3", my_cs_exact}, + {"utf-8", "utf8mb3", my_cs_exact}, #endif {NULL, NULL, 0} }; |