diff options
Diffstat (limited to 'mysys/charset.c')
-rw-r--r-- | mysys/charset.c | 183 |
1 files changed, 152 insertions, 31 deletions
diff --git a/mysys/charset.c b/mysys/charset.c index 3a39fce9437..e61d08cbc33 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -97,7 +97,7 @@ static my_bool init_state_maps(CHARSET_INFO *cs) /* Special handling of hex and binary strings */ state_map[(uchar)'x']= state_map[(uchar)'X']= (uchar) MY_LEX_IDENT_OR_HEX; - state_map[(uchar)'b']= state_map[(uchar)'b']= (uchar) MY_LEX_IDENT_OR_BIN; + state_map[(uchar)'b']= state_map[(uchar)'B']= (uchar) MY_LEX_IDENT_OR_BIN; state_map[(uchar)'n']= state_map[(uchar)'N']= (uchar) MY_LEX_IDENT_OR_NCHAR; return 0; } @@ -549,10 +549,10 @@ CHARSET_INFO *get_charset_by_csname(const char *cs_name, DBUG_PRINT("enter",("name: '%s'", cs_name)); (void) init_available_charsets(MYF(0)); /* If it isn't initialized */ - + cs_number= get_charset_number(cs_name, cs_flags); cs= cs_number ? get_internal_charset(cs_number, flags) : NULL; - + if (!cs && (flags & MY_WME)) { char index_file[FN_REFLEN]; @@ -564,21 +564,54 @@ CHARSET_INFO *get_charset_by_csname(const char *cs_name, } -ulong escape_string_for_mysql(CHARSET_INFO *charset_info, char *to, +/* + Escape string with backslashes (\) + + SYNOPSIS + escape_string_for_mysql() + charset_info Charset of the strings + to Buffer for escaped string + to_length Length of destination buffer, or 0 + from The string to escape + length The length of the string to escape + + DESCRIPTION + This escapes the contents of a string by adding backslashes before special + characters, and turning others into specific escape sequences, such as + turning newlines into \n and null bytes into \0. + + NOTE + To maintain compatibility with the old C API, to_length may be 0 to mean + "big enough" + + RETURN VALUES + ~0 The escaped string did not fit in the to buffer + >=0 The length of the escaped string +*/ + +ulong escape_string_for_mysql(CHARSET_INFO *charset_info, + char *to, ulong to_length, const char *from, ulong length) { const char *to_start= to; - const char *end; + const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length); + my_bool overflow= FALSE; #ifdef USE_MB my_bool use_mb_flag= use_mb(charset_info); #endif - for (end= from + length; from != end; from++) + for (end= from + length; from < end; from++) { + char escape= 0; #ifdef USE_MB - int l; - if (use_mb_flag && (l= my_ismbchar(charset_info, from, end))) + int tmp_length; + if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end))) { - while (l--) + if (to + tmp_length > to_end) + { + overflow= TRUE; + break; + } + while (tmp_length--) *to++= *from++; from--; continue; @@ -594,48 +627,55 @@ ulong escape_string_for_mysql(CHARSET_INFO *charset_info, char *to, multi-byte character into a valid one. For example, 0xbf27 is not a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \) */ - if (use_mb_flag && (l= my_mbcharlen(charset_info, *from)) > 1) - { - *to++= '\\'; - *to++= *from; - continue; - } + if (use_mb_flag && (tmp_length= my_mbcharlen(charset_info, *from)) > 1) + escape= *from; + else #endif switch (*from) { case 0: /* Must be escaped for 'mysql' */ - *to++= '\\'; - *to++= '0'; + escape= '0'; break; case '\n': /* Must be escaped for logs */ - *to++= '\\'; - *to++= 'n'; + escape= 'n'; break; case '\r': - *to++= '\\'; - *to++= 'r'; + escape= 'r'; break; case '\\': - *to++= '\\'; - *to++= '\\'; + escape= '\\'; break; case '\'': - *to++= '\\'; - *to++= '\''; + escape= '\''; break; case '"': /* Better safe than sorry */ - *to++= '\\'; - *to++= '"'; + escape= '"'; break; case '\032': /* This gives problems on Win32 */ - *to++= '\\'; - *to++= 'Z'; + escape= 'Z'; break; - default: + } + if (escape) + { + if (to + 2 > to_end) + { + overflow= TRUE; + break; + } + *to++= '\\'; + *to++= escape; + } + else + { + if (to + 1 > to_end) + { + overflow= TRUE; + break; + } *to++= *from; } } *to= 0; - return (ulong) (to - to_start); + return overflow ? (ulong)~0 : (ulong) (to - to_start); } @@ -663,3 +703,84 @@ CHARSET_INFO *fs_character_set() return fs_cset_cache; } #endif + +/* + Escape apostrophes by doubling them up + + SYNOPSIS + escape_quotes_for_mysql() + charset_info Charset of the strings + to Buffer for escaped string + to_length Length of destination buffer, or 0 + from The string to escape + length The length of the string to escape + + DESCRIPTION + This escapes the contents of a string by doubling up any apostrophes that + it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in + effect on the server. + + NOTE + To be consistent with escape_string_for_mysql(), to_length may be 0 to + mean "big enough" + + RETURN VALUES + ~0 The escaped string did not fit in the to buffer + >=0 The length of the escaped string +*/ + +ulong escape_quotes_for_mysql(CHARSET_INFO *charset_info, + char *to, ulong to_length, + const char *from, ulong length) +{ + const char *to_start= to; + const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length); + my_bool overflow= FALSE; +#ifdef USE_MB + my_bool use_mb_flag= use_mb(charset_info); +#endif + for (end= from + length; from < end; from++) + { +#ifdef USE_MB + int tmp_length; + if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end))) + { + if (to + tmp_length > to_end) + { + overflow= TRUE; + break; + } + while (tmp_length--) + *to++= *from++; + from--; + continue; + } + /* + We don't have the same issue here with a non-multi-byte character being + turned into a multi-byte character by the addition of an escaping + character, because we are only escaping the ' character with itself. + */ +#endif + if (*from == '\'') + { + if (to + 2 > to_end) + { + overflow= TRUE; + break; + } + *to++= '\''; + *to++= '\''; + } + else + { + if (to + 1 > to_end) + { + overflow= TRUE; + break; + } + *to++= *from; + } + } + *to= 0; + return overflow ? (ulong)~0 : (ulong) (to - to_start); +} |