summaryrefslogtreecommitdiff
path: root/mysys/charset.c
diff options
context:
space:
mode:
Diffstat (limited to 'mysys/charset.c')
-rw-r--r--mysys/charset.c183
1 files changed, 152 insertions, 31 deletions
diff --git a/mysys/charset.c b/mysys/charset.c
index 3a39fce9437..e61d08cbc33 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -97,7 +97,7 @@ static my_bool init_state_maps(CHARSET_INFO *cs)
/* Special handling of hex and binary strings */
state_map[(uchar)'x']= state_map[(uchar)'X']= (uchar) MY_LEX_IDENT_OR_HEX;
- state_map[(uchar)'b']= state_map[(uchar)'b']= (uchar) MY_LEX_IDENT_OR_BIN;
+ state_map[(uchar)'b']= state_map[(uchar)'B']= (uchar) MY_LEX_IDENT_OR_BIN;
state_map[(uchar)'n']= state_map[(uchar)'N']= (uchar) MY_LEX_IDENT_OR_NCHAR;
return 0;
}
@@ -549,10 +549,10 @@ CHARSET_INFO *get_charset_by_csname(const char *cs_name,
DBUG_PRINT("enter",("name: '%s'", cs_name));
(void) init_available_charsets(MYF(0)); /* If it isn't initialized */
-
+
cs_number= get_charset_number(cs_name, cs_flags);
cs= cs_number ? get_internal_charset(cs_number, flags) : NULL;
-
+
if (!cs && (flags & MY_WME))
{
char index_file[FN_REFLEN];
@@ -564,21 +564,54 @@ CHARSET_INFO *get_charset_by_csname(const char *cs_name,
}
-ulong escape_string_for_mysql(CHARSET_INFO *charset_info, char *to,
+/*
+ Escape string with backslashes (\)
+
+ SYNOPSIS
+ escape_string_for_mysql()
+ charset_info Charset of the strings
+ to Buffer for escaped string
+ to_length Length of destination buffer, or 0
+ from The string to escape
+ length The length of the string to escape
+
+ DESCRIPTION
+ This escapes the contents of a string by adding backslashes before special
+ characters, and turning others into specific escape sequences, such as
+ turning newlines into \n and null bytes into \0.
+
+ NOTE
+ To maintain compatibility with the old C API, to_length may be 0 to mean
+ "big enough"
+
+ RETURN VALUES
+ ~0 The escaped string did not fit in the to buffer
+ >=0 The length of the escaped string
+*/
+
+ulong escape_string_for_mysql(CHARSET_INFO *charset_info,
+ char *to, ulong to_length,
const char *from, ulong length)
{
const char *to_start= to;
- const char *end;
+ const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
+ my_bool overflow= FALSE;
#ifdef USE_MB
my_bool use_mb_flag= use_mb(charset_info);
#endif
- for (end= from + length; from != end; from++)
+ for (end= from + length; from < end; from++)
{
+ char escape= 0;
#ifdef USE_MB
- int l;
- if (use_mb_flag && (l= my_ismbchar(charset_info, from, end)))
+ int tmp_length;
+ if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
{
- while (l--)
+ if (to + tmp_length > to_end)
+ {
+ overflow= TRUE;
+ break;
+ }
+ while (tmp_length--)
*to++= *from++;
from--;
continue;
@@ -594,48 +627,55 @@ ulong escape_string_for_mysql(CHARSET_INFO *charset_info, char *to,
multi-byte character into a valid one. For example, 0xbf27 is not
a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \)
*/
- if (use_mb_flag && (l= my_mbcharlen(charset_info, *from)) > 1)
- {
- *to++= '\\';
- *to++= *from;
- continue;
- }
+ if (use_mb_flag && (tmp_length= my_mbcharlen(charset_info, *from)) > 1)
+ escape= *from;
+ else
#endif
switch (*from) {
case 0: /* Must be escaped for 'mysql' */
- *to++= '\\';
- *to++= '0';
+ escape= '0';
break;
case '\n': /* Must be escaped for logs */
- *to++= '\\';
- *to++= 'n';
+ escape= 'n';
break;
case '\r':
- *to++= '\\';
- *to++= 'r';
+ escape= 'r';
break;
case '\\':
- *to++= '\\';
- *to++= '\\';
+ escape= '\\';
break;
case '\'':
- *to++= '\\';
- *to++= '\'';
+ escape= '\'';
break;
case '"': /* Better safe than sorry */
- *to++= '\\';
- *to++= '"';
+ escape= '"';
break;
case '\032': /* This gives problems on Win32 */
- *to++= '\\';
- *to++= 'Z';
+ escape= 'Z';
break;
- default:
+ }
+ if (escape)
+ {
+ if (to + 2 > to_end)
+ {
+ overflow= TRUE;
+ break;
+ }
+ *to++= '\\';
+ *to++= escape;
+ }
+ else
+ {
+ if (to + 1 > to_end)
+ {
+ overflow= TRUE;
+ break;
+ }
*to++= *from;
}
}
*to= 0;
- return (ulong) (to - to_start);
+ return overflow ? (ulong)~0 : (ulong) (to - to_start);
}
@@ -663,3 +703,84 @@ CHARSET_INFO *fs_character_set()
return fs_cset_cache;
}
#endif
+
+/*
+ Escape apostrophes by doubling them up
+
+ SYNOPSIS
+ escape_quotes_for_mysql()
+ charset_info Charset of the strings
+ to Buffer for escaped string
+ to_length Length of destination buffer, or 0
+ from The string to escape
+ length The length of the string to escape
+
+ DESCRIPTION
+ This escapes the contents of a string by doubling up any apostrophes that
+ it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in
+ effect on the server.
+
+ NOTE
+ To be consistent with escape_string_for_mysql(), to_length may be 0 to
+ mean "big enough"
+
+ RETURN VALUES
+ ~0 The escaped string did not fit in the to buffer
+ >=0 The length of the escaped string
+*/
+
+ulong escape_quotes_for_mysql(CHARSET_INFO *charset_info,
+ char *to, ulong to_length,
+ const char *from, ulong length)
+{
+ const char *to_start= to;
+ const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
+ my_bool overflow= FALSE;
+#ifdef USE_MB
+ my_bool use_mb_flag= use_mb(charset_info);
+#endif
+ for (end= from + length; from < end; from++)
+ {
+#ifdef USE_MB
+ int tmp_length;
+ if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
+ {
+ if (to + tmp_length > to_end)
+ {
+ overflow= TRUE;
+ break;
+ }
+ while (tmp_length--)
+ *to++= *from++;
+ from--;
+ continue;
+ }
+ /*
+ We don't have the same issue here with a non-multi-byte character being
+ turned into a multi-byte character by the addition of an escaping
+ character, because we are only escaping the ' character with itself.
+ */
+#endif
+ if (*from == '\'')
+ {
+ if (to + 2 > to_end)
+ {
+ overflow= TRUE;
+ break;
+ }
+ *to++= '\'';
+ *to++= '\'';
+ }
+ else
+ {
+ if (to + 1 > to_end)
+ {
+ overflow= TRUE;
+ break;
+ }
+ *to++= *from;
+ }
+ }
+ *to= 0;
+ return overflow ? (ulong)~0 : (ulong) (to - to_start);
+}