summary | refs | log | tree | commit | diff
path: root/sql/sql_string.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/sql_string.cc')
-rw-r--r-- sql/sql_string.cc 246
1 files changed, 207 insertions, 39 deletions
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index 1c9a3cd7fc2..f99dbbcea01 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -1,5 +1,5 @@
-/* Copyright (c) 2000-2007 MySQL AB, 2009 Sun Microsystems, Inc.
- Use is subject to license terms.
+/*
+ Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -12,7 +12,8 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
/* This file is originally from the mysql distribution. Coded by monty */
@@ -33,7 +34,7 @@
required by the string function
*/
-extern gptr sql_alloc(unsigned size);
+extern uchar* sql_alloc(unsigned size);
extern void sql_element_free(void *ptr);
#include "sql_string.h"
@@ -59,11 +60,33 @@ bool String::real_alloc(uint32 arg_length)
}
-/*
-** Check that string is big enough. Set string[alloc_length] to 0
-** (for C functions)
-*/
+/**
+ Allocates a new buffer on the heap for this String.
+
+ - If the String's internal buffer is privately owned and heap allocated,
+ one of the following is performed.
+
+ - If the requested length is greater than what fits in the buffer, a new
+ buffer is allocated, data moved and the old buffer freed.
+
+ - If the requested length is less or equal to what fits in the buffer, a
+ null character is inserted at the appropriate position.
+
+ - If the String does not keep a private buffer on the heap, such a buffer
+ will be allocated and the string copied according to its length, as found
+ in String::length().
+
+ For C compatibility, the new string buffer is null terminated.
+
+ @param alloc_length The requested string size in characters, excluding any
+ null terminator.
+ @retval false Either the copy operation is complete or, if the size of the
+ new buffer is smaller than the currently allocated buffer (if one exists),
+ no allocation occurred.
+
+ @retval true An error occurred when attempting to allocate memory.
+*/
bool String::realloc(uint32 alloc_length)
{
uint32 len=ALIGN_SIZE(alloc_length+1);
@@ -93,29 +116,19 @@ bool String::realloc(uint32 alloc_length)
return FALSE;
}
-bool String::set(longlong num, CHARSET_INFO *cs)
-{
- uint l=20*cs->mbmaxlen+1;
-
- if (alloc(l))
- return TRUE;
- str_length=(uint32) (cs->cset->longlong10_to_str)(cs,Ptr,l,-10,num);
- str_charset=cs;
- return FALSE;
-}
-
-bool String::set(ulonglong num, CHARSET_INFO *cs)
+bool String::set_int(longlong num, bool unsigned_flag, CHARSET_INFO *cs)
{
uint l=20*cs->mbmaxlen+1;
+ int base= unsigned_flag ? 10 : -10;
if (alloc(l))
return TRUE;
- str_length=(uint32) (cs->cset->longlong10_to_str)(cs,Ptr,l,10,num);
+ str_length=(uint32) (cs->cset->longlong10_to_str)(cs,Ptr,l,base,num);
str_charset=cs;
return FALSE;
}
-bool String::set(double num,uint decimals, CHARSET_INFO *cs)
+bool String::set_real(double num,uint decimals, CHARSET_INFO *cs)
{
char buff[331];
uint dummy_errors;
@@ -123,7 +136,8 @@ bool String::set(double num,uint decimals, CHARSET_INFO *cs)
str_charset=cs;
if (decimals >= NOT_FIXED_DEC)
{
- uint32 len= my_sprintf(buff,(buff, "%.15g",num));// Enough for a DATETIME
+ // Enough for a DATETIME
+ uint32 len= sprintf(buff, "%.15g", num);
return copy(buff, len, &my_charset_latin1, cs, &dummy_errors);
}
#ifdef HAVE_FCONVERT
@@ -206,6 +220,17 @@ bool String::copy()
return FALSE;
}
+/**
+ Copies the internal buffer from str. If this String has a private heap
+ allocated buffer where new data does not fit, a new buffer is allocated
+ before copying and the old buffer freed. Character set information is also
+ copied.
+
+ @param str The string whose internal buffer is to be copied.
+
+ @retval false Success.
+ @retval true Memory allocation failed.
+*/
bool String::copy(const String &str)
{
if (alloc(str.str_length))
@@ -333,12 +358,24 @@ bool String::set_or_copy_aligned(const char *str,uint32 arg_length,
return copy_aligned(str, arg_length, offset, cs);
}
- /* Copy with charset convertion */
+
+/**
+ Copies the character data into this String, with optional character set
+ conversion.
+
+ @return
+ FALSE ok
+ TRUE Could not allocate result buffer
+
+*/
bool String::copy(const char *str, uint32 arg_length,
CHARSET_INFO *from_cs, CHARSET_INFO *to_cs, uint *errors)
{
uint32 offset;
+
+ DBUG_ASSERT(!str || str != Ptr);
+
if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
{
*errors= 0;
@@ -504,7 +541,7 @@ bool String::append(FILE* file, uint32 arg_length, myf my_flags)
{
if (realloc(str_length+arg_length))
return TRUE;
- if (my_fread(file, (byte*) Ptr + str_length, arg_length, my_flags))
+ if (my_fread(file, (uchar*) Ptr + str_length, arg_length, my_flags))
{
shrink(str_length);
return TRUE;
@@ -518,7 +555,7 @@ bool String::append(IO_CACHE* file, uint32 arg_length)
{
if (realloc(str_length+arg_length))
return TRUE;
- if (my_b_read(file, (byte*) Ptr + str_length, arg_length))
+ if (my_b_read(file, (uchar*) Ptr + str_length, arg_length))
{
shrink(str_length);
return TRUE;
@@ -643,7 +680,7 @@ bool String::replace(uint32 offset,uint32 arg_length,
{
if (realloc(str_length+(uint32) diff))
return TRUE;
- bmove_upp(Ptr+str_length+diff,Ptr+str_length,
+ bmove_upp((uchar*) Ptr+str_length+diff, (uchar*) Ptr+str_length,
str_length-offset-arg_length);
}
if (to_length)
@@ -675,7 +712,7 @@ void String::qs_append(const char *str, uint32 len)
void String::qs_append(double d)
{
char *buff = Ptr + str_length;
- str_length+= my_sprintf(buff, (buff, "%.15g", d));
+ str_length+= sprintf(buff, "%.15g", d);
}
void String::qs_append(double *d)
@@ -721,8 +758,8 @@ void String::qs_append(uint i)
int sortcmp(const String *s,const String *t, CHARSET_INFO *cs)
{
return cs->coll->strnncollsp(cs,
- (unsigned char *) s->ptr(),s->length(),
- (unsigned char *) t->ptr(),t->length(), 0);
+ (uchar *) s->ptr(),s->length(),
+ (uchar *) t->ptr(),t->length(), 0);
}
@@ -735,7 +772,7 @@ int sortcmp(const String *s,const String *t, CHARSET_INFO *cs)
t Second string
NOTE:
- Strings are compared as a stream of unsigned chars
+ Strings are compared as a stream of uchars
RETURN
< 0 s < t
@@ -803,10 +840,8 @@ copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
const uchar *from_end= (const uchar*) from+from_length;
char *to_start= to;
uchar *to_end= (uchar*) to+to_length;
- int (*mb_wc)(struct charset_info_st *, my_wc_t *, const uchar *,
- const uchar *) = from_cs->cset->mb_wc;
- int (*wc_mb)(struct charset_info_st *, my_wc_t, uchar *s, uchar *e)=
- to_cs->cset->wc_mb;
+ my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
+ my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
uint error_count= 0;
while (1)
@@ -850,6 +885,68 @@ outp:
}
+/**
+ Copy string with HEX-encoding of "bad" characters.
+
+ @details This function copies the string pointed to by "src"
+ to the string pointed to by "dst". Not more than "srclen" bytes
+ are read from "src". Any sequences of bytes representing
+ a not-well-formed substring (according to cs) are hex-encoded,
+ and all well-formed substrings (according to cs) are copied as is.
+ Not more than "dstlen" bytes are written to "dst". The number
+ of bytes written to "dst" is returned.
+
+ @param cs character set pointer of the destination string
+ @param[out] dst destination string
+ @param dstlen size of dst
+ @param src source string
+ @param srclen length of src
+
+ @retval result length
+*/
+
+size_t
+my_copy_with_hex_escaping(CHARSET_INFO *cs,
+ char *dst, size_t dstlen,
+ const char *src, size_t srclen)
+{
+ const char *srcend= src + srclen;
+ char *dst0= dst;
+
+ for ( ; src < srcend ; )
+ {
+ size_t chlen;
+ if ((chlen= my_ismbchar(cs, src, srcend)))
+ {
+ if (dstlen < chlen)
+ break; /* purecov: inspected */
+ memcpy(dst, src, chlen);
+ src+= chlen;
+ dst+= chlen;
+ dstlen-= chlen;
+ }
+ else if (*src & 0x80)
+ {
+ if (dstlen < 4)
+ break; /* purecov: inspected */
+ *dst++= '\\';
+ *dst++= 'x';
+ *dst++= _dig_vec_upper[((unsigned char) *src) >> 4];
+ *dst++= _dig_vec_upper[((unsigned char) *src) & 15];
+ src++;
+ dstlen-= 4;
+ }
+ else
+ {
+ if (dstlen < 1)
+ break; /* purecov: inspected */
+ *dst++= *src++;
+ dstlen--;
+ }
+ }
+ return dst - dst0;
+}
+
/*
copy a string,
with optional character set conversion,
@@ -948,10 +1045,8 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
{
int cnvres;
my_wc_t wc;
- int (*mb_wc)(struct charset_info_st *, my_wc_t *,
- const uchar *, const uchar *)= from_cs->cset->mb_wc;
- int (*wc_mb)(struct charset_info_st *, my_wc_t,
- uchar *s, uchar *e)= to_cs->cset->wc_mb;
+ my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
+ my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
const uchar *from_end= (const uchar*) from + from_length;
uchar *to_end= (uchar*) to + to_length;
char *to_start= to;
@@ -1060,3 +1155,76 @@ void String::swap(String &s)
swap_variables(bool, alloced, s.alloced);
swap_variables(CHARSET_INFO*, str_charset, s.str_charset);
}
+
+
+/**
+ Convert string to printable ASCII string
+
+ @details This function converts input string "from" replacing non-ASCII bytes
+ with hexadecimal sequences ("\xXX") optionally appending "..." to the end of
+ the resulting string.
+ This function is used in the ER_TRUNCATED_WRONG_VALUE_FOR_FIELD error messages,
+ e.g. when a string cannot be converted to a result charset.
+
+
+ @param to output buffer
+ @param to_len size of the output buffer (8 bytes or greater)
+ @param from input string
+ @param from_len size of the input string
+ @param from_cs input charset
+ @param nbytes maximal number of bytes to convert (from_len if 0)
+
+ @return number of bytes in the output string
+*/
+
+uint convert_to_printable(char *to, size_t to_len,
+ const char *from, size_t from_len,
+ CHARSET_INFO *from_cs, size_t nbytes /*= 0*/)
+{
+ /* needs at least 8 bytes for '\xXX...' and zero byte */
+ DBUG_ASSERT(to_len >= 8);
+
+ char *t= to;
+ char *t_end= to + to_len - 1; // '- 1' is for the '\0' at the end
+ const char *f= from;
+ const char *f_end= from + (nbytes ? min(from_len, nbytes) : from_len);
+ char *dots= to; // last safe place to append '...'
+
+ if (!f || t == t_end)
+ return 0;
+
+ for (; t < t_end && f < f_end; f++)
+ {
+ /*
+ If the source string is ASCII compatible (mbminlen==1)
+ and the source character is in ASCII printable range (0x20..0x7F),
+ then display the character as is.
+
+ Otherwise, if the source string is not ASCII compatible (e.g. UCS2),
+ or the source character is not in the printable range,
+ then print the character using HEX notation.
+ */
+ if (((unsigned char) *f) >= 0x20 &&
+ ((unsigned char) *f) <= 0x7F &&
+ from_cs->mbminlen == 1)
+ {
+ *t++= *f;
+ }
+ else
+ {
+ if (t_end - t < 4) // \xXX
+ break;
+ *t++= '\\';
+ *t++= 'x';
+ *t++= _dig_vec_upper[((unsigned char) *f) >> 4];
+ *t++= _dig_vec_upper[((unsigned char) *f) & 0x0F];
+ }
+ if (t_end - t >= 3) // '...'
+ dots= t;
+ }
+ if (f < from + from_len)
+ memcpy(dots, STRING_WITH_LEN("...\0"));
+ else
+ *t= '\0';
+ return t - to;
+}