diff options
Diffstat (limited to 'sql/sql_string.cc')
-rw-r--r-- | sql/sql_string.cc | 246 |
1 files changed, 207 insertions, 39 deletions
diff --git a/sql/sql_string.cc b/sql/sql_string.cc index 1c9a3cd7fc2..f99dbbcea01 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -1,5 +1,5 @@ -/* Copyright (c) 2000-2007 MySQL AB, 2009 Sun Microsystems, Inc. - Use is subject to license terms. +/* + Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,7 +12,8 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ /* This file is originally from the mysql distribution. Coded by monty */ @@ -33,7 +34,7 @@ required by the string function */ -extern gptr sql_alloc(unsigned size); +extern uchar* sql_alloc(unsigned size); extern void sql_element_free(void *ptr); #include "sql_string.h" @@ -59,11 +60,33 @@ bool String::real_alloc(uint32 arg_length) } -/* -** Check that string is big enough. Set string[alloc_length] to 0 -** (for C functions) -*/ +/** + Allocates a new buffer on the heap for this String. + + - If the String's internal buffer is privately owned and heap allocated, + one of the following is performed. + + - If the requested length is greater than what fits in the buffer, a new + buffer is allocated, data moved and the old buffer freed. + + - If the requested length is less than or equal to what fits in the buffer, a + null character is inserted at the appropriate position. + + - If the String does not keep a private buffer on the heap, such a buffer + will be allocated and the string copied according to its length, as found + in String::length(). + + For C compatibility, the new string buffer is null terminated. + + @param alloc_length The requested string size in characters, excluding any + null terminator. 
+ @retval false Either the copy operation is complete or, if the size of the + new buffer is smaller than the currently allocated buffer (if one exists), + no allocation occurred. + + @retval true An error occurred when attempting to allocate memory. +*/ bool String::realloc(uint32 alloc_length) { uint32 len=ALIGN_SIZE(alloc_length+1); @@ -93,29 +116,19 @@ bool String::realloc(uint32 alloc_length) return FALSE; } -bool String::set(longlong num, CHARSET_INFO *cs) -{ - uint l=20*cs->mbmaxlen+1; - - if (alloc(l)) - return TRUE; - str_length=(uint32) (cs->cset->longlong10_to_str)(cs,Ptr,l,-10,num); - str_charset=cs; - return FALSE; -} - -bool String::set(ulonglong num, CHARSET_INFO *cs) +bool String::set_int(longlong num, bool unsigned_flag, CHARSET_INFO *cs) { uint l=20*cs->mbmaxlen+1; + int base= unsigned_flag ? 10 : -10; if (alloc(l)) return TRUE; - str_length=(uint32) (cs->cset->longlong10_to_str)(cs,Ptr,l,10,num); + str_length=(uint32) (cs->cset->longlong10_to_str)(cs,Ptr,l,base,num); str_charset=cs; return FALSE; } -bool String::set(double num,uint decimals, CHARSET_INFO *cs) +bool String::set_real(double num,uint decimals, CHARSET_INFO *cs) { char buff[331]; uint dummy_errors; @@ -123,7 +136,8 @@ bool String::set(double num,uint decimals, CHARSET_INFO *cs) str_charset=cs; if (decimals >= NOT_FIXED_DEC) { - uint32 len= my_sprintf(buff,(buff, "%.15g",num));// Enough for a DATETIME + // Enough for a DATETIME + uint32 len= sprintf(buff, "%.15g", num); return copy(buff, len, &my_charset_latin1, cs, &dummy_errors); } #ifdef HAVE_FCONVERT @@ -206,6 +220,17 @@ bool String::copy() return FALSE; } +/** + Copies the internal buffer from str. If this String has a private heap + allocated buffer where new data does not fit, a new buffer is allocated + before copying and the old buffer freed. Character set information is also + copied. + + @param str The string whose internal buffer is to be copied. + + @retval false Success. + @retval true Memory allocation failed. 
+*/ bool String::copy(const String &str) { if (alloc(str.str_length)) @@ -333,12 +358,24 @@ bool String::set_or_copy_aligned(const char *str,uint32 arg_length, return copy_aligned(str, arg_length, offset, cs); } - /* Copy with charset convertion */ + +/** + Copies the character data into this String, with optional character set + conversion. + + @return + FALSE ok + TRUE Could not allocate result buffer + +*/ bool String::copy(const char *str, uint32 arg_length, CHARSET_INFO *from_cs, CHARSET_INFO *to_cs, uint *errors) { uint32 offset; + + DBUG_ASSERT(!str || str != Ptr); + if (!needs_conversion(arg_length, from_cs, to_cs, &offset)) { *errors= 0; @@ -504,7 +541,7 @@ bool String::append(FILE* file, uint32 arg_length, myf my_flags) { if (realloc(str_length+arg_length)) return TRUE; - if (my_fread(file, (byte*) Ptr + str_length, arg_length, my_flags)) + if (my_fread(file, (uchar*) Ptr + str_length, arg_length, my_flags)) { shrink(str_length); return TRUE; @@ -518,7 +555,7 @@ bool String::append(IO_CACHE* file, uint32 arg_length) { if (realloc(str_length+arg_length)) return TRUE; - if (my_b_read(file, (byte*) Ptr + str_length, arg_length)) + if (my_b_read(file, (uchar*) Ptr + str_length, arg_length)) { shrink(str_length); return TRUE; @@ -643,7 +680,7 @@ bool String::replace(uint32 offset,uint32 arg_length, { if (realloc(str_length+(uint32) diff)) return TRUE; - bmove_upp(Ptr+str_length+diff,Ptr+str_length, + bmove_upp((uchar*) Ptr+str_length+diff, (uchar*) Ptr+str_length, str_length-offset-arg_length); } if (to_length) @@ -675,7 +712,7 @@ void String::qs_append(const char *str, uint32 len) void String::qs_append(double d) { char *buff = Ptr + str_length; - str_length+= my_sprintf(buff, (buff, "%.15g", d)); + str_length+= sprintf(buff, "%.15g", d); } void String::qs_append(double *d) @@ -721,8 +758,8 @@ void String::qs_append(uint i) int sortcmp(const String *s,const String *t, CHARSET_INFO *cs) { return cs->coll->strnncollsp(cs, - (unsigned char *) 
s->ptr(),s->length(), - (unsigned char *) t->ptr(),t->length(), 0); + (uchar *) s->ptr(),s->length(), + (uchar *) t->ptr(),t->length(), 0); } @@ -735,7 +772,7 @@ int sortcmp(const String *s,const String *t, CHARSET_INFO *cs) t Second string NOTE: - Strings are compared as a stream of unsigned chars + Strings are compared as a stream of uchars RETURN < 0 s < t @@ -803,10 +840,8 @@ copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, const uchar *from_end= (const uchar*) from+from_length; char *to_start= to; uchar *to_end= (uchar*) to+to_length; - int (*mb_wc)(struct charset_info_st *, my_wc_t *, const uchar *, - const uchar *) = from_cs->cset->mb_wc; - int (*wc_mb)(struct charset_info_st *, my_wc_t, uchar *s, uchar *e)= - to_cs->cset->wc_mb; + my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc; + my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb; uint error_count= 0; while (1) @@ -850,6 +885,68 @@ outp: } +/** + Copy string with HEX-encoding of "bad" characters. + + @details This function copies the string pointed to by "src" + to the string pointed to by "dst". Not more than "srclen" bytes + are read from "src". Any sequences of bytes representing + a not-well-formed substring (according to cs) are hex-encoded, + and all well-formed substrings (according to cs) are copied as is. + Not more than "dstlen" bytes are written to "dst". The number + of bytes written to "dst" is returned. 
+ + @param cs character set pointer of the destination string + @param[out] dst destination string + @param dstlen size of dst + @param src source string + @param srclen length of src + + @retval result length +*/ + +size_t +my_copy_with_hex_escaping(CHARSET_INFO *cs, + char *dst, size_t dstlen, + const char *src, size_t srclen) +{ + const char *srcend= src + srclen; + char *dst0= dst; + + for ( ; src < srcend ; ) + { + size_t chlen; + if ((chlen= my_ismbchar(cs, src, srcend))) + { + if (dstlen < chlen) + break; /* purecov: inspected */ + memcpy(dst, src, chlen); + src+= chlen; + dst+= chlen; + dstlen-= chlen; + } + else if (*src & 0x80) + { + if (dstlen < 4) + break; /* purecov: inspected */ + *dst++= '\\'; + *dst++= 'x'; + *dst++= _dig_vec_upper[((unsigned char) *src) >> 4]; + *dst++= _dig_vec_upper[((unsigned char) *src) & 15]; + src++; + dstlen-= 4; + } + else + { + if (dstlen < 1) + break; /* purecov: inspected */ + *dst++= *src++; + dstlen--; + } + } + return dst - dst0; +} + /* copy a string, with optional character set conversion, @@ -948,10 +1045,8 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, { int cnvres; my_wc_t wc; - int (*mb_wc)(struct charset_info_st *, my_wc_t *, - const uchar *, const uchar *)= from_cs->cset->mb_wc; - int (*wc_mb)(struct charset_info_st *, my_wc_t, - uchar *s, uchar *e)= to_cs->cset->wc_mb; + my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc; + my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb; const uchar *from_end= (const uchar*) from + from_length; uchar *to_end= (uchar*) to + to_length; char *to_start= to; @@ -1060,3 +1155,76 @@ void String::swap(String &s) swap_variables(bool, alloced, s.alloced); swap_variables(CHARSET_INFO*, str_charset, s.str_charset); } + + +/** + Convert string to printable ASCII string + + @details This function converts input string "from" replacing non-ASCII bytes + with hexadecimal sequences ("\xXX") optionally appending "..." to the end of + the resulting string. 
+ This function is used in the ER_TRUNCATED_WRONG_VALUE_FOR_FIELD error messages, + e.g. when a string cannot be converted to a result charset. + + + @param to output buffer + @param to_len size of the output buffer (8 bytes or greater) + @param from input string + @param from_len size of the input string + @param from_cs input charset + @param nbytes maximal number of bytes to convert (from_len if 0) + + @return number of bytes in the output string +*/ + +uint convert_to_printable(char *to, size_t to_len, + const char *from, size_t from_len, + CHARSET_INFO *from_cs, size_t nbytes /*= 0*/) +{ + /* needs at least 8 bytes for '\xXX...' and zero byte */ + DBUG_ASSERT(to_len >= 8); + + char *t= to; + char *t_end= to + to_len - 1; // '- 1' is for the '\0' at the end + const char *f= from; + const char *f_end= from + (nbytes ? min(from_len, nbytes) : from_len); + char *dots= to; // last safe place to append '...' + + if (!f || t == t_end) + return 0; + + for (; t < t_end && f < f_end; f++) + { + /* + If the source string is ASCII compatible (mbminlen==1) + and the source character is in ASCII printable range (0x20..0x7F), + then display the character as is. + + Otherwise, if the source string is not ASCII compatible (e.g. UCS2), + or the source character is not in the printable range, + then print the character using HEX notation. + */ + if (((unsigned char) *f) >= 0x20 && + ((unsigned char) *f) <= 0x7F && + from_cs->mbminlen == 1) + { + *t++= *f; + } + else + { + if (t_end - t < 4) // \xXX + break; + *t++= '\\'; + *t++= 'x'; + *t++= _dig_vec_upper[((unsigned char) *f) >> 4]; + *t++= _dig_vec_upper[((unsigned char) *f) & 0x0F]; + } + if (t_end - t >= 3) // '...' + dots= t; + } + if (f < from + from_len) + memcpy(dots, STRING_WITH_LEN("...\0")); + else + *t= '\0'; + return t - to; +} |