diff options
Diffstat (limited to 'sql/sql_string.cc')
-rw-r--r-- | sql/sql_string.cc | 286 |
1 files changed, 124 insertions, 162 deletions
diff --git a/sql/sql_string.cc b/sql/sql_string.cc index 57a308f581d..c4f5f315b08 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -12,8 +12,7 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -*/ + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* This file is originally from the mysql distribution. Coded by monty */ @@ -25,21 +24,20 @@ #include <my_sys.h> #include <m_string.h> #include <m_ctype.h> -#ifdef HAVE_FCONVERT -#include <floatingpoint.h> -#endif +#include <mysql_com.h> + #include "sql_string.h" -#ifdef MYSQL_CLIENT -#error Attempt to use server-side sql_string on client. Use client/sql_string.cc -#endif /***************************************************************************** ** String functions *****************************************************************************/ -bool String::real_alloc(uint32 arg_length) +bool String::real_alloc(uint32 length) { - arg_length=ALIGN_SIZE(arg_length+1); + uint32 arg_length= ALIGN_SIZE(length + 1); + DBUG_ASSERT(arg_length > length); + if (arg_length <= length) + return TRUE; /* Overflow */ str_length=0; if (Alloced_length < arg_length) { @@ -81,12 +79,15 @@ bool String::real_alloc(uint32 arg_length) @retval true An error occured when attempting to allocate memory. */ -bool String::realloc(uint32 alloc_length) +bool String::realloc_raw(uint32 alloc_length) { if (Alloced_length <= alloc_length) { char *new_ptr; uint32 len= ALIGN_SIZE(alloc_length+1); + DBUG_ASSERT(len > alloc_length); + if (len <= alloc_length) + return TRUE; /* Overflow */ if (alloced) { if (!(new_ptr= (char*) my_realloc(Ptr,len,MYF(MY_WME)))) @@ -106,7 +107,6 @@ bool String::realloc(uint32 alloc_length) Ptr= new_ptr; Alloced_length= len; } - Ptr[alloc_length]=0; // This make other funcs shorter return FALSE; } @@ -126,83 +126,17 @@ bool String::set_real(double num,uint decimals, CHARSET_INFO *cs) { char buff[FLOATING_POINT_BUFFER]; uint dummy_errors; + size_t len; str_charset=cs; if (decimals >= NOT_FIXED_DEC) { - uint32 len= my_sprintf(buff,(buff, "%.15g",num));// Enough for a DATETIME + len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL); return copy(buff, len, &my_charset_latin1, cs, &dummy_errors); } -#ifdef HAVE_FCONVERT - int decpt,sign; - char *pos,*to; - - VOID(fconvert(num,(int) decimals,&decpt,&sign,buff+1)); - if (!my_isdigit(&my_charset_latin1, buff[1])) - { // Nan or Inf - pos=buff+1; - if (sign) - { - buff[0]='-'; - pos=buff; - } - uint dummy_errors; - return copy(pos,(uint32) strlen(pos), &my_charset_latin1, cs, &dummy_errors); - } - if (alloc((uint32) ((uint32) decpt+3+decimals))) - return TRUE; - to=Ptr; - if (sign) - *to++='-'; - - pos=buff+1; - if (decpt < 0) - { /* value is < 0 */ - *to++='0'; - if (!decimals) - goto end; - *to++='.'; - if ((uint32) -decpt > decimals) - decpt= - (int) decimals; - decimals=(uint32) ((int) decimals+decpt); - while (decpt++ < 0) - *to++='0'; - } - else if (decpt == 0) - { - *to++= '0'; - if (!decimals) - goto end; - *to++='.'; - } - else - { - while (decpt-- > 0) - *to++= *pos++; - if (!decimals) - goto end; - *to++='.'; - } - while (decimals--) - *to++= *pos++; - -end: - *to=0; - str_length=(uint32) (to-Ptr); - return FALSE; -#else -#ifdef HAVE_SNPRINTF - buff[sizeof(buff)-1]=0; // Safety - IF_DBUG(int num_chars= ) - snprintf(buff, sizeof(buff)-1, "%.*f",(int) decimals, num); - DBUG_ASSERT(num_chars > 0); - DBUG_ASSERT(num_chars < (int) sizeof(buff)); -#else - sprintf(buff,"%.*f",(int) decimals,num); -#endif - return copy(buff,(uint32) strlen(buff), &my_charset_latin1, cs, + len= my_fcvt(num, decimals, buff, NULL); + return copy(buff, (uint32) len, &my_charset_latin1, cs, &dummy_errors); -#endif } @@ -318,8 +252,8 @@ bool String::copy_aligned(const char *str,uint32 arg_length, uint32 offset, CHARSET_INFO *cs) { /* How many bytes are in incomplete character */ - offset= cs->mbmaxlen - offset; /* How many zeros we should prepend */ - DBUG_ASSERT(offset && offset != cs->mbmaxlen); + offset= cs->mbminlen - offset; /* How many zeros we should prepend */ + DBUG_ASSERT(offset && offset != cs->mbminlen); uint32 aligned_length= arg_length + offset; if (alloc(aligned_length)) @@ -503,6 +437,16 @@ bool String::append(const char *s) } + +bool String::append_ulonglong(ulonglong val) +{ + if (realloc(str_length+MAX_BIGINT_WIDTH+2)) + return TRUE; + char *end= (char*) longlong10_to_str(val, (char*) Ptr + str_length, 10); + str_length= end - Ptr; + return FALSE; +} + /* Append a string in the given charset to the string with character set recoding @@ -510,11 +454,25 @@ bool String::append(const char *s) bool String::append(const char *s,uint32 arg_length, CHARSET_INFO *cs) { - uint32 dummy_offset; + uint32 offset; - if (needs_conversion(arg_length, cs, str_charset, &dummy_offset)) + if (needs_conversion(arg_length, cs, str_charset, &offset)) { - uint32 add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen; + uint32 add_length; + if ((cs == &my_charset_bin) && offset) + { + DBUG_ASSERT(str_charset->mbminlen > offset); + offset= str_charset->mbminlen - offset; // How many characters to pad + add_length= arg_length + offset; + if (realloc(str_length + add_length)) + return TRUE; + bzero((char*) Ptr + str_length, offset); + memcpy(Ptr + str_length + offset, s, arg_length); + str_length+= add_length; + return FALSE; + } + + add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen; uint dummy_errors; if (realloc_with_extra_if_needed(str_length + add_length)) return TRUE; @@ -531,22 +489,6 @@ bool String::append(const char *s,uint32 arg_length, CHARSET_INFO *cs) return FALSE; } - -#ifdef TO_BE_REMOVED -bool String::append(FILE* file, uint32 arg_length, myf my_flags) -{ - if (realloc_with_extra_if_needed(str_length+arg_length)) - return TRUE; - if (my_fread(file, (uchar*) Ptr + str_length, arg_length, my_flags)) - { - shrink(str_length); - return TRUE; - } - str_length+=arg_length; - return FALSE; -} -#endif - bool String::append(IO_CACHE* file, uint32 arg_length) { if (realloc_with_extra_if_needed(str_length+arg_length)) @@ -708,7 +650,8 @@ void String::qs_append(const char *str, uint32 len) void String::qs_append(double d) { char *buff = Ptr + str_length; - str_length+= my_sprintf(buff, (buff, "%.15g", d)); + str_length+= my_gcvt(d, MY_GCVT_ARG_DOUBLE, FLOATING_POINT_BUFFER - 1, buff, + NULL); } void String::qs_append(double *d) @@ -882,6 +825,65 @@ outp: } +/* + Optimized for quick copying of ASCII characters in the range 0x00..0x7F. +*/ +uint32 +copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, + const char *from, uint32 from_length, CHARSET_INFO *from_cs, + uint *errors) +{ + /* + If any of the character sets is not ASCII compatible, + immediately switch to slow mb_wc->wc_mb method. + */ + if ((to_cs->state | from_cs->state) & MY_CS_NONASCII) + return copy_and_convert_extended(to, to_length, to_cs, + from, from_length, from_cs, errors); + + uint32 length= min(to_length, from_length), length2= length; + +#if defined(__i386__) + /* + Special loop for i386, it allows to refer to a + non-aligned memory block as UINT32, which makes + it possible to copy four bytes at once. This + gives about 10% performance improvement comparing + to byte-by-byte loop. + */ + for ( ; length >= 4; length-= 4, from+= 4, to+= 4) + { + if ((*(uint32*)from) & 0x80808080) + break; + *((uint32*) to)= *((const uint32*) from); + } +#endif + + for (; ; *to++= *from++, length--) + { + if (!length) + { + *errors= 0; + return length2; + } + if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */ + { + uint32 copied_length= length2 - length; + to_length-= copied_length; + from_length-= copied_length; + return copied_length + copy_and_convert_extended(to, to_length, + to_cs, + from, from_length, + from_cs, + errors); + } + } + + DBUG_ASSERT(FALSE); // Should never get to here + return 0; // Make compiler happy +} + + /** Copy string with HEX-encoding of "bad" characters. @@ -944,64 +946,6 @@ my_copy_with_hex_escaping(CHARSET_INFO *cs, return dst - dst0; } -/* - Optimized for quick copying of ASCII characters in the range 0x00..0x7F. -*/ -uint32 -copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, - const char *from, uint32 from_length, CHARSET_INFO *from_cs, - uint *errors) -{ - /* - If any of the character sets is not ASCII compatible, - immediately switch to slow mb_wc->wc_mb method. - */ - if ((to_cs->state | from_cs->state) & MY_CS_NONASCII) - return copy_and_convert_extended(to, to_length, to_cs, - from, from_length, from_cs, errors); - - uint32 length= min(to_length, from_length), length2= length; - -#if defined(__i386__) - /* - Special loop for i386, it allows to refer to a - non-aligned memory block as UINT32, which makes - it possible to copy four bytes at once. This - gives about 10% performance improvement comparing - to byte-by-byte loop. - */ - for ( ; length >= 4; length-= 4, from+= 4, to+= 4) - { - if ((*(uint32*)from) & 0x80808080) - break; - *((uint32*) to)= *((const uint32*) from); - } -#endif - - for (; ; *to++= *from++, length--) - { - if (!length) - { - *errors= 0; - return length2; - } - if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */ - { - uint32 copied_length= length2 - length; - to_length-= copied_length; - from_length-= copied_length; - return copied_length + copy_and_convert_extended(to, to_length, - to_cs, - from, from_length, - from_cs, - errors); - } - } - - DBUG_ASSERT(FALSE); // Should never get to here - return 0; // Make compiler happy -} - /* copy a string, @@ -1079,6 +1023,24 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, uint pad_length= to_cs->mbminlen - from_offset; bzero(to, pad_length); memmove(to + pad_length, from, from_offset); + /* + In some cases left zero-padding can create an incorrect character. + For example: + INSERT INTO t1 (utf32_column) VALUES (0x110000); + We'll pad the value to 0x00110000, which is a wrong UTF32 sequence! + The valid characters range is limited to 0x00000000..0x0010FFFF. + + Make sure we didn't pad to an incorrect character. + */ + if (to_cs->cset->well_formed_len(to_cs, + to, to + to_cs->mbminlen, 1, + &well_formed_error) != + to_cs->mbminlen) + { + *from_end_pos= *well_formed_error_pos= from; + *cannot_convert_error_pos= NULL; + return 0; + } nchars--; from+= from_offset; from_length-= from_offset; |