diff options
Diffstat (limited to 'strings')
-rw-r--r-- | strings/conf_to_src.c | 8 | ||||
-rw-r--r-- | strings/ctype-big5.c | 94 | ||||
-rw-r--r-- | strings/ctype-bin.c | 66 | ||||
-rw-r--r-- | strings/ctype-cp932.c | 7 | ||||
-rw-r--r-- | strings/ctype-czech.c | 4 | ||||
-rw-r--r-- | strings/ctype-euc_kr.c | 9 | ||||
-rw-r--r-- | strings/ctype-eucjpms.c | 12 | ||||
-rw-r--r-- | strings/ctype-gb2312.c | 7 | ||||
-rw-r--r-- | strings/ctype-gbk.c | 42 | ||||
-rw-r--r-- | strings/ctype-latin1.c | 39 | ||||
-rw-r--r-- | strings/ctype-mb.c | 93 | ||||
-rw-r--r-- | strings/ctype-mb.ic | 6 | ||||
-rw-r--r-- | strings/ctype-simple.c | 13 | ||||
-rw-r--r-- | strings/ctype-sjis.c | 7 | ||||
-rw-r--r-- | strings/ctype-tis620.c | 41 | ||||
-rw-r--r-- | strings/ctype-uca.c | 28 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 32 | ||||
-rw-r--r-- | strings/ctype-ujis.c | 12 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 80 | ||||
-rw-r--r-- | strings/ctype-win1250ch.c | 4 | ||||
-rw-r--r-- | strings/str2int.c | 2 | ||||
-rw-r--r-- | strings/strcoll.ic | 47 |
22 files changed, 131 insertions, 522 deletions
diff --git a/strings/conf_to_src.c b/strings/conf_to_src.c index 28d2fd1515e..5b9793f388d 100644 --- a/strings/conf_to_src.c +++ b/strings/conf_to_src.c @@ -256,7 +256,7 @@ void dispcset(FILE *f,CHARSET_INFO *cs) fprintf(f," 255, /* max_sort_char */\n"); fprintf(f," ' ', /* pad_char */\n"); fprintf(f," 0, /* escape_with_backslash_is_dangerous */\n"); - + fprintf(f," 1, /* levels_for_order */\n"); fprintf(f," &my_charset_8bit_handler,\n"); if (cs->state & MY_CS_BINSORT) fprintf(f," &my_collation_8bit_bin_handler,\n"); @@ -270,9 +270,9 @@ static void fprint_copyright(FILE *file) { fprintf(file, -"/* Copyright 2000-2008 MySQL AB, 2008 Sun Microsystems Inc.\n" +"/* Copyright 2000-2008 MySQL AB, 2008 Sun Microsystems, Inc.\n" " Copyright 2010-2011 Monty Program Ab\n" -" Copyright (c) 2003, 2011, Oracle and/or its affiliates\n" +" Copyright (c) 2000, 2011, Oracle and/or its affiliates.\n" "\n" " This program is free software; you can redistribute it and/or modify\n" " it under the terms of the GNU General Public License as published by\n" @@ -333,7 +333,7 @@ main(int argc, char **argv __attribute__((unused))) fprintf(f, " ./conf_to_src ../sql/share/charsets/ > FILE\n"); fprintf(f, "*/\n\n"); fprint_copyright(f); - fprintf(f,"#include <my_global.h>\n"); + fprintf(f,"#include \"strings_def.h\"\n"); fprintf(f,"#include <m_ctype.h>\n\n"); diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index d6a9695afbf..962931913a2 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -45,8 +45,6 @@ #define isbig5code(c,d) (isbig5head(c) && isbig5tail(d)) #define big5code(c,d) (((uchar)(c) <<8) | (uchar)(d)) -#define big5head(e) ((uchar)(e>>8)) -#define big5tail(e) ((uchar)(e&0xff)) #define MY_FUNCTION_NAME(x) my_ ## x ## _big5 #define IS_MB1_CHAR(x) ((uchar) (x) < 0x80) @@ -850,93 +848,6 @@ static uint16 big5strokexfrm(uint16 i) } -static size_t -my_strnxfrm_big5(CHARSET_INFO *cs, - uchar *dst, size_t dstlen, uint nweights, - const uchar *src, size_t srclen, uint flags) -{ - uchar *d0= dst; - uchar *de= dst + dstlen; - const uchar *se= src + srclen; - const uchar *sort_order= cs->sort_order; - - for (; dst < de && src < se && nweights; nweights--) - { - if (cs->cset->ismbchar(cs, (const char*) src, (const char*) se)) - { - /* - Note, it is safe not to check (src < se) - in the code below, because ismbchar() would - not return TRUE if src was too short - */ - uint16 e= big5strokexfrm((uint16) big5code(*src, *(src + 1))); - *dst++= big5head(e); - if (dst < de) - *dst++= big5tail(e); - src+= 2; - } - else - *dst++= sort_order ? sort_order[*src++] : *src++; - } - return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0); -} - -#if 0 -static int my_strcoll_big5(const uchar *s1, const uchar *s2) -{ - - while (*s1 && *s2) - { - if (*(s1+1) && *(s2+1) && isbig5code(*s1,*(s1+1)) && isbig5code(*s2, *(s2+1))) - { - if (*s1 != *s2 || *(s1+1) != *(s2+1)) - return ((int) big5code(*s1,*(s1+1)) - - (int) big5code(*s2,*(s2+1))); - s1 +=2; - s2 +=2; - } else if (sort_order_big5[(uchar) *s1++] != sort_order_big5[(uchar) *s2++]) - return ((int) sort_order_big5[(uchar) s1[-1]] - - (int) sort_order_big5[(uchar) s2[-1]]); - } - return 0; -} - -static int my_strxfrm_big5(uchar *dest, const uchar *src, int len) -{ - uint16 e; - uchar *d = dest; - - if (len < 1) return 0; - if (!*src) - { - *d = '\0'; - return 0; - } - while (*src && (len > 1)) - { - if (*(src+1) && isbig5code(*src, *(src+1))) - { - e = big5strokexfrm((uint16) big5code(*src, *(src+1))); - *d++ = big5head(e); - *d++ = big5tail(e); - src +=2; - len--; - } else - *d++ = sort_order_big5[(uchar) *src++]; - } - *d = '\0'; - return (int) (d-dest); -} -#endif - - -static uint ismbchar_big5(CHARSET_INFO *cs __attribute__((unused)), - const char* p, const char *e) -{ - return (isbig5head(*(p)) && (e)-(p)>1 && isbig5tail(*((p)+1))? 2: 0); -} - - static uint mbcharlen_big5(CHARSET_INFO *cs __attribute__((unused)), uint c) { return (isbig5head(c)? 2 : 1); @@ -6774,6 +6685,8 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)), #define MY_FUNCTION_NAME(x) my_ ## x ## _big5_chinese_ci #define WEIGHT_MB1(x) (sort_order_big5[(uchar) (x)]) #define WEIGHT_MB2(x,y) (big5code(x, y)) +#define WEIGHT_MB2_FRM(x,y) (big5strokexfrm((uint16) WEIGHT_MB2(x, y))) +#define DEFINE_STRNXFRM #include "strcoll.ic" @@ -6788,7 +6701,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_chinese_ci= NULL, /* init */ my_strnncoll_big5_chinese_ci, my_strnncollsp_big5_chinese_ci, - my_strnxfrm_big5, + my_strnxfrm_big5_chinese_ci, my_strnxfrmlen_simple, my_like_range_mb, my_wildcmp_mb, @@ -6818,7 +6731,6 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_bin= static MY_CHARSET_HANDLER my_charset_big5_handler= { NULL, /* init */ - ismbchar_big5, mbcharlen_big5, my_numchars_mb, my_charpos_mb, diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c index 0be6ae95577..8331de34ee4 100644 --- a/strings/ctype-bin.c +++ b/strings/ctype-bin.c @@ -119,9 +119,7 @@ size_t my_lengthsp_binary(CHARSET_INFO *cs __attribute__((unused)), static int my_strnncollsp_binary(CHARSET_INFO * cs __attribute__((unused)), const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool diff_if_only_endspace_difference - __attribute__((unused))) + const uchar *t, size_t tlen) { return my_strnncoll_binary(cs,s,slen,t,tlen,0); } @@ -139,6 +137,27 @@ static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)), /* + Compare a string to an array of spaces, for PAD SPACE behaviour. + @param str - the string + @param length - the length of the string + @return <0 - if a byte less than SPACE was found + @return >0 - if a byte greater than SPACE was found + @return 0 - if the string entirely consists of SPACE characters +*/ +int my_strnncollsp_padspace_bin(const uchar *str, size_t length) +{ + for ( ; length ; str++, length--) + { + if (*str < ' ') + return -1; + else if (*str > ' ') + return 1; + } + return 0; +} + + +/* Compare two strings. Result is sign(first_argument - second_argument) SYNOPSIS @@ -148,9 +167,6 @@ static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)), slen Length of 's' t String to compare tlen Length of 't' - diff_if_only_endspace_difference - Set to 1 if the strings should be regarded as different - if they only difference in end space NOTE This function is used for character strings with binary collations. @@ -165,16 +181,10 @@ static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)), static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)), const uchar *a, size_t a_length, - const uchar *b, size_t b_length, - my_bool diff_if_only_endspace_difference) + const uchar *b, size_t b_length) { const uchar *end; size_t length; - int res; - -#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE - diff_if_only_endspace_difference= 0; -#endif end= a + (length= MY_MIN(a_length, b_length)); while (a < end) @@ -182,31 +192,10 @@ static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)), if (*a++ != *b++) return ((int) a[-1] - (int) b[-1]); } - res= 0; - if (a_length != b_length) - { - int swap= 1; - /* - Check the next not space character of the longer key. If it's < ' ', - then it's smaller than the other key. - */ - if (diff_if_only_endspace_difference) - res= 1; /* Assume 'a' is bigger */ - if (a_length < b_length) - { - /* put shorter key in s */ - a_length= b_length; - a= b; - swap= -1; /* swap sign of result */ - res= -res; - } - for (end= a + a_length-length; a < end ; a++) - { - if (*a != ' ') - return (*a < ' ') ? -swap : swap; - } - } - return res; + return a_length == b_length ? 0 : + a_length < b_length ? + -my_strnncollsp_padspace_bin(b, b_length - length) : + my_strnncollsp_padspace_bin(a, a_length - length); } @@ -521,7 +510,6 @@ static MY_COLLATION_HANDLER my_collation_binary_handler = static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - NULL, /* ismbchar */ my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, my_charpos_8bit, diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c index 9bf206f1de7..2163662269d 100644 --- a/strings/ctype-cp932.c +++ b/strings/ctype-cp932.c @@ -191,12 +191,6 @@ static const uchar sort_order_cp932[]= #include "ctype-mb.ic" -static uint ismbchar_cp932(CHARSET_INFO *cs __attribute__((unused)), - const char* p, const char *e) -{ - return (iscp932head((uchar) *p) && (e-p)>1 && iscp932tail((uchar)p[1]) ? 2: 0); -} - static uint mbcharlen_cp932(CHARSET_INFO *cs __attribute__((unused)),uint c) { return (iscp932head((uchar) c) ? 2 : 1); @@ -34693,7 +34687,6 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_bin= static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - ismbchar_cp932, mbcharlen_cp932, my_numchars_mb, my_charpos_mb, diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c index e3abebad91d..a7efd20b259 100644 --- a/strings/ctype-czech.c +++ b/strings/ctype-czech.c @@ -276,9 +276,7 @@ static int my_strnncoll_czech(CHARSET_INFO *cs __attribute__((unused)), static int my_strnncollsp_czech(CHARSET_INFO * cs, const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool diff_if_only_endspace_difference - __attribute__((unused))) + const uchar *t, size_t tlen) { for ( ; slen && s[slen-1] == ' ' ; slen--); for ( ; tlen && t[tlen-1] == ' ' ; tlen--); diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index 1f13ab66284..19ed586ea49 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -210,14 +210,6 @@ static const uchar sort_order_euc_kr[]= #include "ctype-mb.ic" -static uint ismbchar_euc_kr(CHARSET_INFO *cs __attribute__((unused)), - const char* p, const char *e) -{ - return ((*(uchar*)(p)<0x80)? 0:\ - iseuc_kr_head(*(p)) && (e)-(p)>1 && iseuc_kr_tail(*((p)+1))? 2:\ - 0); -} - static uint mbcharlen_euc_kr(CHARSET_INFO *cs __attribute__((unused)),uint c) { return (iseuc_kr_head(c) ? 2 : 1); @@ -9987,7 +9979,6 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_bin= static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - ismbchar_euc_kr, mbcharlen_euc_kr, my_numchars_mb, my_charpos_mb, diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c index 82c4bb5a4e8..469d3a5be6c 100644 --- a/strings/ctype-eucjpms.c +++ b/strings/ctype-eucjpms.c @@ -199,6 +199,7 @@ static const uchar sort_order_eucjpms[]= #define IS_MB2_KATA(x,y) (iseucjpms_ss2(x) && iskata(y)) #define IS_MB2_CHAR(x,y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y)) #define IS_MB3_CHAR(x,y,z) (iseucjpms_ss3(x) && IS_MB2_JIS(y,z)) +#define IS_MB_PREFIX2(x,y) (iseucjpms_ss3(x) && iseucjpms(y)) #define DEFINE_ASIAN_ROUTINES #include "ctype-mb.ic" @@ -220,16 +221,6 @@ static const uchar sort_order_eucjpms[]= #include "strcoll.ic" -static uint ismbchar_eucjpms(CHARSET_INFO *cs __attribute__((unused)), - const char* p, const char *e) -{ - return ((*(uchar*)(p)<0x80)? 0:\ - iseucjpms(*(p)) && (e)-(p)>1 && iseucjpms(*((p)+1))? 2:\ - iseucjpms_ss2(*(p)) && (e)-(p)>1 && iskata(*((p)+1))? 2:\ - iseucjpms_ss3(*(p)) && (e)-(p)>2 && iseucjpms(*((p)+1)) && iseucjpms(*((p)+2))? 3:\ - 0); -} - static uint mbcharlen_eucjpms(CHARSET_INFO *cs __attribute__((unused)),uint c) { return (iseucjpms(c)? 2: iseucjpms_ss2(c)? 2: iseucjpms_ss3(c)? 3: 1); @@ -67520,7 +67511,6 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_bin_handler = static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - ismbchar_eucjpms, mbcharlen_eucjpms, my_numchars_mb, my_charpos_mb, diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index b0e275fe93d..a77237c1791 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -173,12 +173,6 @@ static const uchar sort_order_gb2312[]= #include "ctype-mb.ic" -static uint ismbchar_gb2312(CHARSET_INFO *cs __attribute__((unused)), - const char* p, const char *e) -{ - return (isgb2312head(*(p)) && (e)-(p)>1 && isgb2312tail(*((p)+1))? 2: 0); -} - static uint mbcharlen_gb2312(CHARSET_INFO *cs __attribute__((unused)),uint c) { return (isgb2312head(c)? 2 : 1); @@ -6391,7 +6385,6 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_bin= static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - ismbchar_gb2312, mbcharlen_gb2312, my_numchars_mb, my_charpos_mb, diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index 37b003f1899..e4e015a59d2 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -3451,44 +3451,6 @@ static uint16 gbksortorder(uint16 i) } -static size_t -my_strnxfrm_gbk(CHARSET_INFO *cs, - uchar *dst, size_t dstlen, uint nweights, - const uchar *src, size_t srclen, uint flags) -{ - uchar *d0= dst; - uchar *de= dst + dstlen; - const uchar *se= src + srclen; - const uchar *sort_order= cs->sort_order; - - for (; dst < de && src < se && nweights; nweights--) - { - if (cs->cset->ismbchar(cs, (const char*) src, (const char*) se)) - { - /* - Note, it is safe not to check (src < se) - in the code below, because ismbchar() would - not return TRUE if src was too short - */ - uint16 e= gbksortorder((uint16) gbkcode(*src, *(src + 1))); - *dst++= gbkhead(e); - if (dst < de) - *dst++= gbktail(e); - src+= 2; - } - else - *dst++= sort_order ? sort_order[*src++] : *src++; - } - return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0); -} - - -static uint ismbchar_gbk(CHARSET_INFO *cs __attribute__((unused)), - const char* p, const char *e) -{ - return (isgbkhead(*(p)) && (e)-(p)>1 && isgbktail(*((p)+1))? 2: 0); -} - static uint mbcharlen_gbk(CHARSET_INFO *cs __attribute__((unused)),uint c) { return (isgbkhead(c)? 2 : 1); @@ -10658,6 +10620,7 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)), #define MY_FUNCTION_NAME(x) my_ ## x ## _gbk_chinese_ci #define WEIGHT_MB1(x) (sort_order_gbk[(uchar) (x)]) #define WEIGHT_MB2(x,y) (gbksortorder(gbkcode(x,y))) +#define DEFINE_STRNXFRM #include "strcoll.ic" @@ -10672,7 +10635,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_chinese_ci= NULL, /* init */ my_strnncoll_gbk_chinese_ci, my_strnncollsp_gbk_chinese_ci, - my_strnxfrm_gbk, + my_strnxfrm_gbk_chinese_ci, my_strnxfrmlen_simple, my_like_range_mb, my_wildcmp_mb, @@ -10703,7 +10666,6 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_bin= static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - ismbchar_gbk, mbcharlen_gbk, my_numchars_mb, my_charpos_mb, diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index 26c66d60071..aba63d97abb 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -396,7 +396,6 @@ int my_wc_mb_latin1(CHARSET_INFO *cs __attribute__((unused)), static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - NULL, my_mbcharlen_8bit, my_numchars_8bit, my_charpos_8bit, @@ -598,16 +597,10 @@ static int my_strnncoll_latin1_de(CHARSET_INFO *cs __attribute__((unused)), static int my_strnncollsp_latin1_de(CHARSET_INFO *cs __attribute__((unused)), const uchar *a, size_t a_length, - const uchar *b, size_t b_length, - my_bool diff_if_only_endspace_difference) + const uchar *b, size_t b_length) { const uchar *a_end= a + a_length, *b_end= b + b_length; uchar a_char, a_extend= 0, b_char, b_extend= 0; - int res; - -#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE - diff_if_only_endspace_difference= 0; -#endif while ((a < a_end || a_extend) && (b < b_end || b_extend)) { @@ -640,31 +633,11 @@ static int my_strnncollsp_latin1_de(CHARSET_INFO *cs __attribute__((unused)), if (b_extend) return -1; - res= 0; - if (a != a_end || b != b_end) - { - int swap= 1; - if (diff_if_only_endspace_difference) - res= 1; /* Assume 'a' is bigger */ - /* - Check the next not space character of the longer key. If it's < ' ', - then it's smaller than the other key. - */ - if (a == a_end) - { - /* put shorter key in a */ - a_end= b_end; - a= b; - swap= -1; /* swap sign of result */ - res= -res; - } - for ( ; a < a_end ; a++) - { - if (*a != ' ') - return (*a < ' ') ? -swap : swap; - } - } - return res; + if (a < a_end) + return my_strnncollsp_padspace_bin(a, a_end - a); + if (b < b_end) + return -my_strnncollsp_padspace_bin(b, b_end - b); + return 0; } diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index eef283d2925..3fa66cb0b2f 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -571,93 +571,6 @@ uint my_instr_mb(CHARSET_INFO *cs, } -/* BINARY collations handlers for MB charsets */ - -int -my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)), - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool t_is_prefix) -{ - size_t len=MY_MIN(slen,tlen); - int cmp= memcmp(s,t,len); - return cmp ? cmp : (int) ((t_is_prefix ? len : slen) - tlen); -} - - -/* - Compare two strings. - - SYNOPSIS - my_strnncollsp_mb_bin() - cs Chararacter set - s String to compare - slen Length of 's' - t String to compare - tlen Length of 't' - diff_if_only_endspace_difference - Set to 1 if the strings should be regarded as different - if they only difference in end space - - NOTE - This function is used for character strings with binary collations. - The shorter string is extended with end space to be as long as the longer - one. - - RETURN - A negative number if s < t - A positive number if s > t - 0 if strings are equal -*/ - -int -my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)), - const uchar *a, size_t a_length, - const uchar *b, size_t b_length, - my_bool diff_if_only_endspace_difference) -{ - const uchar *end; - size_t length; - int res; - -#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE - diff_if_only_endspace_difference= 0; -#endif - - end= a + (length= MY_MIN(a_length, b_length)); - while (a < end) - { - if (*a++ != *b++) - return ((int) a[-1] - (int) b[-1]); - } - res= 0; - if (a_length != b_length) - { - int swap= 1; - if (diff_if_only_endspace_difference) - res= 1; /* Assume 'a' is bigger */ - /* - Check the next not space character of the longer key. If it's < ' ', - then it's smaller than the other key. - */ - if (a_length < b_length) - { - /* put shorter key in s */ - a_length= b_length; - a= b; - swap= -1; /* swap sign of result */ - res= -res; - } - for (end= a + a_length-length; a < end ; a++) - { - if (*a != ' ') - return (*a < ' ') ? -swap : swap; - } - } - return res; -} - - /* Copy one non-ascii character. "dst" must have enough room for the character. @@ -668,7 +581,7 @@ my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)), */ #define my_strnxfrm_mb_non_ascii_char(cs, dst, src, se) \ { \ - switch (cs->cset->ismbchar(cs, (const char*) src, (const char*) se)) { \ + switch (my_ismbchar(cs, (const char *) src, (const char *) se)) { \ case 4: \ *dst++= *src++; \ /* fall through */ \ @@ -740,8 +653,8 @@ my_strnxfrm_mb(CHARSET_INFO *cs, for (; src < se && nweights && dst < de; nweights--) { int chlen; - if (*src < 128 || - !(chlen= cs->cset->ismbchar(cs, (const char*) src, (const char*) se))) + if (*src < 128 || !(chlen= my_ismbchar(cs, (const char *) src, + (const char *) se))) { /* Single byte character */ *dst++= sort_order ? sort_order[*src++] : *src++; diff --git a/strings/ctype-mb.ic b/strings/ctype-mb.ic index 6fc4d6e3db4..2df9c9d5e49 100644 --- a/strings/ctype-mb.ic +++ b/strings/ctype-mb.ic @@ -75,7 +75,13 @@ MY_FUNCTION_NAME(charlen)(CHARSET_INFO *cs __attribute__((unused)), #ifdef IS_MB3_CHAR if (b + 3 > e) + { +#ifdef IS_MB_PREFIX2 + if (!IS_MB_PREFIX2(b[0], b[1])) + return MY_CS_ILSEQ; +#endif return MY_CS_TOOSMALLN(3); + } if (IS_MB3_CHAR(b[0], b[1], b[2])) return 3; /* Three-byte character */ #endif diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index 288f5fdd49d..5e5a345a638 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -128,9 +128,6 @@ int my_strnncoll_simple(CHARSET_INFO * cs, const uchar *s, size_t slen, a_length Length of 'a' b Second string to compare b_length Length of 'b' - diff_if_only_endspace_difference - Set to 1 if the strings should be regarded as different - if they only difference in end space IMPLEMENTATION If one string is shorter as the other, then we space extend the other @@ -149,17 +146,12 @@ int my_strnncoll_simple(CHARSET_INFO * cs, const uchar *s, size_t slen, */ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, size_t a_length, - const uchar *b, size_t b_length, - my_bool diff_if_only_endspace_difference) + const uchar *b, size_t b_length) { const uchar *map= cs->sort_order, *end; size_t length; int res; -#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE - diff_if_only_endspace_difference= 0; -#endif - end= a + (length= MY_MIN(a_length, b_length)); while (a < end) { @@ -170,8 +162,6 @@ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, size_t a_length, if (a_length != b_length) { int swap= 1; - if (diff_if_only_endspace_difference) - res= 1; /* Assume 'a' is bigger */ /* Check the next not space character of the longer key. If it's < ' ', then it's smaller than the other key. @@ -1926,7 +1916,6 @@ my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs, MY_CHARSET_HANDLER my_charset_8bit_handler= { my_cset_init_8bit, - NULL, /* ismbchar */ my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, my_charpos_8bit, diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index 629e1cd8309..ebcea22d242 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -192,12 +192,6 @@ static const uchar sort_order_sjis[]= #include "ctype-mb.ic" -static uint ismbchar_sjis(CHARSET_INFO *cs __attribute__((unused)), - const char* p, const char *e) -{ - return (issjishead((uchar) *p) && (e-p)>1 && issjistail((uchar)p[1]) ? 2: 0); -} - static uint mbcharlen_sjis(CHARSET_INFO *cs __attribute__((unused)),uint c) { return (issjishead((uchar) c) ? 2 : 1); @@ -34072,7 +34066,6 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_bin= static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - ismbchar_sjis, mbcharlen_sjis, my_numchars_mb, my_charpos_mb, diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index a1ca320835d..711bb21773e 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -543,17 +543,12 @@ int my_strnncoll_tis620(CHARSET_INFO *cs __attribute__((unused)), static int my_strnncollsp_tis620(CHARSET_INFO * cs __attribute__((unused)), const uchar *a0, size_t a_length, - const uchar *b0, size_t b_length, - my_bool diff_if_only_endspace_difference) + const uchar *b0, size_t b_length) { uchar buf[80], *end, *a, *b, *alloced= NULL; size_t length; int res= 0; -#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE - diff_if_only_endspace_difference= 0; -#endif - a= buf; if ((a_length + b_length +2) > (int) sizeof(buf)) alloced= a= (uchar*) my_str_malloc(a_length+b_length+2); @@ -575,33 +570,12 @@ int my_strnncollsp_tis620(CHARSET_INFO * cs __attribute__((unused)), goto ret; } } - if (a_length != b_length) - { - int swap= 1; - if (diff_if_only_endspace_difference) - res= 1; /* Assume 'a' is bigger */ - /* - Check the next not space character of the longer key. If it's < ' ', - then it's smaller than the other key. - */ - if (a_length < b_length) - { - /* put shorter key in s */ - a_length= b_length; - a= b; - swap= -1; /* swap sign of result */ - res= -res; - } - for (end= a + a_length-length; a < end ; a++) - { - if (*a != ' ') - { - res= (*a < ' ') ? -swap : swap; - goto ret; - } - } - } - + + res= a_length == b_length ? 0 : + a_length < b_length ? + -my_strnncollsp_padspace_bin(b, b_length - length) : + my_strnncollsp_padspace_bin(a, a_length - length); + ret: if (alloced) @@ -860,7 +834,6 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - NULL, /* ismbchar */ my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, my_charpos_8bit, diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index b0728978e71..0b279b620fd 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -20775,9 +20775,6 @@ my_char_weight_addr(const MY_UCA_WEIGHT_LEVEL *level, uint wc) slen First string length t Second string tlen Seconf string length - diff_if_only_endspace_difference - Set to 1 if the strings should be regarded as different - if they only difference in end space NOTES: Works exactly the same with my_strnncoll_uca(), @@ -20815,16 +20812,11 @@ my_char_weight_addr(const MY_UCA_WEIGHT_LEVEL *level, uint wc) static int my_strnncollsp_uca(CHARSET_INFO *cs, my_uca_scanner_handler *scanner_handler, const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool diff_if_only_endspace_difference) + const uchar *t, size_t tlen) { my_uca_scanner sscanner, tscanner; int s_res, t_res; -#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE - diff_if_only_endspace_difference= 0; -#endif - scanner_handler->init(&sscanner, cs, &cs->uca->level[0], s, slen); scanner_handler->init(&tscanner, cs, &cs->uca->level[0], t, tlen); @@ -20846,7 +20838,7 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs, return (s_res - t_res); s_res= scanner_handler->next(&sscanner); } while (s_res > 0); - return diff_if_only_endspace_difference ? 1 : 0; + return 0; } if (s_res < 0 && t_res > 0) @@ -20861,7 +20853,7 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs, return (s_res - t_res); t_res= scanner_handler->next(&tscanner); } while (t_res > 0); - return diff_if_only_endspace_difference ? -1 : 0; + return 0; } return ( s_res - t_res ); @@ -22845,12 +22837,9 @@ static int my_strnncoll_any_uca(CHARSET_INFO *cs, static int my_strnncollsp_any_uca(CHARSET_INFO *cs, const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool diff_if_only_endspace_difference) + const uchar *t, size_t tlen) { - return my_strnncollsp_uca(cs, &my_any_uca_scanner_handler, - s, slen, t, tlen, - diff_if_only_endspace_difference); + return my_strnncollsp_uca(cs, &my_any_uca_scanner_handler, s, slen, t, tlen); } static void my_hash_sort_any_uca(CHARSET_INFO *cs, @@ -22890,12 +22879,9 @@ static int my_strnncoll_ucs2_uca(CHARSET_INFO *cs, static int my_strnncollsp_ucs2_uca(CHARSET_INFO *cs, const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool diff_if_only_endspace_difference) + const uchar *t, size_t tlen) { - return my_strnncollsp_uca(cs, &my_any_uca_scanner_handler, - s, slen, t, tlen, - diff_if_only_endspace_difference); + return my_strnncollsp_uca(cs, &my_any_uca_scanner_handler, s, slen, t, tlen); } static void my_hash_sort_ucs2_uca(CHARSET_INFO *cs, diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index cae85f38c12..74e474cc28c 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1413,15 +1413,6 @@ my_casedn_utf16(CHARSET_INFO *cs, char *src, size_t srclen, } -static uint -my_ismbchar_utf16(CHARSET_INFO *cs, const char *b, const char *e) -{ - my_wc_t wc; - int res= cs->cset->mb_wc(cs, &wc, (const uchar *) b, (const uchar *) e); - return (uint) (res > 0 ? res : 0); -} - - static int my_charlen_utf16(CHARSET_INFO *cs, const uchar *str, const uchar *end) { @@ -1456,7 +1447,7 @@ my_numchars_utf16(CHARSET_INFO *cs, size_t nchars= 0; for ( ; ; nchars++) { - size_t charlen= my_ismbchar_utf16(cs, b, e); + size_t charlen= my_ismbchar(cs, b, e); if (!charlen) break; b+= charlen; @@ -1576,7 +1567,6 @@ static MY_COLLATION_HANDLER my_collation_utf16_bin_handler = MY_CHARSET_HANDLER my_charset_utf16_handler= { NULL, /* init */ - my_ismbchar_utf16, /* ismbchar */ my_mbcharlen_utf16, /* mbcharlen */ my_numchars_utf16, my_charpos_utf16, @@ -1799,7 +1789,6 @@ static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler = static MY_CHARSET_HANDLER my_charset_utf16le_handler= { NULL, /* init */ - my_ismbchar_utf16, my_mbcharlen_utf16, my_numchars_utf16, my_charpos_utf16, @@ -2075,15 +2064,6 @@ my_casedn_utf32(CHARSET_INFO *cs, char *src, size_t srclen, } -static uint -my_ismbchar_utf32(CHARSET_INFO *cs __attribute__((unused)), - const char *b, - const char *e) -{ - return b + 4 > e || !IS_UTF32_MBHEAD4(b[0], b[1]) ? 0 : 4; -} - - static int my_charlen_utf32(CHARSET_INFO *cs __attribute__((unused)), const uchar *b, const uchar *e) @@ -2545,7 +2525,6 @@ static MY_COLLATION_HANDLER my_collation_utf32_bin_handler = MY_CHARSET_HANDLER my_charset_utf32_handler= { NULL, /* init */ - my_ismbchar_utf32, my_mbcharlen_utf32, my_numchars_utf32, my_charpos_utf32, @@ -2883,14 +2862,6 @@ my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)), } -static uint my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)), - const char *b, - const char *e) -{ - return b + 2 > e ? 0 : 2; -} - - static uint my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) , uint c __attribute__((unused))) { @@ -3032,7 +3003,6 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler = MY_CHARSET_HANDLER my_charset_ucs2_handler= { NULL, /* init */ - my_ismbchar_ucs2, /* ismbchar */ my_mbcharlen_ucs2, /* mbcharlen */ my_numchars_ucs2, my_charpos_ucs2, diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c index 308f5f0f7d1..b24fdb3075f 100644 --- a/strings/ctype-ujis.c +++ b/strings/ctype-ujis.c @@ -198,6 +198,7 @@ static const uchar sort_order_ujis[]= #define IS_MB2_KATA(x,y) (isujis_ss2(x) && iskata(y)) #define IS_MB2_CHAR(x, y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y)) #define IS_MB3_CHAR(x, y, z) (isujis_ss3(x) && IS_MB2_JIS(y,z)) +#define IS_MB_PREFIX2(x,y) (isujis_ss3(x) && isujis(y)) #define DEFINE_ASIAN_ROUTINES #include "ctype-mb.ic" @@ -219,16 +220,6 @@ static const uchar sort_order_ujis[]= #include "strcoll.ic" -static uint ismbchar_ujis(CHARSET_INFO *cs __attribute__((unused)), - const char* p, const char *e) -{ - return ((*(uchar*)(p)<0x80)? 0:\ - isujis(*(p)) && (e)-(p)>1 && isujis(*((p)+1))? 2:\ - isujis_ss2(*(p)) && (e)-(p)>1 && iskata(*((p)+1))? 2:\ - isujis_ss3(*(p)) && (e)-(p)>2 && isujis(*((p)+1)) && isujis(*((p)+2))? 3:\ - 0); -} - static uint mbcharlen_ujis(CHARSET_INFO *cs __attribute__((unused)),uint c) { return (isujis(c)? 2: isujis_ss2(c)? 2: isujis_ss3(c)? 3: 1); @@ -67264,7 +67255,6 @@ static MY_COLLATION_HANDLER my_collation_ujis_bin_handler = static MY_CHARSET_HANDLER my_charset_handler= { NULL, /* init */ - ismbchar_ujis, mbcharlen_ujis, my_numchars_mb, my_charpos_mb, diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index c0865157ad5..3a5616b7323 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -5426,12 +5426,6 @@ my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2) #include "strcoll.ic" -static uint my_ismbchar_utf8(CHARSET_INFO *cs,const char *b, const char *e) -{ - int res= my_charlen_utf8(cs, (const uchar*) b, (const uchar*) e); - return (res>1) ? res : 0; -} - static uint my_mbcharlen_utf8(CHARSET_INFO *cs __attribute__((unused)), uint c) { @@ -5497,7 +5491,6 @@ static MY_COLLATION_HANDLER my_collation_utf8_bin_handler = MY_CHARSET_HANDLER my_charset_utf8_handler= { NULL, /* init */ - my_ismbchar_utf8, my_mbcharlen_utf8, my_numchars_mb, my_charpos_mb, @@ -5685,8 +5678,7 @@ static int my_strnncoll_utf8_cs(CHARSET_INFO *cs, static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool diff_if_only_endspace_difference) + const uchar *t, size_t tlen) { int s_res, t_res, res; my_wc_t s_wc, t_wc; @@ -5695,10 +5687,6 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, int save_diff= 0; MY_UNICASE_INFO *uni_plane= cs->caseinfo; -#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE - diff_if_only_endspace_difference= 0; -#endif - while ( s < se && t < te ) { s_res=my_utf8_uni(cs,&s_wc, s, se); @@ -5729,37 +5717,22 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, slen= se-s; tlen= te-t; - res= 0; - - if (slen != tlen) - { - int swap= 1; - if (diff_if_only_endspace_difference) - res= 1; /* Assume 'a' is bigger */ - if (slen < tlen) - { - slen= tlen; - s= t; - se= te; - swap= -1; - res= -res; - } - /* - This following loop uses the fact that in UTF-8 - all multibyte characters are greater than space, - and all multibyte head characters are greater than - space. It means if we meet a character greater - than space, it always means that the longer string - is greater. So we can reuse the same loop from the - 8bit version, without having to process full multibute - sequences. - */ - for ( ; s < se; s++) - { - if (*s != (uchar) ' ') - return (*s < (uchar) ' ') ? -swap : swap; - } - } + + /* + The following code uses the fact that in UTF-8 + all multibyte characters are greater than space, + and all multibyte head characters are greater than + space. It means if we meet a character greater + than space, it always means that the longer string + is greater. So we can reuse the same loop from the + 8bit version, without having to process full multibute + sequences. + */ + if ((res= slen == tlen ? 0 : + slen < tlen ? + -my_strnncollsp_padspace_bin(t, tlen) : + my_strnncollsp_padspace_bin(s, slen))) + return res; return save_diff; } @@ -7044,15 +7017,6 @@ my_charlen_filename(CHARSET_INFO *cs, const uchar *str, const uchar *end) } -static uint -my_ismbchar_filename(CHARSET_INFO *cs, const char *str, const char *end) -{ - my_wc_t wc; - int rc= my_mb_wc_filename(cs, &wc, (const uchar *) str, (const uchar *) end); - return rc > 1 ? rc : 0; -} - - #define MY_FUNCTION_NAME(x) my_ ## x ## _filename #define CHARLEN(cs,str,end) my_charlen_filename(cs,str,end) #define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN @@ -7081,7 +7045,6 @@ static MY_COLLATION_HANDLER my_collation_filename_handler = static MY_CHARSET_HANDLER my_charset_filename_handler= { NULL, /* init */ - my_ismbchar_filename, my_mbcharlen_utf8, my_numchars_mb, my_charpos_mb, @@ -7793,14 +7756,6 @@ size_t my_well_formed_len_utf8mb4(CHARSET_INFO *cs, static uint -my_ismbchar_utf8mb4(CHARSET_INFO *cs, const char *b, const char *e) -{ - int res= my_charlen_utf8mb4(cs, (const uchar*) b, (const uchar*) e); - return (res > 1) ? res : 0; -} - - -static uint my_mbcharlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), uint c) { if (c < 0x80) @@ -7852,7 +7807,6 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_bin_handler = MY_CHARSET_HANDLER my_charset_utf8mb4_handler= { NULL, /* init */ - my_ismbchar_utf8mb4, my_mbcharlen_utf8mb4, my_numchars_mb, my_charpos_mb, diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c index 8e3527f9ff1..c18733b06ae 100644 --- a/strings/ctype-win1250ch.c +++ b/strings/ctype-win1250ch.c @@ -478,9 +478,7 @@ static int my_strnncoll_win1250ch(CHARSET_INFO *cs __attribute__((unused)), static int my_strnncollsp_win1250ch(CHARSET_INFO * cs, const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool diff_if_only_endspace_difference - __attribute__((unused))) + const uchar *t, size_t tlen) { for ( ; slen && s[slen-1] == ' ' ; slen--); for ( ; tlen && t[tlen-1] == ' ' ; tlen--); diff --git a/strings/str2int.c b/strings/str2int.c index ec89503af5e..fe6cd6b793e 100644 --- a/strings/str2int.c +++ b/strings/str2int.c @@ -45,7 +45,7 @@ easy task. Coping with integer overflow and the asymmetric range of twos complement machines is anything but easy. - So that users of atoi and atol can check whether an error occured, + So that users of atoi and atol can check whether an error occurred, I have taken a wholly unprecedented step: errno is CLEARED if this call has no problems. */ diff --git a/strings/strcoll.ic b/strings/strcoll.ic index 4bced593a23..a9693b1f3c0 100644 --- a/strings/strcoll.ic +++ b/strings/strcoll.ic @@ -210,17 +210,13 @@ MY_FUNCTION_NAME(strnncoll)(CHARSET_INFO *cs __attribute__((unused)), @param a_length - the length of the left string @param b - the right string @param b_length - the length of the right string - @param diff_if_only_endspace_difference - not used in the code. - TODO: this should be eventually removed (in 10.2?) @return - the comparison result */ static int MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)), const uchar *a, size_t a_length, - const uchar *b, size_t b_length, - my_bool diff_if_only_endspace_difference - __attribute__((unused))) + const uchar *b, size_t b_length) { const uchar *a_end= a + a_length; const uchar *b_end= b + b_length; @@ -262,6 +258,45 @@ MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)), return 0; } + +#ifdef DEFINE_STRNXFRM +#ifndef WEIGHT_MB2_FRM +#define WEIGHT_MB2_FRM(x,y) WEIGHT_MB2(x,y) +#endif + +static size_t +MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs, + uchar *dst, size_t dstlen, uint nweights, + const uchar *src, size_t srclen, uint flags) +{ + uchar *d0= dst; + uchar *de= dst + dstlen; + const uchar *se= src + srclen; + const uchar *sort_order= cs->sort_order; + + for (; dst < de && src < se && nweights; nweights--) + { + if (my_charlen(cs, (const char *) src, (const char *) se) > 1) + { + /* + Note, it is safe not to check (src < se) + in the code below, because my_charlen() would + not return 2 if src was too short + */ + uint16 e= WEIGHT_MB2_FRM(src[0], src[1]); + *dst++= (uchar) (e >> 8); + if (dst < de) + *dst++= (uchar) (e & 0xFF); + src+= 2; + } + else + *dst++= sort_order ? sort_order[*src++] : *src++; + } + return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0); +} +#endif /* DEFINE_STRNXFRM */ + + /* We usually include this file at least two times from the same source file, for the _ci and the _bin collations. Prepare for the second inclusion. @@ -273,3 +308,5 @@ MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)), #undef WEIGHT_MB3 #undef WEIGHT_MB4 #undef WEIGHT_PAD_SPACE +#undef WEIGHT_MB2_FRM +#undef DEFINE_STRNXFRM |