diff options
author | monty@mysql.com <> | 2004-03-25 15:05:01 +0200 |
---|---|---|
committer | monty@mysql.com <> | 2004-03-25 15:05:01 +0200 |
commit | ebc3b3afac575b2c2a3e89823dfba217436a85c4 (patch) | |
tree | 27e5290a14e7d3e6c7aaced5b2ce86546de4721e /strings | |
parent | 042af556d57f305734e64c7aa047e4709288f5bd (diff) | |
download | mariadb-git-ebc3b3afac575b2c2a3e89823dfba217436a85c4.tar.gz |
BTREE-indexes in HEAP tables can now be used to optimize ORDER BY
Don't read character set files if we are using only the default charset. In most cases the user will no longer get a warning about missing character set files.
Compare strings using space extension instead of space stripping. The following comparisons now hold: "a" == "a " and "a\t" < "a". (Bug #3152).
Note: Because of the above fix, one has to do a REPAIR on any table that has an ASCII character < 32 as the last character in a CHAR/VARCHAR/TEXT column.
Diffstat (limited to 'strings')
-rw-r--r-- | strings/ctype-big5.c | 90 | ||||
-rw-r--r-- | strings/ctype-czech.c | 297 | ||||
-rw-r--r-- | strings/ctype-gbk.c | 88 | ||||
-rw-r--r-- | strings/ctype-latin1.c | 128 | ||||
-rw-r--r-- | strings/ctype-mb.c | 17 | ||||
-rw-r--r-- | strings/ctype-simple.c | 90 | ||||
-rw-r--r-- | strings/ctype-sjis.c | 116 | ||||
-rw-r--r-- | strings/ctype-tis620.c | 16 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 48 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 8 | ||||
-rw-r--r-- | strings/ctype-win1250ch.c | 4 |
11 files changed, 557 insertions, 345 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index ee55cfda6c1..2bde29ecc47 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -218,40 +218,80 @@ static uint16 big5strokexfrm(uint16 i) return 0xA140; } -static int my_strnncoll_big5(CHARSET_INFO *cs __attribute__((unused)), - const uchar * s1, uint len1, - const uchar * s2, uint len2) + + +static int my_strnncoll_big5_internal(const uchar **a_res, + const uchar **b_res, uint length) { - uint len; + const char *a= *a_res, *b= *b_res; - len = min(len1,len2); - while (len--) + while (length--) { - if ((len > 0) && isbig5code(*s1,*(s1+1)) && isbig5code(*s2, *(s2+1))) + if ((length > 0) && isbig5code(*a,*(a+1)) && isbig5code(*b, *(b+1))) { - if (*s1 != *s2 || *(s1+1) != *(s2+1)) - return ((int) big5code(*s1,*(s1+1)) - - (int) big5code(*s2,*(s2+1))); - s1 +=2; - s2 +=2; - len--; - } else if (sort_order_big5[(uchar) *s1++] != sort_order_big5[(uchar) *s2++]) - return ((int) sort_order_big5[(uchar) s1[-1]] - - (int) sort_order_big5[(uchar) s2[-1]]); + if (*a != *b || *(a+1) != *(b+1)) + return ((int) big5code(*a,*(a+1)) - + (int) big5code(*b,*(b+1))); + a+= 2; + b+= 2; + length--; + } + else if (sort_order_big5[(uchar) *a++] != + sort_order_big5[(uchar) *b++]) + return ((int) sort_order_big5[(uchar) a[-1]] - + (int) sort_order_big5[(uchar) b[-1]]); } - return (int) (len1-len2); + *a_res= a; + *b_res= b; + return 0; } -static -int my_strnncollsp_big5(CHARSET_INFO * cs, - const uchar *s, uint slen, - const uchar *t, uint tlen) + +/* Compare strings */ + +static int my_strnncoll_big5(CHARSET_INFO *cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) { - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - return my_strnncoll_big5(cs,s,slen,t,tlen); + uint length= min(a_length, b_length); + int res= my_strnncoll_big5_internal(&a, &b, length); + return res ? 
res : (int) (a_length - b_length); } + +/* compare strings, ignore end space */ + +static int my_strnncollsp_big5(CHARSET_INFO * cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) +{ + uint length= min(a_length, b_length); + int res= my_strnncoll_big5_internal(&a, &b, length); + if (!res && a_length != b_length) + { + const uchar *end; + int swap= 0; + /* + Check the next not space character of the longer key. If it's < ' ', + then it's smaller than the other key. + */ + if (a_length < b_length) + { + /* put shorter key in a */ + a_length= b_length; + a= b; + swap= -1; /* swap sign of result */ + } + for (end= a + a_length-length; a < end ; a++) + { + if (*a != ' ') + return ((int) *a - (int) ' ') ^ swap; + } + } + return res; +} + + static int my_strnxfrm_big5(CHARSET_INFO *cs __attribute__((unused)), uchar * dest, uint len, const uchar * src, uint srclen) @@ -377,7 +417,7 @@ static my_bool my_like_range_big5(CHARSET_INFO *cs __attribute__((unused)), *min_length= (uint) (min_str-min_org); *max_length= res_length; do { - *min_str++ = ' '; /* Because if key compression */ + *min_str++ = 0; *max_str++ = max_sort_char; } while (min_str != min_end); return 0; diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c index ed8c0b5b415..5094a7c45da 100644 --- a/strings/ctype-czech.c +++ b/strings/ctype-czech.c @@ -165,169 +165,144 @@ static struct wordvalue doubles[] = { Na konci připojíme znak 0 */ -#define ADD_TO_RESULT(dest, len, totlen, value) \ - if ((totlen) < (len)) { dest[totlen] = value; } (totlen++); - -#define NEXT_CMP_VALUE(src, p, store, pass, value, len) \ - while (1) /* we will make a loop */ \ - { \ - if (IS_END(p, src, len)) \ - /* when we are at the end of string */ \ - { /* return either 0 for end of string */ \ - /* or 1 for end of pass */ \ - if (pass == 3) { value = 0; break; } \ - if (pass == 0) p = store; \ - else p = src; \ - value = 1; pass++; break; \ - } \ - /* not at end of string */ \ - value = 
CZ_SORT_TABLE[pass][*p]; \ - \ - if (value == 0) { p++; continue; } /* ignore value */ \ - if (value == 2) /* space */ \ - { \ - const uchar * tmp; \ - const uchar * runner = ++p; \ - while (!(IS_END(runner, src, len)) && (CZ_SORT_TABLE[pass][*runner] == 2)) \ - runner++; /* skip all spaces */ \ - if (IS_END(runner, src, len) && SKIP_TRAILING_SPACES) \ - p = runner; \ - if ((pass <= 2) && !(IS_END(runner, src, len))) \ - p = runner; \ - if (IS_END(p, src, len)) \ - continue; \ - /* we switch passes */ \ - if (pass > 1) \ - break; \ - tmp = p; \ - if (pass == 0) pass = 1; \ - else pass = 0; \ - p = store; store = tmp; \ - break; \ - } \ - if (value == 255) \ - { \ - int i; \ - for (i = 0; i < (int) sizeof(doubles); i++) \ - { \ - const char * pattern = doubles[i].word; \ - const char * q = (const char *) p; \ - int j = 0; \ - while (pattern[j]) \ - { \ - if (IS_END(q, src, len) || (*q != pattern[j])) \ - { break ; } \ - j++; q++; \ - } \ - if (!(pattern[j])) \ - { \ - value = (int)(doubles[i].outvalue[pass]); \ - p = (const uchar *) q - 1; \ - break; \ - } \ - } \ - } \ - p++; \ - break; \ - } - -#define IS_END(p, src, len) (!(*p)) - -#if 0 -/* Function strcoll, with Czech sorting, for zero terminated strings */ -static int my_strcoll_czech(const uchar * s1, const uchar * s2) - { - int v1, v2; - const uchar * p1, * p2, * store1, * store2; - int pass1 = 0, pass2 = 0; - int diff; - - p1 = s1; p2 = s2; - store1 = s1; store2 = s2; - - do - { - NEXT_CMP_VALUE(s1, p1, store1, pass1, v1, 0); - NEXT_CMP_VALUE(s2, p2, store2, pass2, v2, 0); - diff = v1 - v2; - if (diff != 0) return diff; - } - while (v1); - return 0; - } -#endif +#define ADD_TO_RESULT(dest, len, totlen, value) \ +if ((totlen) < (len)) { dest[totlen] = value; } (totlen++); +#define IS_END(p, src, len) (((char *)p - (char *)src) >= (len)) -#if 0 -/* Function strxfrm, with Czech sorting, for zero terminated strings */ -static int my_strxfrm_czech(uchar * dest, const uchar * src, int len) +#define 
NEXT_CMP_VALUE(src, p, store, pass, value, len) \ +while (1) \ +{ \ + if (IS_END(p, src, len)) \ + { \ + /* when we are at the end of string */ \ + /* return either 0 for end of string */ \ + /* or 1 for end of pass */ \ + value= 0; \ + if (pass != 3) \ + { \ + p= (pass++ == 0) ? store : src; \ + value = 1; \ + } \ + break; \ + } \ + /* not at end of string */ \ + value = CZ_SORT_TABLE[pass][*p]; \ + if (value == 0) \ + { p++; continue; } /* ignore value */ \ + if (value == 2) /* space */ \ + { \ + const uchar * tmp; \ + const uchar * runner = ++p; \ + while (!(IS_END(runner, src, len)) && (CZ_SORT_TABLE[pass][*runner] == 2)) \ + runner++; /* skip all spaces */ \ + if (IS_END(runner, src, len) && SKIP_TRAILING_SPACES) \ + p = runner; \ + if ((pass <= 2) && !(IS_END(runner, src, len))) \ + p = runner; \ + if (IS_END(p, src, len)) \ + continue; \ + /* we switch passes */ \ + if (pass > 1) \ + break; \ + tmp = p; \ + pass= 1-pass; \ + p = store; store = tmp; \ + break; \ + } \ + if (value == 255) \ + { \ + int i; \ + for (i = 0; i < (int) sizeof(doubles); i++) \ + { \ + const char * pattern = doubles[i].word; \ + const char * q = (const char *) p; \ + int j = 0; \ + while (pattern[j]) \ + { \ + if (IS_END(q, src, len) || (*q != pattern[j])) \ + break; \ + j++; q++; \ + } \ + if (!(pattern[j])) \ + { \ + value = (int)(doubles[i].outvalue[pass]); \ + p= (const uchar *) q - 1; \ + break; \ + } \ + } \ + } \ + p++; \ + break; \ +} + +/* + Function strnncoll, actually strcoll, with Czech sorting, which expect + the length of the strings being specified +*/ + +static int my_strnncoll_czech(CHARSET_INFO *cs __attribute__((unused)), + const uchar * s1, uint len1, + const uchar * s2, uint len2) { - int value; - const uchar * p, * store; - int pass = 0; - int totlen = 0; - p = store = src; - - do - { - NEXT_CMP_VALUE(src, p, store, pass, value, 0); - ADD_TO_RESULT(dest, len, totlen, value); - } - while (value); - return totlen; - } -#endif + int v1, v2; + const uchar * p1, * 
p2, * store1, * store2; + int pass1 = 0, pass2 = 0; + p1 = s1; p2 = s2; + store1 = s1; store2 = s2; -#undef IS_END + do + { + int diff; + NEXT_CMP_VALUE(s1, p1, store1, pass1, v1, (int)len1); + NEXT_CMP_VALUE(s2, p2, store2, pass2, v2, (int)len2); + if ((diff = v1 - v2)) + return diff; + } + while (v1); + return 0; +} -#define IS_END(p, src, len) (((char *)p - (char *)src) >= (len)) -/* Function strnncoll, actually strcoll, with Czech sorting, which expect - the length of the strings being specified */ -static int my_strnncoll_czech(CHARSET_INFO *cs __attribute__((unused)), - const uchar * s1, uint len1, - const uchar * s2, uint len2) - { - int v1, v2; - const uchar * p1, * p2, * store1, * store2; - int pass1 = 0, pass2 = 0; - int diff; - - p1 = s1; p2 = s2; - store1 = s1; store2 = s2; - - do - { - NEXT_CMP_VALUE(s1, p1, store1, pass1, v1, (int)len1); - NEXT_CMP_VALUE(s2, p2, store2, pass2, v2, (int)len2); - diff = v1 - v2; - - if (diff != 0) return diff; - } - while (v1); - return 0; - } - -/* Function strnxfrm, actually strxfrm, with Czech sorting, which expect - the length of the strings being specified */ + +/* + TODO: Fix this one to compare strings as they are done in ctype-simple1 +*/ + +static +int my_strnncollsp_czech(CHARSET_INFO * cs, + const uchar *s, uint slen, + const uchar *t, uint tlen) +{ + for ( ; slen && s[slen-1] == ' ' ; slen--); + for ( ; tlen && t[tlen-1] == ' ' ; tlen--); + return my_strnncoll_czech(cs,s,slen,t,tlen); +} + + +/* + Function strnxfrm, actually strxfrm, with Czech sorting, which expect + the length of the strings being specified +*/ + static int my_strnxfrm_czech(CHARSET_INFO *cs __attribute__((unused)), - uchar * dest, uint len, - const uchar * src, uint srclen) - { - int value; - const uchar * p, * store; - int pass = 0; - int totlen = 0; - p = src; store = src; - - do - { - NEXT_CMP_VALUE(src, p, store, pass, value, (int)srclen); - ADD_TO_RESULT(dest, (int)len, totlen, value); - } - while (value); - return totlen; - } + 
uchar * dest, uint len, + const uchar * src, uint srclen) +{ + int value; + const uchar * p, * store; + int pass = 0; + int totlen = 0; + p = src; store = src; + + do + { + NEXT_CMP_VALUE(src, p, store, pass, value, (int)srclen); + ADD_TO_RESULT(dest, (int)len, totlen, value); + } + while (value); + return totlen; +} #undef IS_END @@ -595,16 +570,6 @@ static MY_UNI_IDX idx_uni_8859_2[]={ }; -static -int my_strnncollsp_czech(CHARSET_INFO * cs, - const uchar *s, uint slen, - const uchar *t, uint tlen) -{ - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - return my_strnncoll_czech(cs,s,slen,t,tlen); -} - static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler = { my_strnncoll_czech, diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index 98511406ba9..1990060e67b 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -2582,40 +2582,74 @@ static uint16 gbksortorder(uint16 i) } -int my_strnncoll_gbk(CHARSET_INFO *cs __attribute__((unused)), - const uchar * s1, uint len1, - const uchar * s2, uint len2) +int my_strnncoll_gbk_internal(const uchar **a_res, const uchar **b_res, + uint length) { - uint len,c1,c2; + const char *a= *a_res, *b= *b_res; + uint a_char,b_char; - len = min(len1,len2); - while (len--) + while (length--) { - if ((len > 0) && isgbkcode(*s1,*(s1+1)) && isgbkcode(*s2, *(s2+1))) + if ((length > 0) && isgbkcode(*a,*(a+1)) && isgbkcode(*b, *(b+1))) { - c1=gbkcode(*s1,*(s1+1)); - c2=gbkcode(*s2,*(s2+1)); - if (c1!=c2) - return ((int) gbksortorder((uint16) c1) - - (int) gbksortorder((uint16) c2)); - s1+=2; - s2+=2; - --len; - } else if (sort_order_gbk[(uchar) *s1++] != sort_order_gbk[(uchar) *s2++]) - return ((int) sort_order_gbk[(uchar) s1[-1]] - - (int) sort_order_gbk[(uchar) s2[-1]]); + a_char= gbkcode(*a,*(a+1)); + b_char= gbkcode(*b,*(b+1)); + if (a_char != b_char) + return ((int) gbksortorder((uint16) a_char) - + (int) gbksortorder((uint16) b_char)); + a+= 2; + b+= 2; + length--; + } + else 
if (sort_order_gbk[(uchar) *a++] != sort_order_gbk[(uchar) *b++]) + return ((int) sort_order_gbk[(uchar) a[-1]] - + (int) sort_order_gbk[(uchar) b[-1]]); } - return (int) (len1-len2); + *a_res= a; + *b_res= b; + return 0; } -static -int my_strnncollsp_gbk(CHARSET_INFO * cs, - const uchar *s, uint slen, - const uchar *t, uint tlen) + + +int my_strnncoll_gbk(CHARSET_INFO *cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) { - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - return my_strnncoll_gbk(cs,s,slen,t,tlen); + uint length= min(a_length, b_length); + int res= my_strnncoll_gbk_internal(&a, &b, length); + return res ? res : (int) (a_length - b_length); +} + + +static int my_strnncollsp_gbk(CHARSET_INFO * cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) +{ + uint length= min(a_length, b_length); + int res= my_strnncoll_gbk_internal(&a, &b, length); + if (!res && a_length != b_length) + { + const uchar *end; + int swap= 0; + /* + Check the next not space character of the longer key. If it's < ' ', + then it's smaller than the other key. 
+ */ + if (a_length < b_length) + { + /* put shorter key in a */ + a_length= b_length; + a= b; + swap= -1; /* swap sign of result */ + } + for (end= a + a_length-length; a < end ; a++) + { + if (*a != ' ') + return ((int) *a - (int) ' ') ^ swap; + } + } + return res; } @@ -2696,7 +2730,7 @@ static my_bool my_like_range_gbk(CHARSET_INFO *cs __attribute__((unused)), *min_length= (uint) (min_str - min_org); *max_length= res_length; do { - *min_str++ = '\0'; /* Because if key compression */ + *min_str++= 0; *max_str++ = max_sort_char; } while (min_str != min_end); return 0; diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index 0682b15d135..7a010c3bef8 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -319,51 +319,105 @@ uchar combo2map[]={ static int my_strnncoll_latin1_de(CHARSET_INFO *cs __attribute__((unused)), - const uchar *s1, uint len1, - const uchar *s2, uint len2) + const uchar *a, uint a_length, + const uchar *b, uint b_length) { - const uchar *e1 = s1 + len1; - const uchar *e2 = s2 + len2; - uchar c1, c12=0, c2, c22=0; + const uchar *a_end= a + a_length; + const uchar *b_end= b + b_length; + uchar a_char, a_extend= 0, b_char, b_extend= 0; - while ((s1 < e1 || c12) && (s2 < e2 || c22)) + while ((a < a_end || a_extend) && (b < b_end || b_extend)) { - if (c12) + if (a_extend) { - c1=c12; c12=0; + a_char=a_extend; a_extend=0; } else { - c12=combo2map[*s1]; - c1=combo1map[*s1++]; + a_extend=combo2map[*a]; + a_char=combo1map[*a++]; } - if (c22) + if (b_extend) { - c2=c22; c22=0; + b_char=b_extend; b_extend=0; } else { - c22=combo2map[*s2]; - c2=combo1map[*s2++]; + b_extend=combo2map[*b]; + b_char=combo1map[*b++]; } - if (c1 != c2) return (int)c1 - (int)c2; + if (a_char != b_char) + return (int) a_char - (int) b_char; } - /* A simple test of string lengths won't work -- we test to see which string ran out first */ - return (s1 < e1 || c12) ? 1 : (s2 < e2 || c22) ? -1 : 0; + return ((a < a_end || a_extend) ? 
1 : + (b < b_end || b_extend) ? -1 : 0); } -static int my_strnncollsp_latin1_de(CHARSET_INFO *cs, - const uchar *s, uint slen, - const uchar *t, uint tlen) +static int my_strnncollsp_latin1_de(CHARSET_INFO *cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) { - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - return my_strnncoll_latin1_de(cs,s,slen,t,tlen); + const uchar *a_end= a + a_length; + const uchar *b_end= b + b_length; + uchar a_char, a_extend= 0, b_char, b_extend= 0; + + while ((a < a_end || a_extend) && (b < b_end || b_extend)) + { + if (a_extend) + { + a_char=a_extend; + a_extend= 0; + } + else + { + a_extend= combo2map[*a]; + a_char= combo1map[*a++]; + } + if (b_extend) + { + b_char= b_extend; + b_extend= 0; + } + else + { + b_extend= combo2map[*b]; + b_char= combo1map[*b++]; + } + if (a_char != b_char) + return (int) a_char - (int) b_char; + } + /* Check if double character last */ + if (a_extend) + return 1; + if (b_extend) + return -1; + + if (a != a_end || b != b_end) + { + int swap= 0; + /* + Check the next not space character of the longer key. If it's < ' ', + then it's smaller than the other key. + */ + if (a == a_end) + { + /* put shorter key in a */ + a_end= b_end; + a= b; + swap= -1; /* swap sign of result */ + } + for ( ; a < a_end ; a++) + { + if (*a != ' ') + return ((int) *a - (int) ' ') ^ swap; + } + } + return 0; } @@ -385,6 +439,32 @@ static int my_strnxfrm_latin1_de(CHARSET_INFO *cs __attribute__((unused)), } +void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)), + const uchar *key, uint len, + ulong *nr1, ulong *nr2) +{ + const uchar *end= key+len; + /* + Remove end space. 
We have to do this to be able to compare + 'AE' and 'Ä' as identical + */ + while (end > key && end[-1] == ' ') + end--; + + for (; key < end ; key++) + { + uint X= (uint) combo1map[(uint) *key]; + nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8); + nr2[0]+=3; + if ((X= combo2map[*key])) + { + nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8); + nr2[0]+=3; + } + } +} + + static MY_COLLATION_HANDLER my_collation_german2_ci_handler= { my_strnncoll_latin1_de, @@ -394,7 +474,7 @@ static MY_COLLATION_HANDLER my_collation_german2_ci_handler= my_wildcmp_8bit, my_strcasecmp_8bit, my_instr_simple, - my_hash_sort_simple + my_hash_sort_latin1_de }; diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 2f7cf698664..ed772a68845 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -347,6 +347,7 @@ uint my_instr_mb(CHARSET_INFO *cs, return 0; } + /* BINARY collations handlers for MB charsets */ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)), @@ -357,20 +358,6 @@ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)), return cmp ? cmp : (int) (slen - tlen); } -static int my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)), - const uchar *s, uint slen, - const uchar *t, uint tlen) -{ - int len, cmp; - - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - - len = ( slen > tlen ) ? tlen : slen; - - cmp= memcmp(s,t,len); - return cmp ? 
cmp : (int) (slen - tlen); -} static int my_strnxfrm_mb_bin(CHARSET_INFO *cs __attribute__((unused)), uchar * dest, uint len, @@ -526,7 +513,7 @@ static int my_wildcmp_mb_bin(CHARSET_INFO *cs, MY_COLLATION_HANDLER my_collation_mb_bin_handler = { my_strnncoll_mb_bin, - my_strnncollsp_mb_bin, + my_strnncoll_mb_bin, my_strnxfrm_mb_bin, my_like_range_simple, my_wildcmp_mb_bin, diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index 0aae60a0b56..c8eb3c07a3f 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -60,25 +60,69 @@ int my_strnncoll_simple(CHARSET_INFO * cs, const uchar *s, uint slen, } -int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *s, uint slen, - const uchar *t, uint tlen) +/* + Compare strings, discarding end space + + SYNOPSIS + my_strnncollsp_simple() + cs character set handler + a First string to compare + a_length Length of 'a' + b Second string to compare + b_length Length of 'b' + + IMPLEMENTATION + If one string is shorter as the other, then we space extend the other + so that the strings have equal length. + + This will ensure that the following things hold: + + "a" == "a " + "a\0" < "a" + "a\0" < "a " + + RETURN + < 0 a < b + = 0 a == b + > 0 a > b +*/ + +int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, uint a_length, + const uchar *b, uint b_length) { - uchar *map= cs->sort_order; - int len; - - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - - len = ( slen > tlen ) ? tlen : slen; - - while (len--) + const uchar *map= cs->sort_order, *end; + uint length; + + end= a + (length= min(a_length, b_length)); + while (a < end) { - if (map[*s++] != map[*t++]) - return ((int) map[s[-1]] - (int) map[t[-1]]); + if (map[*a++] != map[*b++]) + return ((int) map[a[-1]] - (int) map[b[-1]]); } - return (int) (slen-tlen); + if (a_length != b_length) + { + int swap= 0; + /* + Check the next not space character of the longer key. 
If it's < ' ', + then it's smaller than the other key. + */ + if (a_length < b_length) + { + /* put shorter key in s */ + a_length= b_length; + a= b; + swap= -1; /* swap sign of result */ + } + for (end= a + a_length-length; a < end ; a++) + { + if (*a != ' ') + return ((int) *a - (int) ' ') ^ swap; + } + } + return 0; } + void my_caseup_str_8bit(CHARSET_INFO * cs,char *str) { register uchar *map=cs->to_upper; @@ -169,8 +213,8 @@ int my_snprintf_8bit(CHARSET_INFO *cs __attribute__((unused)), void my_hash_sort_simple(CHARSET_INFO *cs, - const uchar *key, uint len, - ulong *nr1, ulong *nr2) + const uchar *key, uint len, + ulong *nr1, ulong *nr2) { register uchar *sort_order=cs->sort_order; const uchar *pos = key; @@ -953,9 +997,10 @@ my_bool my_like_range_simple(CHARSET_INFO *cs, { *min_length= (uint) (min_str - min_org); *max_length=res_length; - do { - *min_str++ = ' '; /* Because if key compression */ - *max_str++ = (char) cs->max_sort_char; + do + { + *min_str++= 0; + *max_str++= (char) cs->max_sort_char; } while (min_str != min_end); return 0; } @@ -963,13 +1008,6 @@ my_bool my_like_range_simple(CHARSET_INFO *cs, } *min_length= *max_length = (uint) (min_str - min_org); - /* Temporary fix for handling w_one at end of string (key compression) */ - { - char *tmp; - for (tmp= min_str ; tmp > min_org && tmp[-1] == '\0';) - *--tmp=' '; - } - while (min_str != min_end) *min_str++ = *max_str++ = ' '; /* Because if key compression */ return 0; diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index c337b8122fb..91a24fa8bee 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -184,7 +184,7 @@ static uchar NEAR sort_order_sjis[]= static int ismbchar_sjis(CHARSET_INFO *cs __attribute__((unused)), - const char* p, const char *e) + const char* p, const char *e) { return (issjishead((uchar) *p) && (e-p)>1 && issjistail((uchar)p[1]) ? 
2: 0); } @@ -197,59 +197,101 @@ static int mbcharlen_sjis(CHARSET_INFO *cs __attribute__((unused)),uint c) #define sjiscode(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d)) -static int my_strnncoll_sjis(CHARSET_INFO *cs __attribute__((unused)), - const uchar *s1, uint len1, - const uchar *s2, uint len2) + +static int my_strnncoll_sjis_internal(CHARSET_INFO *cs, + const uchar **a_res, uint a_length, + const uchar **b_res, uint b_length) { - const uchar *e1 = s1 + len1; - const uchar *e2 = s2 + len2; - while (s1 < e1 && s2 < e2) { - if (ismbchar_sjis(cs,(char*) s1, (char*) e1) && - ismbchar_sjis(cs,(char*) s2, (char*) e2)) { - uint c1 = sjiscode(*s1, *(s1+1)); - uint c2 = sjiscode(*s2, *(s2+1)); - if (c1 != c2) - return c1 - c2; - s1 += 2; - s2 += 2; - } else { - if (sort_order_sjis[(uchar)*s1] != sort_order_sjis[(uchar)*s2]) - return sort_order_sjis[(uchar)*s1] - sort_order_sjis[(uchar)*s2]; - s1++; - s2++; + const uchar *a= *a_res, *b= *b_res; + const uchar *a_end= a + a_length; + const uchar *b_end= b + b_length; + while (a < a_end && b < b_end) + { + if (ismbchar_sjis(cs,(char*) a, (char*) a_end) && + ismbchar_sjis(cs,(char*) b, (char*) b_end)) + { + uint a_char= sjiscode(*a, *(a+1)); + uint b_char= sjiscode(*b, *(b+1)); + if (a_char != b_char) + return a_char - b_char; + a += 2; + b += 2; + } else + { + if (sort_order_sjis[(uchar)*a] != sort_order_sjis[(uchar)*b]) + return sort_order_sjis[(uchar)*a] - sort_order_sjis[(uchar)*b]; + a++; + b++; } } - return len1 - len2; + *a_res= a; + *b_res= b; + return 0; +} + + +static int my_strnncoll_sjis(CHARSET_INFO *cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) +{ + int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length); + return res ? 
res : (int) (a_length - b_length); } -static -int my_strnncollsp_sjis(CHARSET_INFO * cs, - const uchar *s, uint slen, - const uchar *t, uint tlen) + +static int my_strnncollsp_sjis(CHARSET_INFO *cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) { - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - return my_strnncoll_sjis(cs,s,slen,t,tlen); + const uchar *a_end= a + a_length; + const uchar *b_end= b + b_length; + int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length); + if (!res && (a != a_end || b != b_end)) + { + int swap= 0; + /* + Check the next not space character of the longer key. If it's < ' ', + then it's smaller than the other key. + */ + if (a == a_end) + { + /* put shorter key in a */ + a_end= b_end; + a= b; + swap= -1; /* swap sign of result */ + } + for (; a < a_end ; a++) + { + if (*a != ' ') + return ((int) *a - (int) ' ') ^ swap; + } + } + return res; } + + static int my_strnxfrm_sjis(CHARSET_INFO *cs __attribute__((unused)), uchar *dest, uint len, const uchar *src, uint srclen) { uchar *d_end = dest + len; uchar *s_end = (uchar*) src + srclen; - while (dest < d_end && src < s_end) { - if (ismbchar_sjis(cs,(char*) src, (char*) s_end)) { + while (dest < d_end && src < s_end) + { + if (ismbchar_sjis(cs,(char*) src, (char*) s_end)) + { *dest++ = *src++; if (dest < d_end && src < s_end) *dest++ = *src++; - } else { - *dest++ = sort_order_sjis[(uchar)*src++]; } + else + *dest++ = sort_order_sjis[(uchar)*src++]; } return srclen; } + /* ** Calculate min_str and max_str that ranges a LIKE string. 
** Arguments: @@ -300,12 +342,14 @@ static my_bool my_like_range_sjis(CHARSET_INFO *cs __attribute__((unused)), ptr++; continue; } - if (*ptr == w_many) { /* '%' in SQL */ + if (*ptr == w_many) + { /* '%' in SQL */ *min_length = (uint)(min_str - min_org); *max_length = res_length; - do { - *min_str++ = ' '; /* Because if key compression */ - *max_str++ = max_sort_char; + do + { + *min_str++= 0; + *max_str++= max_sort_char; } while (min_str < min_end); return 0; } diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index 59be820863a..954a3768536 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -518,6 +518,10 @@ static uint thai2sortable(uchar *tstr, uint len) strncoll() replacement, compare 2 string, both are converted to sortable string + NOTE: + We can't cut strings at end \0 as this would break comparision with + LIKE characters, where the min range is stored as end \0 + Arg: 2 Strings and it compare length Ret: strcmp result */ @@ -530,9 +534,6 @@ int my_strnncoll_tis620(CHARSET_INFO *cs __attribute__((unused)), uchar *tc1, *tc2; int i; - /* Cut strings at end \0 */ - len1= (int) strnlen((char*) s1,len1); - len2= (int) strnlen((char*) s2,len2); tc1= buf; if ((len1 + len2 +2) > (int) sizeof(buf)) tc1= (uchar*) malloc(len1+len2); @@ -550,6 +551,10 @@ int my_strnncoll_tis620(CHARSET_INFO *cs __attribute__((unused)), } +/* + TODO: Has to be fixed like strnncollsp in ctype-simple.c +*/ + static int my_strnncollsp_tis620(CHARSET_INFO * cs, const uchar *s, uint slen, @@ -637,8 +642,9 @@ my_bool my_like_range_tis620(CHARSET_INFO *cs __attribute__((unused)), { *min_length= (uint) (min_str - min_org); *max_length=res_length; - do { - *min_str++ = ' '; /* Because of key compression */ + do + { + *min_str++ = 0; *max_str++ = max_sort_chr; } while (min_str != min_end); return 0; diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index f4c1a22939a..7d32dcb1b61 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -90,8 +90,8 @@ 
static uchar to_upper_ucs2[] = { }; -static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)), - my_wc_t * pwc, const uchar *s, const uchar *e) +static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t * pwc, const uchar *s, const uchar *e) { if (s+2 > e) /* Need 2 characters */ return MY_CS_TOOFEW(0); @@ -100,8 +100,8 @@ static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)), return 2; } -static int my_uni_ucs2 (CHARSET_INFO *cs __attribute__((unused)) , - my_wc_t wc, uchar *r, uchar *e) +static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) , + my_wc_t wc, uchar *r, uchar *e) { if ( r+2 > e ) return MY_CS_TOOSMALL; @@ -128,13 +128,15 @@ static void my_caseup_ucs2(CHARSET_INFO *cs, char *s, uint slen) } } -static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, ulong *n1, ulong *n2) + +static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, + ulong *n1, ulong *n2) { my_wc_t wc; int res; const uchar *e=s+slen; - while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 ) + while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0) { int plane = (wc>>8) & 0xFF; wc = uni_plane[plane] ? 
uni_plane[plane][wc & 0xFF].sort : wc; @@ -148,7 +150,7 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, ulong static void my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)), - char * s __attribute__((unused))) + char * s __attribute__((unused))) { } @@ -173,13 +175,14 @@ static void my_casedn_ucs2(CHARSET_INFO *cs, char *s, uint slen) } static void my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)), - char * s __attribute__((unused))) + char * s __attribute__((unused))) { } static int my_strnncoll_ucs2(CHARSET_INFO *cs, - const uchar *s, uint slen, const uchar *t, uint tlen) + const uchar *s, uint slen, + const uchar *t, uint tlen) { int s_res,t_res; my_wc_t s_wc,t_wc; @@ -213,8 +216,9 @@ static int my_strnncoll_ucs2(CHARSET_INFO *cs, return ( (se-s) - (te-t) ); } + static int my_strncasecmp_ucs2(CHARSET_INFO *cs, - const char *s, const char *t, uint len) + const char *s, const char *t, uint len) { int s_res,t_res; my_wc_t s_wc,t_wc; @@ -249,6 +253,7 @@ static int my_strncasecmp_ucs2(CHARSET_INFO *cs, return ( (se-s) - (te-t) ); } + static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t) { uint s_len=strlen(s); @@ -257,6 +262,7 @@ static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t) return my_strncasecmp_ucs2(cs, s, t, len); } + static int my_strnxfrm_ucs2(CHARSET_INFO *cs, uchar *dst, uint dstlen, const uchar *src, uint srclen) { @@ -288,6 +294,7 @@ static int my_strnxfrm_ucs2(CHARSET_INFO *cs, return dst - dst_orig; } + static int my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)), const char *b __attribute__((unused)), const char *e __attribute__((unused))) @@ -295,6 +302,7 @@ static int my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)), return 2; } + static int my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) , uint c __attribute__((unused))) { @@ -380,8 +388,8 @@ static int my_vsnprintf_ucs2(char *dst, uint n, const char* fmt, va_list ap) 
return (uint) (dst - start); } -static int my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused)) - ,char* to, uint n, const char* fmt, ...) +static int my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused)), + char* to, uint n, const char* fmt, ...) { va_list args; va_start(args,fmt); @@ -389,9 +397,9 @@ static int my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused)) } -long my_strntol_ucs2(CHARSET_INFO *cs, - const char *nptr, uint l, int base, - char **endptr, int *err) +long my_strntol_ucs2(CHARSET_INFO *cs, + const char *nptr, uint l, int base, + char **endptr, int *err) { int negative=0; int overflow; @@ -504,9 +512,9 @@ bs: } -ulong my_strntoul_ucs2(CHARSET_INFO *cs, - const char *nptr, uint l, int base, - char **endptr, int *err) +ulong my_strntoul_ucs2(CHARSET_INFO *cs, + const char *nptr, uint l, int base, + char **endptr, int *err) { int negative=0; int overflow; @@ -1334,8 +1342,8 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, *min_length= (uint) (min_str - min_org); *max_length=res_length; do { - *min_str++ = '\0'; - *min_str++ = ' '; /* Because if key compression */ + *min_str++ = 0; + *min_str++ = 0; *max_str++ = (char) cs->max_sort_char >>8; *max_str++ = (char) cs->max_sort_char & 255; } while (min_str + 1 < min_end); diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index dca73e5a79f..886ecfbd0c9 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -1801,7 +1801,8 @@ static void my_casedn_str_utf8(CHARSET_INFO *cs, char * s) static int my_strnncoll_utf8(CHARSET_INFO *cs, - const uchar *s, uint slen, const uchar *t, uint tlen) + const uchar *s, uint slen, + const uchar *t, uint tlen) { int s_res,t_res; my_wc_t s_wc,t_wc; @@ -1835,6 +1836,11 @@ static int my_strnncoll_utf8(CHARSET_INFO *cs, return ( (se-s) - (te-t) ); } + +/* + TODO: Has to be fixed as strnncollsp in ctype-simple +*/ + static int my_strnncollsp_utf8(CHARSET_INFO * cs, const uchar *s, uint slen, diff --git a/strings/ctype-win1250ch.c 
b/strings/ctype-win1250ch.c index bda349f1988..8fd4e612713 100644 --- a/strings/ctype-win1250ch.c +++ b/strings/ctype-win1250ch.c @@ -467,6 +467,10 @@ static int my_strnncoll_win1250ch(CHARSET_INFO *cs __attribute__((unused)), } +/* + TODO: Has to be fixed as strnncollsp in ctype-simple +*/ + static int my_strnncollsp_win1250ch(CHARSET_INFO * cs, const uchar *s, uint slen, |