diff options
Diffstat (limited to 'strings/ctype-ucs2.c')
-rw-r--r-- | strings/ctype-ucs2.c | 827 |
1 files changed, 147 insertions, 680 deletions
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index d1441a4d3a5..e7ba5cbc3c3 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1186,11 +1186,14 @@ my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)), #define MY_UTF16_SURROGATE_LOW_FIRST 0xDC00 #define MY_UTF16_SURROGATE_LOW_LAST 0xDFFF -#define MY_UTF16_HIGH_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xD8) -#define MY_UTF16_LOW_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xDC) -#define MY_UTF16_SURROGATE(x) (((x) & 0xF800) == 0xD800) +#define MY_UTF16_HIGH_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xD8) +#define MY_UTF16_LOW_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xDC) +/* Test if a byte is a leading byte of a high or low surrogate head: */ +#define MY_UTF16_SURROGATE_HEAD(x) ((((uchar) (x)) & 0xF8) == 0xD8) +/* Test if a Unicode code point is a high or low surrogate head */ +#define MY_UTF16_SURROGATE(x) (((x) & 0xF800) == 0xD800) -#define MY_UTF16_WC2(a, b) ((a << 8) + b) +#define MY_UTF16_WC2(a, b) ((a << 8) + b) /* a= 110110?? (<< 18) @@ -1201,6 +1204,30 @@ my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)), #define MY_UTF16_WC4(a, b, c, d) (((a & 3) << 18) + (b << 10) + \ ((c & 3) << 8) + d + 0x10000) +#define IS_MB2_CHAR(b0,b1) (!MY_UTF16_SURROGATE_HEAD(b0)) +#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b0) && MY_UTF16_LOW_HEAD(b2)) + +static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1) +{ + my_wc_t wc= MY_UTF16_WC2(b0, b1); + MY_UNICASE_CHARACTER *page= my_unicase_default.page[wc >> 8]; + return (int) (page ? page[wc & 0xFF].sort : wc); +} +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_general_ci +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b0,b1) +#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER +#include "strcoll.ic" + +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_bin +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b0, b1)) +#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b0, b1, b2, b3)) +#include "strcoll.ic" + +#undef IS_MB2_CHAR +#undef IS_MB4_CHAR + static int my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)), my_wc_t *pwc, const uchar *s, const uchar *e) @@ -1371,146 +1398,6 @@ my_casedn_utf16(CHARSET_INFO *cs, char *src, size_t srclen, } -static int -my_strnncoll_utf16(CHARSET_INFO *cs, - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool t_is_prefix) -{ - int s_res, t_res; - my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc); - my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; - const uchar *se= s + slen; - const uchar *te= t + tlen; - MY_UNICASE_INFO *uni_plane= cs->caseinfo; - - while (s < se && t < te) - { - s_res= mb_wc(cs, &s_wc, s, se); - t_res= mb_wc(cs, &t_wc, t, te); - - if (s_res <= 0 || t_res <= 0) - { - /* Incorrect string, compare by char value */ - return my_bincmp(s, se, t, te); - } - - my_tosort_utf16(uni_plane, &s_wc); - my_tosort_utf16(uni_plane, &t_wc); - - if (s_wc != t_wc) - { - return s_wc > t_wc ? 1 : -1; - } - - s+= s_res; - t+= t_res; - } - return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t))); -} - - -/** - Compare strings, discarding end space - - If one string is shorter as the other, then we space extend the other - so that the strings have equal length. - - This will ensure that the following things hold: - - "a" == "a " - "a\0" < "a" - "a\0" < "a " - - @param cs Character set pinter. - @param a First string to compare. - @param a_length Length of 'a'. - @param b Second string to compare. - @param b_length Length of 'b'. - - IMPLEMENTATION - - @return Comparison result. - @retval Negative number, if a less than b. - @retval 0, if a is equal to b - @retval Positive number, if a > b -*/ - -static int -my_strnncollsp_utf16(CHARSET_INFO *cs, - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool diff_if_only_endspace_difference) -{ - int res; - my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc); - my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; - const uchar *se= s + slen, *te= t + tlen; - MY_UNICASE_INFO *uni_plane= cs->caseinfo; - - DBUG_ASSERT((slen % 2) == 0); - DBUG_ASSERT((tlen % 2) == 0); - -#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE - diff_if_only_endspace_difference= FALSE; -#endif - - while (s < se && t < te) - { - int s_res= mb_wc(cs, &s_wc, s, se); - int t_res= mb_wc(cs, &t_wc, t, te); - - if (s_res <= 0 || t_res <= 0) - { - /* Incorrect string, compare bytewise */ - return my_bincmp(s, se, t, te); - } - - my_tosort_utf16(uni_plane, &s_wc); - my_tosort_utf16(uni_plane, &t_wc); - - if (s_wc != t_wc) - { - return s_wc > t_wc ? 1 : -1; - } - - s+= s_res; - t+= t_res; - } - - slen= (size_t) (se - s); - tlen= (size_t) (te - t); - res= 0; - - if (slen != tlen) - { - int s_res, swap= 1; - if (diff_if_only_endspace_difference) - res= 1; /* Assume 's' is bigger */ - if (slen < tlen) - { - slen= tlen; - s= t; - se= te; - swap= -1; - res= -res; - } - - for ( ; s < se; s+= s_res) - { - if ((s_res= mb_wc(cs, &s_wc, s, se)) < 0) - { - DBUG_ASSERT(0); - return 0; - } - if (s_wc != ' ') - return (s_wc < ' ') ? -swap : swap; - } - } - return res; -} - - static uint my_ismbchar_utf16(CHARSET_INFO *cs, const char *b, const char *e) { @@ -1623,111 +1510,6 @@ my_wildcmp_utf16_bin(CHARSET_INFO *cs, } -static int -my_strnncoll_utf16_bin(CHARSET_INFO *cs, - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool t_is_prefix) -{ - int s_res,t_res; - my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc); - my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; - const uchar *se=s+slen; - const uchar *te=t+tlen; - - while ( s < se && t < te ) - { - s_res= mb_wc(cs, &s_wc, s, se); - t_res= mb_wc(cs, &t_wc, t, te); - - if (s_res <= 0 || t_res <= 0) - { - /* Incorrect string, compare by char value */ - return my_bincmp(s, se, t, te); - } - if (s_wc != t_wc) - { - return s_wc > t_wc ? 1 : -1; - } - - s+= s_res; - t+= t_res; - } - return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t))); -} - - -static int -my_strnncollsp_utf16_bin(CHARSET_INFO *cs, - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool diff_if_only_endspace_difference) -{ - int res; - my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc); - my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; - const uchar *se= s + slen, *te= t + tlen; - - DBUG_ASSERT((slen % 2) == 0); - DBUG_ASSERT((tlen % 2) == 0); - -#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE - diff_if_only_endspace_difference= FALSE; -#endif - - while (s < se && t < te) - { - int s_res= mb_wc(cs, &s_wc, s, se); - int t_res= mb_wc(cs, &t_wc, t, te); - - if (s_res <= 0 || t_res <= 0) - { - /* Incorrect string, compare bytewise */ - return my_bincmp(s, se, t, te); - } - - if (s_wc != t_wc) - { - return s_wc > t_wc ? 1 : -1; - } - - s+= s_res; - t+= t_res; - } - - slen= (size_t) (se - s); - tlen= (size_t) (te - t); - res= 0; - - if (slen != tlen) - { - int s_res, swap= 1; - if (diff_if_only_endspace_difference) - res= 1; /* Assume 's' is bigger */ - if (slen < tlen) - { - slen= tlen; - s= t; - se= te; - swap= -1; - res= -res; - } - - for ( ; s < se; s+= s_res) - { - if ((s_res= mb_wc(cs, &s_wc, s, se)) < 0) - { - DBUG_ASSERT(0); - return 0; - } - if (s_wc != ' ') - return (s_wc < ' ') ? -swap : swap; - } - } - return res; -} - - static void my_hash_sort_utf16_bin(CHARSET_INFO *cs, const uchar *pos, size_t len, ulong *nr1, ulong *nr2) @@ -1747,8 +1529,8 @@ my_hash_sort_utf16_bin(CHARSET_INFO *cs, static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler = { NULL, /* init */ - my_strnncoll_utf16, - my_strnncollsp_utf16, + my_strnncoll_utf16_general_ci, + my_strnncollsp_utf16_general_ci, my_strnxfrm_unicode, my_strnxfrmlen_unicode, my_like_range_generic, @@ -1808,6 +1590,7 @@ MY_CHARSET_HANDLER my_charset_utf16_handler= my_charlen_utf16, my_well_formed_char_length_utf16, my_copy_fix_mb2_or_mb4, + my_uni_utf16, }; @@ -1877,6 +1660,24 @@ struct charset_info_st my_charset_utf16_bin= }; +#define IS_MB2_CHAR(b0,b1) (!MY_UTF16_SURROGATE_HEAD(b1)) +#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b1) && MY_UTF16_LOW_HEAD(b3)) + +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_general_ci +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b1,b0) +#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER +#include "strcoll.ic" + +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_bin +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b1, b0)) +#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b1, b0, b3, b2)) +#include "strcoll.ic" + +#undef IS_MB2_CHAR +#undef IS_MB4_CHAR + static int my_utf16le_uni(CHARSET_INFO *cs __attribute__((unused)), my_wc_t *pwc, const uchar *s, const uchar *e) @@ -1948,6 +1749,38 @@ my_lengthsp_utf16le(CHARSET_INFO *cs __attribute__((unused)), } +static MY_COLLATION_HANDLER my_collation_utf16le_general_ci_handler = +{ + NULL, /* init */ + my_strnncoll_utf16le_general_ci, + my_strnncollsp_utf16le_general_ci, + my_strnxfrm_unicode, + my_strnxfrmlen_unicode, + my_like_range_generic, + my_wildcmp_utf16_ci, + my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf16, + my_propagate_simple +}; + + +static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler = +{ + NULL, /* init */ + my_strnncoll_utf16le_bin, + my_strnncollsp_utf16le_bin, + my_strnxfrm_unicode_full_bin, + my_strnxfrmlen_unicode_full_bin, + my_like_range_generic, + my_wildcmp_utf16_bin, + my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf16_bin, + my_propagate_simple +}; + + static MY_CHARSET_HANDLER my_charset_utf16le_handler= { NULL, /* init */ @@ -1980,6 +1813,7 @@ static MY_CHARSET_HANDLER my_charset_utf16le_handler= my_charlen_utf16, my_well_formed_char_length_utf16, my_copy_fix_mb2_or_mb4, + my_uni_utf16le, }; @@ -2012,7 +1846,7 @@ struct charset_info_st my_charset_utf16le_general_ci= 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ &my_charset_utf16le_handler, - &my_collation_utf16_general_ci_handler + &my_collation_utf16le_general_ci_handler }; @@ -2045,7 +1879,7 @@ struct charset_info_st my_charset_utf16le_bin= 0, /* escape_with_backslash_is_dangerous */ 1, /* levels_for_order */ &my_charset_utf16le_handler, - &my_collation_utf16_bin_handler + &my_collation_utf16le_bin_handler }; @@ -2060,6 +1894,34 @@ struct charset_info_st my_charset_utf16le_bin= */ #define IS_UTF32_MBHEAD4(b0,b1) (!(b0) && ((uchar) (b1) <= 0x10)) +#define IS_MB4_CHAR(b0,b1,b2,b3) (IS_UTF32_MBHEAD4(b0,b1)) + +#define MY_UTF32_WC4(b0,b1,b2,b3) ((b0 << 24) + (b1 << 16) + (b2 << 8) + (b3)) + +static inline int my_weight_utf32_general_ci(uchar b0, uchar b1, + uchar b2, uchar b3) +{ + my_wc_t wc= MY_UTF32_WC4(b0, b1, b2, b3); + if (wc <= 0xFFFF) + { + MY_UNICASE_CHARACTER *page= my_unicase_default.page[wc >> 8]; + return (int) (page ? page[wc & 0xFF].sort : wc); + } + return MY_CS_REPLACEMENT_CHARACTER; +} +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_general_ci +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB4(b0,b1,b2,b3) my_weight_utf32_general_ci(b0, b1, b2, b3) +#include "strcoll.ic" + +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_bin +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF32_WC4(b0, b1, b2, b3)) +#include "strcoll.ic" + +#undef IS_MB2_CHAR +#undef IS_MB4_CHAR + static int my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)), @@ -2067,7 +1929,7 @@ my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)), { if (s + 4 > e) return MY_CS_TOOSMALL4; - *pwc= (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + (s[3]); + *pwc= MY_UTF32_WC4(s[0], s[1], s[2], s[3]); return *pwc > 0x10FFFF ? MY_CS_ILSEQ : 4; } @@ -2197,144 +2059,6 @@ my_casedn_utf32(CHARSET_INFO *cs, char *src, size_t srclen, } -static int -my_strnncoll_utf32(CHARSET_INFO *cs, - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool t_is_prefix) -{ - my_wc_t UNINIT_VAR(s_wc),UNINIT_VAR(t_wc); - const uchar *se= s + slen; - const uchar *te= t + tlen; - MY_UNICASE_INFO *uni_plane= cs->caseinfo; - - while (s < se && t < te) - { - int s_res= my_utf32_uni(cs, &s_wc, s, se); - int t_res= my_utf32_uni(cs, &t_wc, t, te); - - if ( s_res <= 0 || t_res <= 0) - { - /* Incorrect string, compare by char value */ - return my_bincmp(s, se, t, te); - } - - my_tosort_utf32(uni_plane, &s_wc); - my_tosort_utf32(uni_plane, &t_wc); - - if (s_wc != t_wc) - { - return s_wc > t_wc ? 1 : -1; - } - - s+= s_res; - t+= t_res; - } - return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t))); -} - - -/** - Compare strings, discarding end space - - If one string is shorter as the other, then we space extend the other - so that the strings have equal length. - - This will ensure that the following things hold: - - "a" == "a " - "a\0" < "a" - "a\0" < "a " - - @param cs Character set pinter. - @param a First string to compare. - @param a_length Length of 'a'. - @param b Second string to compare. - @param b_length Length of 'b'. - - IMPLEMENTATION - - @return Comparison result. - @retval Negative number, if a less than b. - @retval 0, if a is equal to b - @retval Positive number, if a > b -*/ - - -static int -my_strnncollsp_utf32(CHARSET_INFO *cs, - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool diff_if_only_endspace_difference) -{ - int res; - my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc); - const uchar *se= s + slen, *te= t + tlen; - MY_UNICASE_INFO *uni_plane= cs->caseinfo; - - DBUG_ASSERT((slen % 4) == 0); - DBUG_ASSERT((tlen % 4) == 0); - -#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE - diff_if_only_endspace_difference= FALSE; -#endif - - while ( s < se && t < te ) - { - int s_res= my_utf32_uni(cs, &s_wc, s, se); - int t_res= my_utf32_uni(cs, &t_wc, t, te); - - if ( s_res <= 0 || t_res <= 0 ) - { - /* Incorrect string, compare bytewise */ - return my_bincmp(s, se, t, te); - } - - my_tosort_utf32(uni_plane, &s_wc); - my_tosort_utf32(uni_plane, &t_wc); - - if ( s_wc != t_wc ) - { - return s_wc > t_wc ? 1 : -1; - } - - s+= s_res; - t+= t_res; - } - - slen= (size_t) (se - s); - tlen= (size_t) (te - t); - res= 0; - - if (slen != tlen) - { - int s_res, swap= 1; - if (diff_if_only_endspace_difference) - res= 1; /* Assume 's' is bigger */ - if (slen < tlen) - { - slen= tlen; - s= t; - se= te; - swap= -1; - res= -res; - } - - for ( ; s < se; s+= s_res) - { - if ((s_res= my_utf32_uni(cs, &s_wc, s, se)) < 0) - { - DBUG_ASSERT(0); - return 0; - } - if (s_wc != ' ') - return (s_wc < ' ') ? -swap : swap; - } - } - return res; -} - - static uint my_ismbchar_utf32(CHARSET_INFO *cs __attribute__((unused)), const char *b, @@ -2746,97 +2470,6 @@ my_wildcmp_utf32_bin(CHARSET_INFO *cs, } -static int -my_strnncoll_utf32_bin(CHARSET_INFO *cs, - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool t_is_prefix) -{ - my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc); - const uchar *se= s + slen; - const uchar *te= t + tlen; - - while (s < se && t < te) - { - int s_res= my_utf32_uni(cs, &s_wc, s, se); - int t_res= my_utf32_uni(cs, &t_wc, t, te); - - if (s_res <= 0 || t_res <= 0) - { - /* Incorrect string, compare by char value */ - return my_bincmp(s, se, t, te); - } - if (s_wc != t_wc) - { - return s_wc > t_wc ? 1 : -1; - } - - s+= s_res; - t+= t_res; - } - return (int) (t_is_prefix ? (t-te) : ((se - s) - (te - t))); -} - - -static inline my_wc_t -my_utf32_get(const uchar *s) -{ - return - ((my_wc_t) s[0] << 24) + - ((my_wc_t) s[1] << 16) + - ((my_wc_t) s[2] << 8) + - s[3]; -} - - -static int -my_strnncollsp_utf32_bin(CHARSET_INFO *cs __attribute__((unused)), - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool diff_if_only_endspace_difference - __attribute__((unused))) -{ - const uchar *se, *te; - size_t minlen; - - DBUG_ASSERT((slen % 4) == 0); - DBUG_ASSERT((tlen % 4) == 0); - - se= s + slen; - te= t + tlen; - - for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 4) - { - my_wc_t s_wc= my_utf32_get(s); - my_wc_t t_wc= my_utf32_get(t); - if (s_wc != t_wc) - return s_wc > t_wc ? 1 : -1; - - s+= 4; - t+= 4; - } - - if (slen != tlen) - { - int swap= 1; - if (slen < tlen) - { - s= t; - se= te; - swap= -1; - } - - for ( ; s < se ; s+= 4) - { - my_wc_t s_wc= my_utf32_get(s); - if (s_wc != ' ') - return (s_wc < ' ') ? -swap : swap; - } - } - return 0; -} - - static size_t my_scan_utf32(CHARSET_INFO *cs, const char *str, const char *end, int sequence_type) @@ -2864,8 +2497,8 @@ my_scan_utf32(CHARSET_INFO *cs, static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler = { NULL, /* init */ - my_strnncoll_utf32, - my_strnncollsp_utf32, + my_strnncoll_utf32_general_ci, + my_strnncollsp_utf32_general_ci, my_strnxfrm_unicode, my_strnxfrmlen_unicode, my_like_range_generic, @@ -2925,6 +2558,7 @@ MY_CHARSET_HANDLER my_charset_utf32_handler= my_charlen_utf32, my_well_formed_char_length_utf32, my_copy_fix_mb2_or_mb4, + my_uni_utf32, }; @@ -3058,6 +2692,31 @@ static const uchar to_upper_ucs2[] = { }; +/* Definitions for strcoll.ic */ +#define IS_MB2_CHAR(x,y) (1) +#define UCS2_CODE(b0,b1) (((uchar) b0) << 8 | ((uchar) b1)) + + +static inline int my_weight_mb2_ucs2_general_ci(uchar b0, uchar b1) +{ + my_wc_t wc= UCS2_CODE(b0, b1); + MY_UNICASE_CHARACTER *page= my_unicase_default.page[wc >> 8]; + return (int) (page ? page[wc & 0xFF].sort : wc); +} + + +#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_general_ci +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) my_weight_mb2_ucs2_general_ci(b0,b1) +#include "strcoll.ic" + + +#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_bin +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) UCS2_CODE(b0,b1) +#include "strcoll.ic" + + static int my_charlen_ucs2(CHARSET_INFO *cs __attribute__((unused)), const uchar *s, const uchar *e) @@ -3208,120 +2867,6 @@ my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)), } -static int my_strnncoll_ucs2(CHARSET_INFO *cs, - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool t_is_prefix) -{ - int s_res,t_res; - my_wc_t UNINIT_VAR(s_wc),UNINIT_VAR(t_wc); - const uchar *se=s+slen; - const uchar *te=t+tlen; - MY_UNICASE_INFO *uni_plane= cs->caseinfo; - - while ( s < se && t < te ) - { - s_res=my_ucs2_uni(cs,&s_wc, s, se); - t_res=my_ucs2_uni(cs,&t_wc, t, te); - - if ( s_res <= 0 || t_res <= 0 ) - { - /* Incorrect string, compare by char value */ - return ((int)s[0]-(int)t[0]); - } - - my_tosort_ucs2(uni_plane, &s_wc); - my_tosort_ucs2(uni_plane, &t_wc); - - if ( s_wc != t_wc ) - { - return s_wc > t_wc ? 1 : -1; - } - - s+=s_res; - t+=t_res; - } - return (int) (t_is_prefix ? t-te : ((se-s) - (te-t))); -} - -/* - Compare strings, discarding end space - - SYNOPSIS - my_strnncollsp_ucs2() - cs character set handler - a First string to compare - a_length Length of 'a' - b Second string to compare - b_length Length of 'b' - - IMPLEMENTATION - If one string is shorter as the other, then we space extend the other - so that the strings have equal length. - - This will ensure that the following things hold: - - "a" == "a " - "a\0" < "a" - "a\0" < "a " - - RETURN - < 0 a < b - = 0 a == b - > 0 a > b -*/ - -static int my_strnncollsp_ucs2(CHARSET_INFO *cs __attribute__((unused)), - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool diff_if_only_endspace_difference - __attribute__((unused))) -{ - const uchar *se, *te; - size_t minlen; - MY_UNICASE_INFO *uni_plane= cs->caseinfo; - - /* extra safety to make sure the lengths are even numbers */ - slen&= ~1; - tlen&= ~1; - - se= s + slen; - te= t + tlen; - - for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 2) - { - int s_wc = uni_plane->page[s[0]] ? (int) uni_plane->page[s[0]][s[1]].sort : - (((int) s[0]) << 8) + (int) s[1]; - - int t_wc = uni_plane->page[t[0]] ? (int) uni_plane->page[t[0]][t[1]].sort : - (((int) t[0]) << 8) + (int) t[1]; - if ( s_wc != t_wc ) - return s_wc > t_wc ? 1 : -1; - - s+= 2; - t+= 2; - } - - if (slen != tlen) - { - int swap= 1; - if (slen < tlen) - { - s= t; - se= te; - swap= -1; - } - - for ( ; s < se ; s+= 2) - { - if (s[0] || s[1] != ' ') - return (s[0] == 0 && s[1] < ' ') ? -swap : swap; - } - } - return 0; -} - - static uint my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)), const char *b, const char *e) @@ -3418,85 +2963,6 @@ int my_wildcmp_ucs2_bin(CHARSET_INFO *cs, static -int my_strnncoll_ucs2_bin(CHARSET_INFO *cs, - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool t_is_prefix) -{ - int s_res,t_res; - my_wc_t UNINIT_VAR(s_wc),UNINIT_VAR(t_wc); - const uchar *se=s+slen; - const uchar *te=t+tlen; - - while ( s < se && t < te ) - { - s_res=my_ucs2_uni(cs,&s_wc, s, se); - t_res=my_ucs2_uni(cs,&t_wc, t, te); - - if ( s_res <= 0 || t_res <= 0 ) - { - /* Incorrect string, compare by char value */ - return ((int)s[0]-(int)t[0]); - } - if ( s_wc != t_wc ) - { - return s_wc > t_wc ? 1 : -1; - } - - s+=s_res; - t+=t_res; - } - return (int) (t_is_prefix ? t-te : ((se-s) - (te-t))); -} - -static int my_strnncollsp_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), - const uchar *s, size_t slen, - const uchar *t, size_t tlen, - my_bool diff_if_only_endspace_difference - __attribute__((unused))) -{ - const uchar *se, *te; - size_t minlen; - - /* extra safety to make sure the lengths are even numbers */ - slen= (slen >> 1) << 1; - tlen= (tlen >> 1) << 1; - - se= s + slen; - te= t + tlen; - - for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 2) - { - int s_wc= s[0] * 256 + s[1]; - int t_wc= t[0] * 256 + t[1]; - if ( s_wc != t_wc ) - return s_wc > t_wc ? 1 : -1; - - s+= 2; - t+= 2; - } - - if (slen != tlen) - { - int swap= 1; - if (slen < tlen) - { - s= t; - se= te; - swap= -1; - } - - for ( ; s < se ; s+= 2) - { - if (s[0] || s[1] != ' ') - return (s[0] == 0 && s[1] < ' ') ? -swap : swap; - } - } - return 0; -} - - -static void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), const uchar *key, size_t len,ulong *nr1, ulong *nr2) { @@ -3518,8 +2984,8 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler = { NULL, /* init */ - my_strnncoll_ucs2, - my_strnncollsp_ucs2, + my_strnncoll_ucs2_general_ci, + my_strnncollsp_ucs2_general_ci, my_strnxfrm_unicode, my_strnxfrmlen_unicode, my_like_range_generic, @@ -3579,6 +3045,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler= my_charlen_ucs2, my_well_formed_char_length_ucs2, my_copy_fix_mb2_or_mb4, + my_uni_ucs2, }; |