summaryrefslogtreecommitdiff
path: root/strings
diff options
context:
space:
mode:
Diffstat (limited to 'strings')
-rw-r--r--strings/conf_to_src.c8
-rw-r--r--strings/ctype-big5.c94
-rw-r--r--strings/ctype-bin.c66
-rw-r--r--strings/ctype-cp932.c7
-rw-r--r--strings/ctype-czech.c4
-rw-r--r--strings/ctype-euc_kr.c9
-rw-r--r--strings/ctype-eucjpms.c12
-rw-r--r--strings/ctype-gb2312.c7
-rw-r--r--strings/ctype-gbk.c42
-rw-r--r--strings/ctype-latin1.c39
-rw-r--r--strings/ctype-mb.c93
-rw-r--r--strings/ctype-mb.ic6
-rw-r--r--strings/ctype-simple.c13
-rw-r--r--strings/ctype-sjis.c7
-rw-r--r--strings/ctype-tis620.c41
-rw-r--r--strings/ctype-uca.c28
-rw-r--r--strings/ctype-ucs2.c32
-rw-r--r--strings/ctype-ujis.c12
-rw-r--r--strings/ctype-utf8.c80
-rw-r--r--strings/ctype-win1250ch.c4
-rw-r--r--strings/str2int.c2
-rw-r--r--strings/strcoll.ic47
22 files changed, 131 insertions, 522 deletions
diff --git a/strings/conf_to_src.c b/strings/conf_to_src.c
index 28d2fd1515e..5b9793f388d 100644
--- a/strings/conf_to_src.c
+++ b/strings/conf_to_src.c
@@ -256,7 +256,7 @@ void dispcset(FILE *f,CHARSET_INFO *cs)
fprintf(f," 255, /* max_sort_char */\n");
fprintf(f," ' ', /* pad_char */\n");
fprintf(f," 0, /* escape_with_backslash_is_dangerous */\n");
-
+ fprintf(f," 1, /* levels_for_order */\n");
fprintf(f," &my_charset_8bit_handler,\n");
if (cs->state & MY_CS_BINSORT)
fprintf(f," &my_collation_8bit_bin_handler,\n");
@@ -270,9 +270,9 @@ static void
fprint_copyright(FILE *file)
{
fprintf(file,
-"/* Copyright 2000-2008 MySQL AB, 2008 Sun Microsystems Inc.\n"
+"/* Copyright 2000-2008 MySQL AB, 2008 Sun Microsystems, Inc.\n"
" Copyright 2010-2011 Monty Program Ab\n"
-" Copyright (c) 2003, 2011, Oracle and/or its affiliates\n"
+" Copyright (c) 2000, 2011, Oracle and/or its affiliates.\n"
"\n"
" This program is free software; you can redistribute it and/or modify\n"
" it under the terms of the GNU General Public License as published by\n"
@@ -333,7 +333,7 @@ main(int argc, char **argv __attribute__((unused)))
fprintf(f, " ./conf_to_src ../sql/share/charsets/ > FILE\n");
fprintf(f, "*/\n\n");
fprint_copyright(f);
- fprintf(f,"#include <my_global.h>\n");
+ fprintf(f,"#include \"strings_def.h\"\n");
fprintf(f,"#include <m_ctype.h>\n\n");
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index d6a9695afbf..962931913a2 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -45,8 +45,6 @@
#define isbig5code(c,d) (isbig5head(c) && isbig5tail(d))
#define big5code(c,d) (((uchar)(c) <<8) | (uchar)(d))
-#define big5head(e) ((uchar)(e>>8))
-#define big5tail(e) ((uchar)(e&0xff))
#define MY_FUNCTION_NAME(x) my_ ## x ## _big5
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80)
@@ -850,93 +848,6 @@ static uint16 big5strokexfrm(uint16 i)
}
-static size_t
-my_strnxfrm_big5(CHARSET_INFO *cs,
- uchar *dst, size_t dstlen, uint nweights,
- const uchar *src, size_t srclen, uint flags)
-{
- uchar *d0= dst;
- uchar *de= dst + dstlen;
- const uchar *se= src + srclen;
- const uchar *sort_order= cs->sort_order;
-
- for (; dst < de && src < se && nweights; nweights--)
- {
- if (cs->cset->ismbchar(cs, (const char*) src, (const char*) se))
- {
- /*
- Note, it is safe not to check (src < se)
- in the code below, because ismbchar() would
- not return TRUE if src was too short
- */
- uint16 e= big5strokexfrm((uint16) big5code(*src, *(src + 1)));
- *dst++= big5head(e);
- if (dst < de)
- *dst++= big5tail(e);
- src+= 2;
- }
- else
- *dst++= sort_order ? sort_order[*src++] : *src++;
- }
- return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
-}
-
-#if 0
-static int my_strcoll_big5(const uchar *s1, const uchar *s2)
-{
-
- while (*s1 && *s2)
- {
- if (*(s1+1) && *(s2+1) && isbig5code(*s1,*(s1+1)) && isbig5code(*s2, *(s2+1)))
- {
- if (*s1 != *s2 || *(s1+1) != *(s2+1))
- return ((int) big5code(*s1,*(s1+1)) -
- (int) big5code(*s2,*(s2+1)));
- s1 +=2;
- s2 +=2;
- } else if (sort_order_big5[(uchar) *s1++] != sort_order_big5[(uchar) *s2++])
- return ((int) sort_order_big5[(uchar) s1[-1]] -
- (int) sort_order_big5[(uchar) s2[-1]]);
- }
- return 0;
-}
-
-static int my_strxfrm_big5(uchar *dest, const uchar *src, int len)
-{
- uint16 e;
- uchar *d = dest;
-
- if (len < 1) return 0;
- if (!*src)
- {
- *d = '\0';
- return 0;
- }
- while (*src && (len > 1))
- {
- if (*(src+1) && isbig5code(*src, *(src+1)))
- {
- e = big5strokexfrm((uint16) big5code(*src, *(src+1)));
- *d++ = big5head(e);
- *d++ = big5tail(e);
- src +=2;
- len--;
- } else
- *d++ = sort_order_big5[(uchar) *src++];
- }
- *d = '\0';
- return (int) (d-dest);
-}
-#endif
-
-
-static uint ismbchar_big5(CHARSET_INFO *cs __attribute__((unused)),
- const char* p, const char *e)
-{
- return (isbig5head(*(p)) && (e)-(p)>1 && isbig5tail(*((p)+1))? 2: 0);
-}
-
-
static uint mbcharlen_big5(CHARSET_INFO *cs __attribute__((unused)), uint c)
{
return (isbig5head(c)? 2 : 1);
@@ -6774,6 +6685,8 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
#define MY_FUNCTION_NAME(x) my_ ## x ## _big5_chinese_ci
#define WEIGHT_MB1(x) (sort_order_big5[(uchar) (x)])
#define WEIGHT_MB2(x,y) (big5code(x, y))
+#define WEIGHT_MB2_FRM(x,y) (big5strokexfrm((uint16) WEIGHT_MB2(x, y)))
+#define DEFINE_STRNXFRM
#include "strcoll.ic"
@@ -6788,7 +6701,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_chinese_ci=
NULL, /* init */
my_strnncoll_big5_chinese_ci,
my_strnncollsp_big5_chinese_ci,
- my_strnxfrm_big5,
+ my_strnxfrm_big5_chinese_ci,
my_strnxfrmlen_simple,
my_like_range_mb,
my_wildcmp_mb,
@@ -6818,7 +6731,6 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_bin=
static MY_CHARSET_HANDLER my_charset_big5_handler=
{
NULL, /* init */
- ismbchar_big5,
mbcharlen_big5,
my_numchars_mb,
my_charpos_mb,
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index 0be6ae95577..8331de34ee4 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -119,9 +119,7 @@ size_t my_lengthsp_binary(CHARSET_INFO *cs __attribute__((unused)),
static int my_strnncollsp_binary(CHARSET_INFO * cs __attribute__((unused)),
const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool diff_if_only_endspace_difference
- __attribute__((unused)))
+ const uchar *t, size_t tlen)
{
return my_strnncoll_binary(cs,s,slen,t,tlen,0);
}
@@ -139,6 +137,27 @@ static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
/*
+ Compare a string to an array of spaces, for PAD SPACE behaviour.
+ @param str - the string
+ @param length - the length of the string
+ @return <0 - if a byte less than SPACE was found
+ @return >0 - if a byte greater than SPACE was found
+ @return 0 - if the string entirely consists of SPACE characters
+*/
+int my_strnncollsp_padspace_bin(const uchar *str, size_t length)
+{
+ for ( ; length ; str++, length--) /* visit each of the 'length' bytes in order */
+ {
+ if (*str < ' ') /* first non-SPACE byte is below SPACE: result is negative */
+ return -1;
+ else if (*str > ' ') /* first non-SPACE byte is above SPACE: result is positive */
+ return 1;
+ }
+ return 0; /* empty input or nothing but SPACE bytes */
+}
+
+
+/*
Compare two strings. Result is sign(first_argument - second_argument)
SYNOPSIS
@@ -148,9 +167,6 @@ static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
slen Length of 's'
t String to compare
tlen Length of 't'
- diff_if_only_endspace_difference
- Set to 1 if the strings should be regarded as different
- if they only difference in end space
NOTE
This function is used for character strings with binary collations.
@@ -165,16 +181,10 @@ static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *a, size_t a_length,
- const uchar *b, size_t b_length,
- my_bool diff_if_only_endspace_difference)
+ const uchar *b, size_t b_length)
{
const uchar *end;
size_t length;
- int res;
-
-#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
- diff_if_only_endspace_difference= 0;
-#endif
end= a + (length= MY_MIN(a_length, b_length));
while (a < end)
@@ -182,31 +192,10 @@ static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
if (*a++ != *b++)
return ((int) a[-1] - (int) b[-1]);
}
- res= 0;
- if (a_length != b_length)
- {
- int swap= 1;
- /*
- Check the next not space character of the longer key. If it's < ' ',
- then it's smaller than the other key.
- */
- if (diff_if_only_endspace_difference)
- res= 1; /* Assume 'a' is bigger */
- if (a_length < b_length)
- {
- /* put shorter key in s */
- a_length= b_length;
- a= b;
- swap= -1; /* swap sign of result */
- res= -res;
- }
- for (end= a + a_length-length; a < end ; a++)
- {
- if (*a != ' ')
- return (*a < ' ') ? -swap : swap;
- }
- }
- return res;
+ return a_length == b_length ? 0 :
+ a_length < b_length ?
+ -my_strnncollsp_padspace_bin(b, b_length - length) :
+ my_strnncollsp_padspace_bin(a, a_length - length);
}
@@ -521,7 +510,6 @@ static MY_COLLATION_HANDLER my_collation_binary_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- NULL, /* ismbchar */
my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
my_charpos_8bit,
diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c
index 9bf206f1de7..2163662269d 100644
--- a/strings/ctype-cp932.c
+++ b/strings/ctype-cp932.c
@@ -191,12 +191,6 @@ static const uchar sort_order_cp932[]=
#include "ctype-mb.ic"
-static uint ismbchar_cp932(CHARSET_INFO *cs __attribute__((unused)),
- const char* p, const char *e)
-{
- return (iscp932head((uchar) *p) && (e-p)>1 && iscp932tail((uchar)p[1]) ? 2: 0);
-}
-
static uint mbcharlen_cp932(CHARSET_INFO *cs __attribute__((unused)),uint c)
{
return (iscp932head((uchar) c) ? 2 : 1);
@@ -34693,7 +34687,6 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_bin=
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- ismbchar_cp932,
mbcharlen_cp932,
my_numchars_mb,
my_charpos_mb,
diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c
index e3abebad91d..a7efd20b259 100644
--- a/strings/ctype-czech.c
+++ b/strings/ctype-czech.c
@@ -276,9 +276,7 @@ static int my_strnncoll_czech(CHARSET_INFO *cs __attribute__((unused)),
static
int my_strnncollsp_czech(CHARSET_INFO * cs,
const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool diff_if_only_endspace_difference
- __attribute__((unused)))
+ const uchar *t, size_t tlen)
{
for ( ; slen && s[slen-1] == ' ' ; slen--);
for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index 1f13ab66284..19ed586ea49 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -210,14 +210,6 @@ static const uchar sort_order_euc_kr[]=
#include "ctype-mb.ic"
-static uint ismbchar_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
- const char* p, const char *e)
-{
- return ((*(uchar*)(p)<0x80)? 0:\
- iseuc_kr_head(*(p)) && (e)-(p)>1 && iseuc_kr_tail(*((p)+1))? 2:\
- 0);
-}
-
static uint mbcharlen_euc_kr(CHARSET_INFO *cs __attribute__((unused)),uint c)
{
return (iseuc_kr_head(c) ? 2 : 1);
@@ -9987,7 +9979,6 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_bin=
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- ismbchar_euc_kr,
mbcharlen_euc_kr,
my_numchars_mb,
my_charpos_mb,
diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c
index 82c4bb5a4e8..469d3a5be6c 100644
--- a/strings/ctype-eucjpms.c
+++ b/strings/ctype-eucjpms.c
@@ -199,6 +199,7 @@ static const uchar sort_order_eucjpms[]=
#define IS_MB2_KATA(x,y) (iseucjpms_ss2(x) && iskata(y))
#define IS_MB2_CHAR(x,y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
#define IS_MB3_CHAR(x,y,z) (iseucjpms_ss3(x) && IS_MB2_JIS(y,z))
+#define IS_MB_PREFIX2(x,y) (iseucjpms_ss3(x) && iseucjpms(y))
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
@@ -220,16 +221,6 @@ static const uchar sort_order_eucjpms[]=
#include "strcoll.ic"
-static uint ismbchar_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
- const char* p, const char *e)
-{
- return ((*(uchar*)(p)<0x80)? 0:\
- iseucjpms(*(p)) && (e)-(p)>1 && iseucjpms(*((p)+1))? 2:\
- iseucjpms_ss2(*(p)) && (e)-(p)>1 && iskata(*((p)+1))? 2:\
- iseucjpms_ss3(*(p)) && (e)-(p)>2 && iseucjpms(*((p)+1)) && iseucjpms(*((p)+2))? 3:\
- 0);
-}
-
static uint mbcharlen_eucjpms(CHARSET_INFO *cs __attribute__((unused)),uint c)
{
return (iseucjpms(c)? 2: iseucjpms_ss2(c)? 2: iseucjpms_ss3(c)? 3: 1);
@@ -67520,7 +67511,6 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_bin_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- ismbchar_eucjpms,
mbcharlen_eucjpms,
my_numchars_mb,
my_charpos_mb,
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index b0e275fe93d..a77237c1791 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -173,12 +173,6 @@ static const uchar sort_order_gb2312[]=
#include "ctype-mb.ic"
-static uint ismbchar_gb2312(CHARSET_INFO *cs __attribute__((unused)),
- const char* p, const char *e)
-{
- return (isgb2312head(*(p)) && (e)-(p)>1 && isgb2312tail(*((p)+1))? 2: 0);
-}
-
static uint mbcharlen_gb2312(CHARSET_INFO *cs __attribute__((unused)),uint c)
{
return (isgb2312head(c)? 2 : 1);
@@ -6391,7 +6385,6 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_bin=
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- ismbchar_gb2312,
mbcharlen_gb2312,
my_numchars_mb,
my_charpos_mb,
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index 37b003f1899..e4e015a59d2 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -3451,44 +3451,6 @@ static uint16 gbksortorder(uint16 i)
}
-static size_t
-my_strnxfrm_gbk(CHARSET_INFO *cs,
- uchar *dst, size_t dstlen, uint nweights,
- const uchar *src, size_t srclen, uint flags)
-{
- uchar *d0= dst;
- uchar *de= dst + dstlen;
- const uchar *se= src + srclen;
- const uchar *sort_order= cs->sort_order;
-
- for (; dst < de && src < se && nweights; nweights--)
- {
- if (cs->cset->ismbchar(cs, (const char*) src, (const char*) se))
- {
- /*
- Note, it is safe not to check (src < se)
- in the code below, because ismbchar() would
- not return TRUE if src was too short
- */
- uint16 e= gbksortorder((uint16) gbkcode(*src, *(src + 1)));
- *dst++= gbkhead(e);
- if (dst < de)
- *dst++= gbktail(e);
- src+= 2;
- }
- else
- *dst++= sort_order ? sort_order[*src++] : *src++;
- }
- return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
-}
-
-
-static uint ismbchar_gbk(CHARSET_INFO *cs __attribute__((unused)),
- const char* p, const char *e)
-{
- return (isgbkhead(*(p)) && (e)-(p)>1 && isgbktail(*((p)+1))? 2: 0);
-}
-
static uint mbcharlen_gbk(CHARSET_INFO *cs __attribute__((unused)),uint c)
{
return (isgbkhead(c)? 2 : 1);
@@ -10658,6 +10620,7 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)),
#define MY_FUNCTION_NAME(x) my_ ## x ## _gbk_chinese_ci
#define WEIGHT_MB1(x) (sort_order_gbk[(uchar) (x)])
#define WEIGHT_MB2(x,y) (gbksortorder(gbkcode(x,y)))
+#define DEFINE_STRNXFRM
#include "strcoll.ic"
@@ -10672,7 +10635,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_chinese_ci=
NULL, /* init */
my_strnncoll_gbk_chinese_ci,
my_strnncollsp_gbk_chinese_ci,
- my_strnxfrm_gbk,
+ my_strnxfrm_gbk_chinese_ci,
my_strnxfrmlen_simple,
my_like_range_mb,
my_wildcmp_mb,
@@ -10703,7 +10666,6 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_bin=
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- ismbchar_gbk,
mbcharlen_gbk,
my_numchars_mb,
my_charpos_mb,
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index 26c66d60071..aba63d97abb 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -396,7 +396,6 @@ int my_wc_mb_latin1(CHARSET_INFO *cs __attribute__((unused)),
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- NULL,
my_mbcharlen_8bit,
my_numchars_8bit,
my_charpos_8bit,
@@ -598,16 +597,10 @@ static int my_strnncoll_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
static int my_strnncollsp_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
const uchar *a, size_t a_length,
- const uchar *b, size_t b_length,
- my_bool diff_if_only_endspace_difference)
+ const uchar *b, size_t b_length)
{
const uchar *a_end= a + a_length, *b_end= b + b_length;
uchar a_char, a_extend= 0, b_char, b_extend= 0;
- int res;
-
-#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
- diff_if_only_endspace_difference= 0;
-#endif
while ((a < a_end || a_extend) && (b < b_end || b_extend))
{
@@ -640,31 +633,11 @@ static int my_strnncollsp_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
if (b_extend)
return -1;
- res= 0;
- if (a != a_end || b != b_end)
- {
- int swap= 1;
- if (diff_if_only_endspace_difference)
- res= 1; /* Assume 'a' is bigger */
- /*
- Check the next not space character of the longer key. If it's < ' ',
- then it's smaller than the other key.
- */
- if (a == a_end)
- {
- /* put shorter key in a */
- a_end= b_end;
- a= b;
- swap= -1; /* swap sign of result */
- res= -res;
- }
- for ( ; a < a_end ; a++)
- {
- if (*a != ' ')
- return (*a < ' ') ? -swap : swap;
- }
- }
- return res;
+ if (a < a_end)
+ return my_strnncollsp_padspace_bin(a, a_end - a);
+ if (b < b_end)
+ return -my_strnncollsp_padspace_bin(b, b_end - b);
+ return 0;
}
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index eef283d2925..3fa66cb0b2f 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -571,93 +571,6 @@ uint my_instr_mb(CHARSET_INFO *cs,
}
-/* BINARY collations handlers for MB charsets */
-
-int
-my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool t_is_prefix)
-{
- size_t len=MY_MIN(slen,tlen);
- int cmp= memcmp(s,t,len);
- return cmp ? cmp : (int) ((t_is_prefix ? len : slen) - tlen);
-}
-
-
-/*
- Compare two strings.
-
- SYNOPSIS
- my_strnncollsp_mb_bin()
- cs Chararacter set
- s String to compare
- slen Length of 's'
- t String to compare
- tlen Length of 't'
- diff_if_only_endspace_difference
- Set to 1 if the strings should be regarded as different
- if they only difference in end space
-
- NOTE
- This function is used for character strings with binary collations.
- The shorter string is extended with end space to be as long as the longer
- one.
-
- RETURN
- A negative number if s < t
- A positive number if s > t
- 0 if strings are equal
-*/
-
-int
-my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
- const uchar *a, size_t a_length,
- const uchar *b, size_t b_length,
- my_bool diff_if_only_endspace_difference)
-{
- const uchar *end;
- size_t length;
- int res;
-
-#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
- diff_if_only_endspace_difference= 0;
-#endif
-
- end= a + (length= MY_MIN(a_length, b_length));
- while (a < end)
- {
- if (*a++ != *b++)
- return ((int) a[-1] - (int) b[-1]);
- }
- res= 0;
- if (a_length != b_length)
- {
- int swap= 1;
- if (diff_if_only_endspace_difference)
- res= 1; /* Assume 'a' is bigger */
- /*
- Check the next not space character of the longer key. If it's < ' ',
- then it's smaller than the other key.
- */
- if (a_length < b_length)
- {
- /* put shorter key in s */
- a_length= b_length;
- a= b;
- swap= -1; /* swap sign of result */
- res= -res;
- }
- for (end= a + a_length-length; a < end ; a++)
- {
- if (*a != ' ')
- return (*a < ' ') ? -swap : swap;
- }
- }
- return res;
-}
-
-
/*
Copy one non-ascii character.
"dst" must have enough room for the character.
@@ -668,7 +581,7 @@ my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
*/
#define my_strnxfrm_mb_non_ascii_char(cs, dst, src, se) \
{ \
- switch (cs->cset->ismbchar(cs, (const char*) src, (const char*) se)) { \
+ switch (my_ismbchar(cs, (const char *) src, (const char *) se)) { \
case 4: \
*dst++= *src++; \
/* fall through */ \
@@ -740,8 +653,8 @@ my_strnxfrm_mb(CHARSET_INFO *cs,
for (; src < se && nweights && dst < de; nweights--)
{
int chlen;
- if (*src < 128 ||
- !(chlen= cs->cset->ismbchar(cs, (const char*) src, (const char*) se)))
+ if (*src < 128 || !(chlen= my_ismbchar(cs, (const char *) src,
+ (const char *) se)))
{
/* Single byte character */
*dst++= sort_order ? sort_order[*src++] : *src++;
diff --git a/strings/ctype-mb.ic b/strings/ctype-mb.ic
index 6fc4d6e3db4..2df9c9d5e49 100644
--- a/strings/ctype-mb.ic
+++ b/strings/ctype-mb.ic
@@ -75,7 +75,13 @@ MY_FUNCTION_NAME(charlen)(CHARSET_INFO *cs __attribute__((unused)),
#ifdef IS_MB3_CHAR
if (b + 3 > e)
+ {
+#ifdef IS_MB_PREFIX2
+ if (!IS_MB_PREFIX2(b[0], b[1]))
+ return MY_CS_ILSEQ;
+#endif
return MY_CS_TOOSMALLN(3);
+ }
if (IS_MB3_CHAR(b[0], b[1], b[2]))
return 3; /* Three-byte character */
#endif
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index 288f5fdd49d..5e5a345a638 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -128,9 +128,6 @@ int my_strnncoll_simple(CHARSET_INFO * cs, const uchar *s, size_t slen,
a_length Length of 'a'
b Second string to compare
b_length Length of 'b'
- diff_if_only_endspace_difference
- Set to 1 if the strings should be regarded as different
- if they only difference in end space
IMPLEMENTATION
If one string is shorter as the other, then we space extend the other
@@ -149,17 +146,12 @@ int my_strnncoll_simple(CHARSET_INFO * cs, const uchar *s, size_t slen,
*/
int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, size_t a_length,
- const uchar *b, size_t b_length,
- my_bool diff_if_only_endspace_difference)
+ const uchar *b, size_t b_length)
{
const uchar *map= cs->sort_order, *end;
size_t length;
int res;
-#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
- diff_if_only_endspace_difference= 0;
-#endif
-
end= a + (length= MY_MIN(a_length, b_length));
while (a < end)
{
@@ -170,8 +162,6 @@ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, size_t a_length,
if (a_length != b_length)
{
int swap= 1;
- if (diff_if_only_endspace_difference)
- res= 1; /* Assume 'a' is bigger */
/*
Check the next not space character of the longer key. If it's < ' ',
then it's smaller than the other key.
@@ -1926,7 +1916,6 @@ my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs,
MY_CHARSET_HANDLER my_charset_8bit_handler=
{
my_cset_init_8bit,
- NULL, /* ismbchar */
my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
my_charpos_8bit,
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index 629e1cd8309..ebcea22d242 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -192,12 +192,6 @@ static const uchar sort_order_sjis[]=
#include "ctype-mb.ic"
-static uint ismbchar_sjis(CHARSET_INFO *cs __attribute__((unused)),
- const char* p, const char *e)
-{
- return (issjishead((uchar) *p) && (e-p)>1 && issjistail((uchar)p[1]) ? 2: 0);
-}
-
static uint mbcharlen_sjis(CHARSET_INFO *cs __attribute__((unused)),uint c)
{
return (issjishead((uchar) c) ? 2 : 1);
@@ -34072,7 +34066,6 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_bin=
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- ismbchar_sjis,
mbcharlen_sjis,
my_numchars_mb,
my_charpos_mb,
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index a1ca320835d..711bb21773e 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -543,17 +543,12 @@ int my_strnncoll_tis620(CHARSET_INFO *cs __attribute__((unused)),
static
int my_strnncollsp_tis620(CHARSET_INFO * cs __attribute__((unused)),
const uchar *a0, size_t a_length,
- const uchar *b0, size_t b_length,
- my_bool diff_if_only_endspace_difference)
+ const uchar *b0, size_t b_length)
{
uchar buf[80], *end, *a, *b, *alloced= NULL;
size_t length;
int res= 0;
-#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
- diff_if_only_endspace_difference= 0;
-#endif
-
a= buf;
if ((a_length + b_length +2) > (int) sizeof(buf))
alloced= a= (uchar*) my_str_malloc(a_length+b_length+2);
@@ -575,33 +570,12 @@ int my_strnncollsp_tis620(CHARSET_INFO * cs __attribute__((unused)),
goto ret;
}
}
- if (a_length != b_length)
- {
- int swap= 1;
- if (diff_if_only_endspace_difference)
- res= 1; /* Assume 'a' is bigger */
- /*
- Check the next not space character of the longer key. If it's < ' ',
- then it's smaller than the other key.
- */
- if (a_length < b_length)
- {
- /* put shorter key in s */
- a_length= b_length;
- a= b;
- swap= -1; /* swap sign of result */
- res= -res;
- }
- for (end= a + a_length-length; a < end ; a++)
- {
- if (*a != ' ')
- {
- res= (*a < ' ') ? -swap : swap;
- goto ret;
- }
- }
- }
-
+
+ res= a_length == b_length ? 0 :
+ a_length < b_length ?
+ -my_strnncollsp_padspace_bin(b, b_length - length) :
+ my_strnncollsp_padspace_bin(a, a_length - length);
+
ret:
if (alloced)
@@ -860,7 +834,6 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- NULL, /* ismbchar */
my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
my_charpos_8bit,
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index b0728978e71..0b279b620fd 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -20775,9 +20775,6 @@ my_char_weight_addr(const MY_UCA_WEIGHT_LEVEL *level, uint wc)
slen First string length
t Second string
tlen Seconf string length
- diff_if_only_endspace_difference
- Set to 1 if the strings should be regarded as different
- if they only difference in end space
NOTES:
Works exactly the same with my_strnncoll_uca(),
@@ -20815,16 +20812,11 @@ my_char_weight_addr(const MY_UCA_WEIGHT_LEVEL *level, uint wc)
static int my_strnncollsp_uca(CHARSET_INFO *cs,
my_uca_scanner_handler *scanner_handler,
const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool diff_if_only_endspace_difference)
+ const uchar *t, size_t tlen)
{
my_uca_scanner sscanner, tscanner;
int s_res, t_res;
-#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
- diff_if_only_endspace_difference= 0;
-#endif
-
scanner_handler->init(&sscanner, cs, &cs->uca->level[0], s, slen);
scanner_handler->init(&tscanner, cs, &cs->uca->level[0], t, tlen);
@@ -20846,7 +20838,7 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs,
return (s_res - t_res);
s_res= scanner_handler->next(&sscanner);
} while (s_res > 0);
- return diff_if_only_endspace_difference ? 1 : 0;
+ return 0;
}
if (s_res < 0 && t_res > 0)
@@ -20861,7 +20853,7 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs,
return (s_res - t_res);
t_res= scanner_handler->next(&tscanner);
} while (t_res > 0);
- return diff_if_only_endspace_difference ? -1 : 0;
+ return 0;
}
return ( s_res - t_res );
@@ -22845,12 +22837,9 @@ static int my_strnncoll_any_uca(CHARSET_INFO *cs,
static int my_strnncollsp_any_uca(CHARSET_INFO *cs,
const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool diff_if_only_endspace_difference)
+ const uchar *t, size_t tlen)
{
- return my_strnncollsp_uca(cs, &my_any_uca_scanner_handler,
- s, slen, t, tlen,
- diff_if_only_endspace_difference);
+ return my_strnncollsp_uca(cs, &my_any_uca_scanner_handler, s, slen, t, tlen);
}
static void my_hash_sort_any_uca(CHARSET_INFO *cs,
@@ -22890,12 +22879,9 @@ static int my_strnncoll_ucs2_uca(CHARSET_INFO *cs,
static int my_strnncollsp_ucs2_uca(CHARSET_INFO *cs,
const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool diff_if_only_endspace_difference)
+ const uchar *t, size_t tlen)
{
- return my_strnncollsp_uca(cs, &my_any_uca_scanner_handler,
- s, slen, t, tlen,
- diff_if_only_endspace_difference);
+ return my_strnncollsp_uca(cs, &my_any_uca_scanner_handler, s, slen, t, tlen);
}
static void my_hash_sort_ucs2_uca(CHARSET_INFO *cs,
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index cae85f38c12..74e474cc28c 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1413,15 +1413,6 @@ my_casedn_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
}
-static uint
-my_ismbchar_utf16(CHARSET_INFO *cs, const char *b, const char *e)
-{
- my_wc_t wc;
- int res= cs->cset->mb_wc(cs, &wc, (const uchar *) b, (const uchar *) e);
- return (uint) (res > 0 ? res : 0);
-}
-
-
static int
my_charlen_utf16(CHARSET_INFO *cs, const uchar *str, const uchar *end)
{
@@ -1456,7 +1447,7 @@ my_numchars_utf16(CHARSET_INFO *cs,
size_t nchars= 0;
for ( ; ; nchars++)
{
- size_t charlen= my_ismbchar_utf16(cs, b, e);
+ size_t charlen= my_ismbchar(cs, b, e);
if (!charlen)
break;
b+= charlen;
@@ -1576,7 +1567,6 @@ static MY_COLLATION_HANDLER my_collation_utf16_bin_handler =
MY_CHARSET_HANDLER my_charset_utf16_handler=
{
NULL, /* init */
- my_ismbchar_utf16, /* ismbchar */
my_mbcharlen_utf16, /* mbcharlen */
my_numchars_utf16,
my_charpos_utf16,
@@ -1799,7 +1789,6 @@ static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler =
static MY_CHARSET_HANDLER my_charset_utf16le_handler=
{
NULL, /* init */
- my_ismbchar_utf16,
my_mbcharlen_utf16,
my_numchars_utf16,
my_charpos_utf16,
@@ -2075,15 +2064,6 @@ my_casedn_utf32(CHARSET_INFO *cs, char *src, size_t srclen,
}
-static uint
-my_ismbchar_utf32(CHARSET_INFO *cs __attribute__((unused)),
- const char *b,
- const char *e)
-{
- return b + 4 > e || !IS_UTF32_MBHEAD4(b[0], b[1]) ? 0 : 4;
-}
-
-
static int
my_charlen_utf32(CHARSET_INFO *cs __attribute__((unused)),
const uchar *b, const uchar *e)
@@ -2545,7 +2525,6 @@ static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
MY_CHARSET_HANDLER my_charset_utf32_handler=
{
NULL, /* init */
- my_ismbchar_utf32,
my_mbcharlen_utf32,
my_numchars_utf32,
my_charpos_utf32,
@@ -2883,14 +2862,6 @@ my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)),
}
-static uint my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)),
- const char *b,
- const char *e)
-{
- return b + 2 > e ? 0 : 2;
-}
-
-
static uint my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
uint c __attribute__((unused)))
{
@@ -3032,7 +3003,6 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
MY_CHARSET_HANDLER my_charset_ucs2_handler=
{
NULL, /* init */
- my_ismbchar_ucs2, /* ismbchar */
my_mbcharlen_ucs2, /* mbcharlen */
my_numchars_ucs2,
my_charpos_ucs2,
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index 308f5f0f7d1..b24fdb3075f 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -198,6 +198,7 @@ static const uchar sort_order_ujis[]=
#define IS_MB2_KATA(x,y) (isujis_ss2(x) && iskata(y))
#define IS_MB2_CHAR(x, y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
#define IS_MB3_CHAR(x, y, z) (isujis_ss3(x) && IS_MB2_JIS(y,z))
+#define IS_MB_PREFIX2(x,y) (isujis_ss3(x) && isujis(y))
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
@@ -219,16 +220,6 @@ static const uchar sort_order_ujis[]=
#include "strcoll.ic"
-static uint ismbchar_ujis(CHARSET_INFO *cs __attribute__((unused)),
- const char* p, const char *e)
-{
- return ((*(uchar*)(p)<0x80)? 0:\
- isujis(*(p)) && (e)-(p)>1 && isujis(*((p)+1))? 2:\
- isujis_ss2(*(p)) && (e)-(p)>1 && iskata(*((p)+1))? 2:\
- isujis_ss3(*(p)) && (e)-(p)>2 && isujis(*((p)+1)) && isujis(*((p)+2))? 3:\
- 0);
-}
-
static uint mbcharlen_ujis(CHARSET_INFO *cs __attribute__((unused)),uint c)
{
return (isujis(c)? 2: isujis_ss2(c)? 2: isujis_ss3(c)? 3: 1);
@@ -67264,7 +67255,6 @@ static MY_COLLATION_HANDLER my_collation_ujis_bin_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
- ismbchar_ujis,
mbcharlen_ujis,
my_numchars_mb,
my_charpos_mb,
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index c0865157ad5..3a5616b7323 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -5426,12 +5426,6 @@ my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2)
#include "strcoll.ic"
-static uint my_ismbchar_utf8(CHARSET_INFO *cs,const char *b, const char *e)
-{
- int res= my_charlen_utf8(cs, (const uchar*) b, (const uchar*) e);
- return (res>1) ? res : 0;
-}
-
static uint my_mbcharlen_utf8(CHARSET_INFO *cs __attribute__((unused)),
uint c)
{
@@ -5497,7 +5491,6 @@ static MY_COLLATION_HANDLER my_collation_utf8_bin_handler =
MY_CHARSET_HANDLER my_charset_utf8_handler=
{
NULL, /* init */
- my_ismbchar_utf8,
my_mbcharlen_utf8,
my_numchars_mb,
my_charpos_mb,
@@ -5685,8 +5678,7 @@ static int my_strnncoll_utf8_cs(CHARSET_INFO *cs,
static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool diff_if_only_endspace_difference)
+ const uchar *t, size_t tlen)
{
int s_res, t_res, res;
my_wc_t s_wc, t_wc;
@@ -5695,10 +5687,6 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
int save_diff= 0;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
-#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
- diff_if_only_endspace_difference= 0;
-#endif
-
while ( s < se && t < te )
{
s_res=my_utf8_uni(cs,&s_wc, s, se);
@@ -5729,37 +5717,22 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
slen= se-s;
tlen= te-t;
- res= 0;
-
- if (slen != tlen)
- {
- int swap= 1;
- if (diff_if_only_endspace_difference)
- res= 1; /* Assume 'a' is bigger */
- if (slen < tlen)
- {
- slen= tlen;
- s= t;
- se= te;
- swap= -1;
- res= -res;
- }
- /*
- This following loop uses the fact that in UTF-8
- all multibyte characters are greater than space,
- and all multibyte head characters are greater than
- space. It means if we meet a character greater
- than space, it always means that the longer string
- is greater. So we can reuse the same loop from the
- 8bit version, without having to process full multibute
- sequences.
- */
- for ( ; s < se; s++)
- {
- if (*s != (uchar) ' ')
- return (*s < (uchar) ' ') ? -swap : swap;
- }
- }
+
+ /*
+ The following code uses the fact that in UTF-8
+ all multibyte characters are greater than space,
+ and all multibyte head characters are greater than
+ space. It means if we meet a character greater
+ than space, it always means that the longer string
+ is greater. So we can reuse the same loop from the
+ 8bit version, without having to process full multibyte
+ sequences.
+ */
+ if ((res= slen == tlen ? 0 :
+ slen < tlen ?
+ -my_strnncollsp_padspace_bin(t, tlen) :
+ my_strnncollsp_padspace_bin(s, slen)))
+ return res;
return save_diff;
}
@@ -7044,15 +7017,6 @@ my_charlen_filename(CHARSET_INFO *cs, const uchar *str, const uchar *end)
}
-static uint
-my_ismbchar_filename(CHARSET_INFO *cs, const char *str, const char *end)
-{
- my_wc_t wc;
- int rc= my_mb_wc_filename(cs, &wc, (const uchar *) str, (const uchar *) end);
- return rc > 1 ? rc : 0;
-}
-
-
#define MY_FUNCTION_NAME(x) my_ ## x ## _filename
#define CHARLEN(cs,str,end) my_charlen_filename(cs,str,end)
#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
@@ -7081,7 +7045,6 @@ static MY_COLLATION_HANDLER my_collation_filename_handler =
static MY_CHARSET_HANDLER my_charset_filename_handler=
{
NULL, /* init */
- my_ismbchar_filename,
my_mbcharlen_utf8,
my_numchars_mb,
my_charpos_mb,
@@ -7793,14 +7756,6 @@ size_t my_well_formed_len_utf8mb4(CHARSET_INFO *cs,
static uint
-my_ismbchar_utf8mb4(CHARSET_INFO *cs, const char *b, const char *e)
-{
- int res= my_charlen_utf8mb4(cs, (const uchar*) b, (const uchar*) e);
- return (res > 1) ? res : 0;
-}
-
-
-static uint
my_mbcharlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)), uint c)
{
if (c < 0x80)
@@ -7852,7 +7807,6 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_bin_handler =
MY_CHARSET_HANDLER my_charset_utf8mb4_handler=
{
NULL, /* init */
- my_ismbchar_utf8mb4,
my_mbcharlen_utf8mb4,
my_numchars_mb,
my_charpos_mb,
diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c
index 8e3527f9ff1..c18733b06ae 100644
--- a/strings/ctype-win1250ch.c
+++ b/strings/ctype-win1250ch.c
@@ -478,9 +478,7 @@ static int my_strnncoll_win1250ch(CHARSET_INFO *cs __attribute__((unused)),
static
int my_strnncollsp_win1250ch(CHARSET_INFO * cs,
const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool diff_if_only_endspace_difference
- __attribute__((unused)))
+ const uchar *t, size_t tlen)
{
for ( ; slen && s[slen-1] == ' ' ; slen--);
for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
diff --git a/strings/str2int.c b/strings/str2int.c
index ec89503af5e..fe6cd6b793e 100644
--- a/strings/str2int.c
+++ b/strings/str2int.c
@@ -45,7 +45,7 @@
easy task. Coping with integer overflow and the asymmetric range of
twos complement machines is anything but easy.
- So that users of atoi and atol can check whether an error occured,
+ So that users of atoi and atol can check whether an error occurred,
I have taken a wholly unprecedented step: errno is CLEARED if this
call has no problems.
*/
diff --git a/strings/strcoll.ic b/strings/strcoll.ic
index 4bced593a23..a9693b1f3c0 100644
--- a/strings/strcoll.ic
+++ b/strings/strcoll.ic
@@ -210,17 +210,13 @@ MY_FUNCTION_NAME(strnncoll)(CHARSET_INFO *cs __attribute__((unused)),
@param a_length - the length of the left string
@param b - the right string
@param b_length - the length of the right string
- @param diff_if_only_endspace_difference - not used in the code.
- TODO: this should be eventually removed (in 10.2?)
@return - the comparison result
*/
static int
MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)),
const uchar *a, size_t a_length,
- const uchar *b, size_t b_length,
- my_bool diff_if_only_endspace_difference
- __attribute__((unused)))
+ const uchar *b, size_t b_length)
{
const uchar *a_end= a + a_length;
const uchar *b_end= b + b_length;
@@ -262,6 +258,45 @@ MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)),
return 0;
}
+
+#ifdef DEFINE_STRNXFRM
+#ifndef WEIGHT_MB2_FRM
+#define WEIGHT_MB2_FRM(x,y) WEIGHT_MB2(x,y)
+#endif
+
+static size_t
+MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
+ uchar *dst, size_t dstlen, uint nweights,
+ const uchar *src, size_t srclen, uint flags)
+{
+ uchar *d0= dst;
+ uchar *de= dst + dstlen;
+ const uchar *se= src + srclen;
+ const uchar *sort_order= cs->sort_order;
+
+ for (; dst < de && src < se && nweights; nweights--)
+ {
+ if (my_charlen(cs, (const char *) src, (const char *) se) > 1)
+ {
+ /*
+ Note, it is safe not to check (src < se)
+ in the code below, because my_charlen() would
+ not return 2 if src was too short
+ */
+ uint16 e= WEIGHT_MB2_FRM(src[0], src[1]);
+ *dst++= (uchar) (e >> 8);
+ if (dst < de)
+ *dst++= (uchar) (e & 0xFF);
+ src+= 2;
+ }
+ else
+ *dst++= sort_order ? sort_order[*src++] : *src++;
+ }
+ return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
+}
+#endif /* DEFINE_STRNXFRM */
+
+
/*
We usually include this file at least two times from the same source file,
for the _ci and the _bin collations. Prepare for the second inclusion.
@@ -273,3 +308,5 @@ MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)),
#undef WEIGHT_MB3
#undef WEIGHT_MB4
#undef WEIGHT_PAD_SPACE
+#undef WEIGHT_MB2_FRM
+#undef DEFINE_STRNXFRM