17 files changed, 436 insertions, 196 deletions
diff --git a/strings/CMakeLists.txt b/strings/CMakeLists.txt
index 3d9de566670..5ba1c0e5747 100755
--- a/strings/CMakeLists.txt
+++ b/strings/CMakeLists.txt
@@ -23,9 +23,10 @@ SET(STRINGS_SOURCES bchange.c bcmp.c bfill.c bmove512.c bmove_upp.c ctype-big5.c
                 ctype-latin1.c ctype-mb.c ctype-simple.c ctype-sjis.c ctype-tis620.c ctype-uca.c
                 ctype-ucs2.c ctype-ujis.c ctype-utf8.c ctype-win1250ch.c ctype.c decimal.c int2str.c
                 is_prefix.c llstr.c longlong2str.c my_strtoll10.c my_vsnprintf.c r_strinstr.c
-                str2int.c str_alloc.c strcend.c strend.c strfill.c strmake.c strmov.c strnmov.c 
+                str2int.c str_alloc.c strcend.c strend.c strfill.c strmake.c strmov.c strmov_overlapp.c strnmov.c 
                 strtod.c strtol.c strtoll.c strtoul.c strtoull.c strxmov.c strxnmov.c xml.c
-		my_strchr.c strcont.c strinstr.c strnlen.c)
+                my_strchr.c strcont.c strinstr.c strnlen.c
+                strappend.c)
 
 IF(NOT SOURCE_SUBLIBS)
   ADD_LIBRARY(strings ${STRINGS_SOURCES})
diff --git a/strings/Makefile.am b/strings/Makefile.am
index db9016b7148..94e24045d03 100644
--- a/strings/Makefile.am
+++ b/strings/Makefile.am
@@ -21,19 +21,19 @@ pkglib_LIBRARIES =	libmystrings.a
 # Exact one of ASSEMBLER_X
 if ASSEMBLER_x86
 ASRCS		= strings-x86.s longlong2str-x86.s my_strtoll10-x86.s
-CSRCS		= bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c strtod.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-ucs2.c ctype-uca.c ctype-win1250ch.c ctype-bin.c ctype-latin1.c my_vsnprintf.c xml.c decimal.c ctype-extra.c str_alloc.c longlong2str_asm.c my_strchr.c strmov.c
+CSRCS		= bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c strtod.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-ucs2.c ctype-uca.c ctype-win1250ch.c ctype-bin.c ctype-latin1.c my_vsnprintf.c xml.c decimal.c ctype-extra.c str_alloc.c longlong2str_asm.c my_strchr.c strmov.c strmov_overlapp.c
 else
 if ASSEMBLER_sparc32
 # These file MUST all be on the same line!! Otherwise automake
 # generats a very broken makefile
 ASRCS		= bmove_upp-sparc.s strappend-sparc.s strend-sparc.s strinstr-sparc.s strmake-sparc.s strmov-sparc.s strnmov-sparc.s strstr-sparc.s
-CSRCS		= strcont.c strfill.c strcend.c is_prefix.c longlong2str.c bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c strtod.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c strxmov.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-ucs2.c ctype-uca.c ctype-win1250ch.c ctype-bin.c ctype-latin1.c my_vsnprintf.c xml.c decimal.c ctype-extra.c my_strtoll10.c str_alloc.c my_strchr.c strmov.c
+CSRCS		= strcont.c strfill.c strcend.c is_prefix.c longlong2str.c bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c strtod.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c strxmov.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-ucs2.c ctype-uca.c ctype-win1250ch.c ctype-bin.c ctype-latin1.c my_vsnprintf.c xml.c decimal.c ctype-extra.c my_strtoll10.c str_alloc.c my_strchr.c strmov.c strmov_overlapp.c
 else
 #no assembler
 ASRCS		=
 # These file MUST all be on the same line!! Otherwise automake
 # generats a very broken makefile
-CSRCS		= strxmov.c bmove_upp.c strappend.c strcont.c strend.c strfill.c strcend.c is_prefix.c strstr.c strinstr.c strmake.c strnmov.c strmov.c longlong2str.c bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c strtod.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-ucs2.c ctype-uca.c ctype-win1250ch.c ctype-bin.c ctype-latin1.c my_vsnprintf.c xml.c decimal.c ctype-extra.c my_strtoll10.c str_alloc.c my_strchr.c
+CSRCS		= strxmov.c bmove_upp.c strappend.c strcont.c strend.c strfill.c strcend.c is_prefix.c strstr.c strinstr.c strmake.c strnmov.c strmov.c longlong2str.c bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c strtod.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-ucs2.c ctype-uca.c ctype-win1250ch.c ctype-bin.c ctype-latin1.c my_vsnprintf.c xml.c decimal.c ctype-extra.c my_strtoll10.c str_alloc.c my_strchr.c strmov_overlapp.c
 endif
 endif
 
@@ -54,7 +54,7 @@ EXTRA_DIST =		ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc
 			strinstr-sparc.s strmake-sparc.s strmov-sparc.s \
 			strnmov-sparc.s strstr-sparc.s strxmov-sparc.s \
 			t_ctype.h my_strchr.c CMakeLists.txt \
-			CHARSET_INFO.txt
+			CHARSET_INFO.txt strmov_overlapp.c
 
 libmystrings_a_LIBADD=
 conf_to_src_SOURCES = conf_to_src.c xml.c ctype.c bcmp.c
diff --git a/strings/bcmp.c b/strings/bcmp.c
index 1b6ed22fc22..39b5f24e055 100644
--- a/strings/bcmp.c
+++ b/strings/bcmp.c
@@ -24,7 +24,7 @@
 #include <my_global.h>
 #include "m_string.h"
 
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
 #undef bcmp
 #undef HAVE_BCMP
 #endif
@@ -50,7 +50,7 @@ uint len;					/* 0 <= len <= 65535 */
 
 #else
 
-#ifndef HAVE_purify
+#ifndef HAVE_valgrind
 size_t bcmp(register const uchar *s1,register const uchar *s2,
             register size_t len)
 #else
diff --git a/strings/bmove512.c b/strings/bmove512.c
index c3f0446ead6..0ae23d1f42d 100644
--- a/strings/bmove512.c
+++ b/strings/bmove512.c
@@ -33,7 +33,7 @@
 #ifdef HAVE_LONG_LONG
 #define LONG ulonglong
 #else
-#define LONG ulonglong
+#define LONG ulong
 #endif
 
 void bmove512(uchar *to, const uchar *from, register size_t length)
diff --git a/strings/conf_to_src.c b/strings/conf_to_src.c
index 7e742050aa8..aa40a269848 100644
--- a/strings/conf_to_src.c
+++ b/strings/conf_to_src.c
@@ -184,11 +184,12 @@ void dispcset(FILE *f,CHARSET_INFO *cs)
 {
   fprintf(f,"{\n");
   fprintf(f,"  %d,%d,%d,\n",cs->number,0,0);
-  fprintf(f,"  MY_CS_COMPILED%s%s%s%s,\n",
+  fprintf(f,"  MY_CS_COMPILED%s%s%s%s%s,\n",
           cs->state & MY_CS_BINSORT         ? "|MY_CS_BINSORT"   : "",
           cs->state & MY_CS_PRIMARY         ? "|MY_CS_PRIMARY"   : "",
           is_case_sensitive(cs)             ? "|MY_CS_CSSORT"    : "",
-          my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "");
+          my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "",
+	  !my_charset_is_ascii_compatible(cs) ? "|MY_CS_NONASCII": "");
   
   if (cs->name)
   {
diff --git a/strings/ctype-extra.c b/strings/ctype-extra.c
index 75244e40435..64cf30e3673 100644
--- a/strings/ctype-extra.c
+++ b/strings/ctype-extra.c
@@ -6804,7 +6804,7 @@ CHARSET_INFO compiled_charsets[] = {
 #ifdef HAVE_CHARSET_swe7
 {
   10,0,0,
-  MY_CS_COMPILED|MY_CS_PRIMARY,
+  MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_NONASCII,
   "swe7",                     /* cset name     */
   "swe7_swedish_ci",                     /* coll name     */
   "",                       /* comment       */
@@ -8454,7 +8454,7 @@ CHARSET_INFO compiled_charsets[] = {
 #ifdef HAVE_CHARSET_swe7
 {
   82,0,0,
-  MY_CS_COMPILED|MY_CS_BINSORT,
+  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_NONASCII,
   "swe7",                     /* cset name     */
   "swe7_bin",                     /* coll name     */
   "",                       /* comment       */
@@ -8550,72 +8550,6 @@ CHARSET_INFO compiled_charsets[] = {
 }
 ,
 #endif
-#ifdef HAVE_CHARSET_geostd8
-{
-  92,0,0,
-  MY_CS_COMPILED|MY_CS_PRIMARY,
-  "geostd8",                     /* cset name     */
-  "geostd8_general_ci",                     /* coll name     */
-  "",                       /* comment       */
-  NULL,                       /* tailoring     */
-  ctype_geostd8_general_ci,                   /* ctype         */
-  to_lower_geostd8_general_ci,                /* lower         */
-  to_upper_geostd8_general_ci,                /* upper         */
-  sort_order_geostd8_general_ci,            /* sort_order    */
-  NULL,                       /* contractions  */
-  NULL,                       /* sort_order_big*/
-  to_uni_geostd8_general_ci,                  /* to_uni        */
-  NULL,                       /* from_uni      */
-  my_unicase_default,         /* caseinfo      */
-  NULL,                       /* state map     */
-  NULL,                       /* ident map     */
-  1,                          /* strxfrm_multiply*/
-  1,                          /* caseup_multiply*/
-  1,                          /* casedn_multiply*/
-  1,                          /* mbminlen      */
-  1,                          /* mbmaxlen      */
-  0,                          /* min_sort_char */
-  255,                        /* max_sort_char */
-  ' ',                        /* pad_char      */
-  0,                          /* escape_with_backslash_is_dangerous */
-  &my_charset_8bit_handler,
-  &my_collation_8bit_simple_ci_handler,
-}
-,
-#endif
-#ifdef HAVE_CHARSET_geostd8
-{
-  93,0,0,
-  MY_CS_COMPILED|MY_CS_BINSORT,
-  "geostd8",                     /* cset name     */
-  "geostd8_bin",                     /* coll name     */
-  "",                       /* comment       */
-  NULL,                       /* tailoring     */
-  ctype_geostd8_bin,                   /* ctype         */
-  to_lower_geostd8_bin,                /* lower         */
-  to_upper_geostd8_bin,                /* upper         */
-  NULL,                     /* sort_order    */
-  NULL,                       /* contractions  */
-  NULL,                       /* sort_order_big*/
-  to_uni_geostd8_bin,                  /* to_uni        */
-  NULL,                       /* from_uni      */
-  my_unicase_default,         /* caseinfo      */
-  NULL,                       /* state map     */
-  NULL,                       /* ident map     */
-  1,                          /* strxfrm_multiply*/
-  1,                          /* caseup_multiply*/
-  1,                          /* casedn_multiply*/
-  1,                          /* mbminlen      */
-  1,                          /* mbmaxlen      */
-  0,                          /* min_sort_char */
-  255,                        /* max_sort_char */
-  ' ',                        /* pad_char      */
-  0,                          /* escape_with_backslash_is_dangerous */
-  &my_charset_8bit_handler,
-  &my_collation_8bit_bin_handler,
-}
-,
-#endif
 #ifdef HAVE_CHARSET_latin1
 {
   94,0,0,
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 903811e2ab9..51f93954bd5 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -567,8 +567,7 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
   char *min_end= min_str + res_length;
   char *max_end= max_str + res_length;
   size_t maxcharlen= res_length / cs->mbmaxlen;
-  const char *contraction_flags= cs->contractions ? 
-              ((const char*) cs->contractions) + 0x40*0x40 : NULL;
+  my_bool have_contractions= my_uca_have_contractions(cs);
 
   for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
   {
@@ -636,8 +635,8 @@ fill_max_and_min:
         'ab\min\min\min\min' and 'ab\max\max\max\max'.
 
       */
-      if (contraction_flags && ptr + 1 < end &&
-          contraction_flags[(uchar) *ptr])
+      if (have_contractions && ptr + 1 < end &&
+          my_uca_can_be_contraction_head(cs, (uchar) *ptr))
       {
         /* Ptr[0] is a contraction head. */
         
@@ -659,8 +658,8 @@ fill_max_and_min:
           is not a contraction, then we put only ptr[0],
           and continue with ptr[1] on the next loop.
         */
-        if (contraction_flags[(uchar) ptr[1]] &&
-            cs->contractions[(*ptr-0x40)*0x40 + ptr[1] - 0x40])
+        if (my_uca_can_be_contraction_tail(cs, (uchar) ptr[1]) &&
+            my_uca_contraction2_weight(cs, (uchar) ptr[0], (uchar) ptr[1]))
         {
           /* Contraction found */
           if (maxcharlen == 1 || min_str + 1 >= min_end)
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index ac426e0d7b5..cc7581ffe6e 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -4672,7 +4672,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
 CHARSET_INFO my_charset_sjis_japanese_ci=
 {
     13,0,0,		/* number */
-    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM,	/* state      */
+    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_NONASCII,	/* state      */
     "sjis",		/* cs name    */
     "sjis_japanese_ci",	/* name */
     "",			/* comment    */
@@ -4704,7 +4704,7 @@ CHARSET_INFO my_charset_sjis_japanese_ci=
 CHARSET_INFO my_charset_sjis_bin=
 {
     88,0,0,		/* number */
-    MY_CS_COMPILED|MY_CS_BINSORT,	/* state      */
+    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_NONASCII, /* state      */
     "sjis",		/* cs name    */
     "sjis_bin",		/* name */
     "",			/* comment    */
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index 6ae0cc3a293..b2a60265a0a 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -36,6 +36,12 @@
 #include "m_string.h"
 #include "m_ctype.h"
 
+
+#define MY_UCA_CNT_FLAG_SIZE 4096
+#define MY_UCA_CNT_FLAG_MASK 4095
+#define MY_UCA_CNT_HEAD 1
+#define MY_UCA_CNT_TAIL 2
+
 #ifdef HAVE_UCA_COLLATIONS
 
 #define MY_UCA_NPAGES 256
@@ -6713,6 +6719,16 @@ static const char hungarian[]=
     "&U < \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170";
 
 
+static const char croatian[]=
+
+"&C <  \\u010D <<< \\u010C < \\u0107 <<< \\u0106 "
+"&D <  d\\u017E <<< \\u01C6 <<< D\\u017E <<< \\u01C5 <<< D\\u017D <<< \\u01C4 "
+"   <  \\u0111 <<< \\u0110 "
+"&L < lj <<< \\u01C9 <<< Lj <<< \\u01C8 <<< LJ <<< \\u01C7 "
+"&N < nj <<< \\u01CC <<< Nj <<< \\u01CB <<< NJ <<< \\u01CA "
+"&S < \\u0161 <<< \\u0160 "
+"&Z < \\u017E <<< \\u017D";
+
 /*
   Unicode Collation Algorithm:
   Collation element (weight) scanner, 
@@ -6726,7 +6742,7 @@ typedef struct my_uca_scanner_st
   const uchar  *send;	/* End of the input string                */
   uchar *uca_length;
   uint16 **uca_weight;
-  uint16 *contractions;
+  MY_CONTRACTIONS *contractions;
   uint16 implicit[2];
   int page;
   int code;
@@ -6746,6 +6762,75 @@ typedef struct my_uca_scanner_handler_st
 
 static uint16 nochar[]= {0,0};
 
+/********** Helper functions to handle contraction ************/
+
+
+/**
+  Mark a character as a contraction part
+  
+  @cs       Pointer to CHARSET_INFO data
+  @wc       Unicode code point
+  @flag     flag: "is contraction head", "is contraction tail"
+*/
+
+static void
+my_uca_add_contraction_flag(CHARSET_INFO *cs, my_wc_t wc, int flag)
+{
+  cs->contractions->flags[wc & MY_UCA_CNT_FLAG_MASK]|= flag;
+}
+
+
+/**
+  Add a new contraction into contraction list
+  
+  @cs       Pointer to CHARSET_INFO data
+  @wc       Unicode code points of the characters
+  @len      Number of characters
+  
+  @return   New contraction
+  @retval   Pointer to a newly added contraction
+*/
+
+static MY_CONTRACTION *
+my_uca_add_contraction(CHARSET_INFO *cs,
+                       my_wc_t *wc, int len __attribute__((unused)))
+{
+  MY_CONTRACTIONS *list= cs->contractions;
+  MY_CONTRACTION *next= &list->item[list->nitems];
+  DBUG_ASSERT(len == 2); /* We currently support only contraction2 */
+  next->ch[0]= wc[0];
+  next->ch[1]= wc[1];
+  list->nitems++;
+  return next;
+}
+
+
+/**
+  Allocate and initialize memory for contraction list and flags
+  
+  @cs       Pointer to CHARSET_INFO data
+  @alloc    Memory allocation function (typically points to my_alloc_once)
+  @n        Number of contractions
+  
+  @return   Error code
+  @retval   0 - memory allocated successfully
+  @retval   1 - not enough memory
+*/
+
+static my_bool
+my_uca_alloc_contractions(CHARSET_INFO *cs, void *(*alloc)(size_t), size_t n)
+{
+  uint size= n * sizeof(MY_CONTRACTION);
+  if (!(cs->contractions= (*alloc)(sizeof(MY_CONTRACTIONS))))
+    return 1;
+  bzero(cs->contractions, sizeof(MY_CONTRACTIONS));
+  if (!(cs->contractions->item= (*alloc)(size)) ||
+      !(cs->contractions->flags= (char*) (*alloc)(MY_UCA_CNT_FLAG_SIZE)))
+    return 1;
+  bzero((void*) cs->contractions->item, size);
+  bzero((void*) cs->contractions->flags, MY_UCA_CNT_FLAG_SIZE);
+  return 0;
+}
 
 #ifdef HAVE_CHARSET_ucs2
 /*
@@ -6766,7 +6851,7 @@ static uint16 nochar[]= {0,0};
 */
 
 static void my_uca_scanner_init_ucs2(my_uca_scanner *scanner,
-                                     CHARSET_INFO *cs __attribute__((unused)),
+                                     CHARSET_INFO *cs,
                                      const uchar *str, size_t length)
 {
   scanner->wbeg= nochar; 
@@ -6777,6 +6862,7 @@ static void my_uca_scanner_init_ucs2(my_uca_scanner *scanner,
     scanner->uca_length= cs->sort_order;
     scanner->uca_weight= cs->sort_order_big;
     scanner->contractions= cs->contractions;
+    scanner->cs= cs;
     return;
   }
 
@@ -6865,18 +6951,23 @@ static int my_uca_scanner_next_ucs2(my_uca_scanner *scanner)
     
     if (scanner->contractions && (scanner->sbeg <= scanner->send))
     {
-      int cweight;
+      my_wc_t wc1= ((scanner->page << 8) | scanner->code);
       
-      if (!scanner->page && !scanner->sbeg[0] &&
-          (scanner->sbeg[1] > 0x40) && (scanner->sbeg[1] < 0x80) &&
-          (scanner->code > 0x40) && (scanner->code < 0x80) &&
-          (cweight= scanner->contractions[(scanner->code-0x40)*0x40+scanner->sbeg[1]-0x40]))
+      if (my_uca_can_be_contraction_head(scanner->cs, wc1))
+      {
+        uint16 *cweight;
+        my_wc_t wc2= (((my_wc_t) scanner->sbeg[0]) << 8) | scanner->sbeg[1];
+        if (my_uca_can_be_contraction_tail(scanner->cs, wc2) &&
+          (cweight= my_uca_contraction2_weight(scanner->cs,
+                                               scanner->code,
+                                               scanner->sbeg[1])))
         {
           scanner->implicit[0]= 0;
           scanner->wbeg= scanner->implicit;
           scanner->sbeg+=2;
-          return cweight;
+          return *cweight;
         }
+      }
     }
     
     if (!ucaw[scanner->page])
@@ -6959,23 +7050,22 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner)
     scanner->code= wc & 0xFF;
     scanner->sbeg+= mb_len;
     
-    if (scanner->contractions && !scanner->page &&
-        (scanner->code > 0x40) && (scanner->code < 0x80))
+    if (my_uca_have_contractions(scanner->cs) &&
+        my_uca_can_be_contraction_head(scanner->cs, wc))
     {
-      uint page1, code1, cweight;
+      my_wc_t wc2;
+      uint16 *cweight;
       
-      if (((mb_len= scanner->cs->cset->mb_wc(scanner->cs, &wc,
+      if (((mb_len= scanner->cs->cset->mb_wc(scanner->cs, &wc2,
                                             scanner->sbeg, 
                                             scanner->send)) >=0) &&
-           (!(page1= (wc >> 8))) &&
-           ((code1= (wc & 0xFF)) > 0x40) &&
-           (code1 < 0x80) && 
-           (cweight= scanner->contractions[(scanner->code-0x40)*0x40 + code1-0x40]))
+           my_uca_can_be_contraction_tail(scanner->cs, wc2) &&
+          (cweight= my_uca_contraction2_weight(scanner->cs, wc, wc2)))
       {
         scanner->implicit[0]= 0;
         scanner->wbeg= scanner->implicit;
         scanner->sbeg+= mb_len;
-        return cweight;
+        return *cweight;
       }
     }
     
@@ -7012,6 +7102,33 @@ static my_uca_scanner_handler my_any_uca_scanner_handler=
   my_uca_scanner_next_any
 };
 
+
+
+/**
+  Helper function:
+  Find address of weights of the given character.
+
+  @weights  UCA weight array
+  @lengths  UCA length array
+  @ch       character Unicode code point
+
+  @return Weight array
+  @retval  pointer to weight array for the given character,
+           or NULL if this page does not have implicit weights.
+*/
+
+static inline uint16 *
+my_char_weight_addr(CHARSET_INFO *cs, uint wc)
+{
+  uint page= (wc >> 8);
+  uint ofst= wc & 0xFF;
+  return cs->sort_order_big[page] ?
+         cs->sort_order_big[page] + ofst * cs->sort_order[page] :
+         NULL;
+}
+
+
+
 /*
   Compares two strings according to the collation
 
@@ -7683,8 +7800,8 @@ ex:
 
 typedef struct my_coll_rule_item_st
 {
-  uint base;     /* Base character                             */
-  uint curr[2];  /* Current character                          */
+  my_wc_t base;     /* Base character                             */
+  my_wc_t curr[2];  /* Current character                          */
   int diff[3];   /* Primary, Secondary and Tertiary difference */
 } MY_COLL_RULE;
 
@@ -7834,6 +7951,7 @@ static int my_coll_rule_parse(MY_COLL_RULE *rule, size_t mitems,
 static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(size_t))
 {
   MY_COLL_RULE rule[MY_MAX_COLL_RULE];
+  MY_COLL_RULE *r, *rfirst, *rlast;
   char errstr[128];
   uchar   *newlengths;
   uint16 **newweights;
@@ -7858,6 +7976,9 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(size_t))
     return 1;
   }
   
+  rfirst= rule;
+  rlast= rule + rc;
+  
   if (!cs->caseinfo)
     cs->caseinfo= my_unicase_default;
   
@@ -7941,44 +8062,21 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(size_t))
   /* Now process contractions */
   if (ncontractions)
   {
-    /*
-      8K for weights for basic latin letter pairs,
-      plus 256 bytes for "is contraction part" flags.
-    */
-    uint size= 0x40*0x40*sizeof(uint16) + 256;
-    char *contraction_flags;
-    if (!(cs->contractions= (uint16*) (*alloc)(size)))
-        return 1;
-    bzero((void*)cs->contractions, size);
-    contraction_flags= ((char*) cs->contractions) + 0x40*0x40;
-    for (i=0; i < rc; i++)
+    if (my_uca_alloc_contractions(cs, alloc, ncontractions))
+      return 1;
+    for (r= rfirst; r < rlast; r++)
     {
-      if (rule[i].curr[1])
+      uint16 *to;
+      if (r->curr[1]) /* Contraction */
       {
-        uint pageb= (rule[i].base >> 8) & 0xFF;
-        uint chb= rule[i].base & 0xFF;
-        uint16 *offsb= defweights[pageb] + chb*deflengths[pageb];
-        uint offsc;
-        
-        if (offsb[1] || 
-            rule[i].curr[0] < 0x40 || rule[i].curr[0] > 0x7f ||
-            rule[i].curr[1] < 0x40 || rule[i].curr[1] > 0x7f)
-        {
-          /* 
-           TODO: add error reporting;
-           We support only basic latin letters contractions at this point.
-           Also, We don't support contractions with weight longer than one.
-           Otherwise, we'd need much more memory.
-          */
-          return 1;
-        }
-        offsc= (rule[i].curr[0]-0x40)*0x40+(rule[i].curr[1]-0x40);
-        
-        /* Copy base weight applying primary difference */
-        cs->contractions[offsc]= offsb[0] + rule[i].diff[0];
-        /* Mark both letters as "is contraction part */
-        contraction_flags[rule[i].curr[0]]= 1;
-        contraction_flags[rule[i].curr[1]]= 1;
+        /* Mark both letters as "is contraction part" */
+        my_uca_add_contraction_flag(cs, r->curr[0], MY_UCA_CNT_HEAD);
+        my_uca_add_contraction_flag(cs, r->curr[1], MY_UCA_CNT_TAIL);
+        to= my_uca_add_contraction(cs, r->curr, 2)->weight;
+        /* Copy weight from the reset character */
+        to[0]= my_char_weight_addr(cs, r->base)[0];
+        /* Apply primary difference */
+        to[0]+= r->diff[0];
       }
     }
   }
@@ -8090,7 +8188,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
 CHARSET_INFO my_charset_ucs2_unicode_ci=
 {
     128,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_unicode_ci",	/* name         */
     "",			/* comment      */
@@ -8122,7 +8220,7 @@ CHARSET_INFO my_charset_ucs2_unicode_ci=
 CHARSET_INFO my_charset_ucs2_icelandic_uca_ci=
 {
     129,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_icelandic_ci",/* name         */
     "",			/* comment      */
@@ -8154,7 +8252,7 @@ CHARSET_INFO my_charset_ucs2_icelandic_uca_ci=
 CHARSET_INFO my_charset_ucs2_latvian_uca_ci=
 {
     130,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_latvian_ci",	/* name         */
     "",			/* comment      */
@@ -8186,7 +8284,7 @@ CHARSET_INFO my_charset_ucs2_latvian_uca_ci=
 CHARSET_INFO my_charset_ucs2_romanian_uca_ci=
 {
     131,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_romanian_ci",	/* name         */
     "",			/* comment      */
@@ -8218,7 +8316,7 @@ CHARSET_INFO my_charset_ucs2_romanian_uca_ci=
 CHARSET_INFO my_charset_ucs2_slovenian_uca_ci=
 {
     132,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_slovenian_ci",/* name         */
     "",			/* comment      */
@@ -8250,7 +8348,7 @@ CHARSET_INFO my_charset_ucs2_slovenian_uca_ci=
 CHARSET_INFO my_charset_ucs2_polish_uca_ci=
 {
     133,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_polish_ci",	/* name         */
     "",			/* comment      */
@@ -8282,7 +8380,7 @@ CHARSET_INFO my_charset_ucs2_polish_uca_ci=
 CHARSET_INFO my_charset_ucs2_estonian_uca_ci=
 {
     134,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_estonian_ci",	/* name         */
     "",			/* comment      */
@@ -8314,7 +8412,7 @@ CHARSET_INFO my_charset_ucs2_estonian_uca_ci=
 CHARSET_INFO my_charset_ucs2_spanish_uca_ci=
 {
     135,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_spanish_ci",	/* name         */
     "",			/* comment      */
@@ -8346,7 +8444,7 @@ CHARSET_INFO my_charset_ucs2_spanish_uca_ci=
 CHARSET_INFO my_charset_ucs2_swedish_uca_ci=
 {
     136,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_swedish_ci",	/* name         */
     "",			/* comment      */
@@ -8378,7 +8476,7 @@ CHARSET_INFO my_charset_ucs2_swedish_uca_ci=
 CHARSET_INFO my_charset_ucs2_turkish_uca_ci=
 {
     137,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_turkish_ci",	/* name         */
     "",			/* comment      */
@@ -8410,7 +8508,7 @@ CHARSET_INFO my_charset_ucs2_turkish_uca_ci=
 CHARSET_INFO my_charset_ucs2_czech_uca_ci=
 {
     138,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_czech_ci",	/* name         */
     "",			/* comment      */
@@ -8443,7 +8541,7 @@ CHARSET_INFO my_charset_ucs2_czech_uca_ci=
 CHARSET_INFO my_charset_ucs2_danish_uca_ci=
 {
     139,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_danish_ci",	/* name         */
     "",			/* comment      */
@@ -8475,7 +8573,7 @@ CHARSET_INFO my_charset_ucs2_danish_uca_ci=
 CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci=
 {
     140,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_lithuanian_ci",/* name         */
     "",			/* comment      */
@@ -8507,7 +8605,7 @@ CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci=
 CHARSET_INFO my_charset_ucs2_slovak_uca_ci=
 {
     141,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_slovak_ci",	/* name         */
     "",			/* comment      */
@@ -8539,7 +8637,7 @@ CHARSET_INFO my_charset_ucs2_slovak_uca_ci=
 CHARSET_INFO my_charset_ucs2_spanish2_uca_ci=
 {
     142,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_spanish2_ci",	/* name         */
     "",			/* comment      */
@@ -8572,7 +8670,7 @@ CHARSET_INFO my_charset_ucs2_spanish2_uca_ci=
 CHARSET_INFO my_charset_ucs2_roman_uca_ci=
 {
     143,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_roman_ci",	/* name         */
     "",			/* comment      */
@@ -8605,7 +8703,7 @@ CHARSET_INFO my_charset_ucs2_roman_uca_ci=
 CHARSET_INFO my_charset_ucs2_persian_uca_ci=
 {
     144,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_persian_ci",	/* name         */
     "",			/* comment      */
@@ -8638,7 +8736,7 @@ CHARSET_INFO my_charset_ucs2_persian_uca_ci=
 CHARSET_INFO my_charset_ucs2_esperanto_uca_ci=
 {
     145,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_esperanto_ci",/* name         */
     "",			/* comment      */
@@ -8671,7 +8769,7 @@ CHARSET_INFO my_charset_ucs2_esperanto_uca_ci=
 CHARSET_INFO my_charset_ucs2_hungarian_uca_ci=
 {
     146,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_hungarian_ci",/* name         */
     "",			/* comment      */
@@ -8701,6 +8799,39 @@ CHARSET_INFO my_charset_ucs2_hungarian_uca_ci=
 };
 
 
+CHARSET_INFO my_charset_ucs2_croatian_uca_ci=
+{
+    149,0,0,             /* number       */
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    "ucs2",              /* cs name    */
+    "ucs2_croatian_ci",  /* name         */
+    "",                  /* comment      */
+    croatian,            /* tailoring    */
+    NULL,                /* ctype        */
+    NULL,                /* to_lower     */
+    NULL,                /* to_upper     */
+    NULL,                /* sort_order   */
+    NULL,                /* contractions */
+    NULL,                /* sort_order_big*/
+    NULL,                /* tab_to_uni   */
+    NULL,                /* tab_from_uni */
+    my_unicase_default,  /* caseinfo     */
+    NULL,                /* state_map    */
+    NULL,                /* ident_map    */
+    8,                   /* strxfrm_multiply */
+    1,                   /* caseup_multiply  */
+    1,                   /* casedn_multiply  */
+    2,                   /* mbminlen     */
+    2,                   /* mbmaxlen     */
+    9,                   /* min_sort_char */
+    0xFFFF,              /* max_sort_char */
+    ' ',                 /* pad char      */
+    0,                   /* escape_with_backslash_is_dangerous */
+    &my_charset_ucs2_handler,
+    &my_collation_ucs2_uca_handler
+};
+
+
 #endif
 
 
@@ -9358,6 +9489,113 @@ CHARSET_INFO my_charset_utf8_hungarian_uca_ci=
     &my_collation_any_uca_handler
 };
 
+CHARSET_INFO my_charset_utf8_croatian_uca_ci=
+{
+    213,0,0,            /* number       */
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    "utf8",             /* cs name    */
+    "utf8_croatian_ci", /* name         */
+    "",                 /* comment      */
+    croatian,           /* tailoring    */
+    ctype_utf8,         /* ctype        */
+    NULL,               /* to_lower     */
+    NULL,               /* to_upper     */
+    NULL,               /* sort_order   */
+    NULL,               /* contractions */
+    NULL,               /* sort_order_big*/
+    NULL,               /* tab_to_uni   */
+    NULL,               /* tab_from_uni */
+    my_unicase_default, /* caseinfo     */
+    NULL,               /* state_map    */
+    NULL,               /* ident_map    */
+    8,                  /* strxfrm_multiply */
+    1,                  /* caseup_multiply  */
+    1,                  /* casedn_multiply  */
+    1,                  /* mbminlen     */
+    3,                  /* mbmaxlen     */
+    9,                  /* min_sort_char */
+    0xFFFF,             /* max_sort_char */
+    ' ',                /* pad char      */
+    0,                  /* escape_with_backslash_is_dangerous */
+    &my_charset_utf8_handler,
+    &my_collation_any_uca_handler
+};
+
 #endif /* HAVE_CHARSET_utf8 */
 
 #endif /* HAVE_UCA_COLLATIONS */
+
+/**
+  Check if UCA data has contractions (public version)
+
+  @cs       Pointer to CHARSET_INFO data
+  @retval   0 - no contraction, 1 - have contractions.
+*/
+
+my_bool
+my_uca_have_contractions(CHARSET_INFO *cs)
+{
+  return cs->contractions != NULL;
+}
+
+/**
+  Check if a character can be contraction head
+  
+  @cs       Pointer to CHARSET_INFO data
+  @wc       Code point
+  
+  @retval   0 - cannot be contraction head
+  @retval   1 - can be contraction head
+*/
+
+my_bool
+my_uca_can_be_contraction_head(CHARSET_INFO *cs, my_wc_t wc)
+{
+  return cs->contractions->flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_CNT_HEAD;
+}
+
+
+/**
+  Check if a character can be contraction tail
+  
+  @cs       Pointer to CHARSET_INFO data
+  @wc       Code point
+  
+  @retval   0 - cannot be contraction tail
+  @retval   1 - can be contraction tail
+*/
+
+my_bool
+my_uca_can_be_contraction_tail(CHARSET_INFO *cs, my_wc_t wc)
+{
+  return cs->contractions->flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_CNT_TAIL;
+}
+
+
+/**
+  Find a contraction and return its weight array
+  
+  @cs       Pointer to CHARSET data
+  @wc1      First character
+  @wc2      Second character
+  
+  @return   Weight array
+  @retval   NULL - no contraction found
+  @retval   ptr  - contraction weight array
+*/
+
+uint16 *
+my_uca_contraction2_weight(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
+{
+  MY_CONTRACTIONS *list= cs->contractions;
+  MY_CONTRACTION *c, *last;
+  for (c= list->item, last= &list->item[list->nitems]; c < last; c++)
+  {
+    if (c->ch[0] == wc1 && c->ch[1] == wc2)
+    {
+      return c->weight;
+    }
+  }
+  return NULL;
+}
+
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index cead55f8a0a..bf9c6f9626a 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1498,6 +1498,14 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
   }
 }
 
+
+static inline my_wc_t
+ucs2_to_wc(const uchar *ptr)
+{
+  return (((uint) ptr[0]) << 8) + ptr[1];
+}
+
+
 /*
 ** Calculate min_str and max_str that ranges a LIKE string.
 ** Arguments:
@@ -1526,12 +1534,12 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
   char *min_org=min_str;
   char *min_end=min_str+res_length;
   size_t charlen= res_length / cs->mbmaxlen;
-  const char *contraction_flags= cs->contractions ?
-             ((const char*) cs->contractions) + 0x40*0x40 : NULL;
+  my_bool have_contractions= my_uca_have_contractions(cs);
   
   for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
         ; ptr+=2, charlen--)
   {
+    my_wc_t wc;
     if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end)
     {
       ptr+=2;					/* Skip escape */
@@ -1567,9 +1575,10 @@ fill_max_and_min:
       return 0;
     }
 
-    if (contraction_flags && ptr + 3 < end &&
-        ptr[0] == '\0' && contraction_flags[(uchar) ptr[1]])
+    if (have_contractions && ptr + 3 < end &&
+        my_uca_can_be_contraction_head(cs, (wc= ucs2_to_wc((uchar*) ptr))))
     {
+      my_wc_t wc2;
       /* Contraction head found */
       if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many))
       {
@@ -1581,8 +1590,8 @@ fill_max_and_min:
         Check if the second letter can be contraction part,
         and if two letters really produce a contraction.
       */
-      if (ptr[2] == '\0' && contraction_flags[(uchar) ptr[3]] &&
-          cs->contractions[(ptr[1]-0x40)*0x40 + ptr[3] - 0x40])
+      if (my_uca_can_be_contraction_tail(cs, (wc2= ucs2_to_wc((uchar*) ptr + 2))) &&
+          my_uca_contraction2_weight(cs, wc , wc2))
       {
         /* Contraction found */
         if (charlen == 1 || min_str + 2 >= min_end)
@@ -1702,7 +1711,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler=
 CHARSET_INFO my_charset_ucs2_general_ci=
 {
     35,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_general_ci",	/* name         */
     "",			/* comment      */
@@ -1734,7 +1743,7 @@ CHARSET_INFO my_charset_ucs2_general_ci=
 CHARSET_INFO my_charset_ucs2_bin=
 {
     90,0,0,		/* number       */
-    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE,
+    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
     "ucs2",		/* cs name    */
     "ucs2_bin",		/* name         */
     "",			/* comment      */
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index ae942b59caa..91f633e45ce 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -4200,7 +4200,7 @@ static MY_CHARSET_HANDLER my_charset_filename_handler=
 CHARSET_INFO my_charset_filename=
 {
     17,0,0,             /* number       */
-    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_HIDDEN,
+    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_HIDDEN|MY_CS_NONASCII,
     "filename",         /* cs name      */
     "filename",         /* name         */
     "",                 /* comment      */
diff --git a/strings/ctype.c b/strings/ctype.c
index 17ad1256e74..75d76aceea3 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -405,3 +405,23 @@ my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
   }
   return 1;
 }
+
+
+/*
+  Shared function between conf_to_src and mysys.
+  Check if a 8bit character set is compatible with
+  ascii on the range 0x00..0x7F.
+*/
+my_bool
+my_charset_is_ascii_compatible(CHARSET_INFO *cs)
+{
+  uint i;
+  if (!cs->tab_to_uni)
+    return 1;
+  for (i= 0; i < 128; i++)
+  {
+    if (cs->tab_to_uni[i] != i)
+      return 0;
+  }
+  return 1;
+}
diff --git a/strings/decimal.c b/strings/decimal.c
index 282e7cae8ab..184f78a20b7 100644
--- a/strings/decimal.c
+++ b/strings/decimal.c
@@ -30,7 +30,7 @@
   integer that determines the number of significant digits in a
   particular radix R, where R is either 2 or 10. S is a non-negative
   integer. Every value of an exact numeric type of scale S is of the
-  form n*10^{-S}, where n is an integer such that �-R^P <= n <= R^P.
+  form n*10^{-S}, where n is an integer such that �-R^P <= n <= R^P.
 
   [...]
 
@@ -144,7 +144,7 @@ static double scaler1[]= {
   1.0, 10.0, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9
 };
 
-#ifdef HAVE_purify
+#ifdef HAVE_valgrind
 #define sanity(d) DBUG_ASSERT((d)->len > 0)
 #else
 #define sanity(d) DBUG_ASSERT((d)->len >0 && ((d)->buf[0] | \
@@ -306,7 +306,7 @@ int decimal_actual_fraction(decimal_t *from)
   {
     for (i= DIG_PER_DEC1 - ((frac - 1) % DIG_PER_DEC1);
          *buf0 % powers10[i++] == 0;
-         frac--) ;
+         frac--) {}
   }
   return frac;
 }
@@ -500,7 +500,7 @@ static void digits_bounds(decimal_t *from, int *start_result, int *end_result)
     stop= (int) ((buf_end - from->buf + 1) * DIG_PER_DEC1);
     i= 1;
   }
-  for (; *buf_end % powers10[i++] == 0; stop--) ;
+  for (; *buf_end % powers10[i++] == 0; stop--) {}
   *end_result= stop; /* index of position after last decimal digit (from 0) */
 }
 
@@ -1011,7 +1011,7 @@ static int ull2dec(ulonglong from, decimal_t *to)
 
   sanity(to);
 
-  for (intg1=1; from >= DIG_BASE; intg1++, from/=DIG_BASE) ;
+  for (intg1=1; from >= DIG_BASE; intg1++, from/=DIG_BASE) {}
   if (unlikely(intg1 > to->len))
   {
     intg1=to->len;
diff --git a/strings/llstr.c b/strings/llstr.c
index 643cf36a311..678f8b05f39 100644
--- a/strings/llstr.c
+++ b/strings/llstr.c
@@ -38,3 +38,4 @@ char *ullstr(longlong value,char *buff)
   longlong10_to_str(value,buff,10);
   return buff;
 }
+
diff --git a/strings/strmake.c b/strings/strmake.c
index 2d5fa5e36aa..56bd3a8f084 100644
--- a/strings/strmake.c
+++ b/strings/strmake.c
@@ -29,26 +29,38 @@
 
 char *strmake(register char *dst, register const char *src, size_t length)
 {
-#ifdef EXTRA_DEBUG
-  /*
-    'length' is the maximum length of the string; the buffer needs
-    to be one character larger to accomodate the terminating '\0'.
-    This is easy to get wrong, so we make sure we write to the
-    entire length of the buffer to identify incorrect buffer-sizes.
-    We only initialise the "unused" part of the buffer here, a) for
-    efficiency, and b) because dst==src is allowed, so initialising
-    the entire buffer would overwrite the source-string. Also, we
-    write a character rather than '\0' as this makes spotting these
-    problems in the results easier.
-  */
-  uint n= 0;
-  while (n < length && src[n++]);
-  memset(dst + n, (int) 'Z', length - n + 1);
-#endif
-
   while (length--)
+  {
     if (! (*dst++ = *src++))
+    {
+#ifdef EXTRA_DEBUG
+      /*
+        'length' is the maximum length of the string; the buffer needs
+        to be one character larger to accommodate the terminating
+        '\0'.  This is easy to get wrong, so we make sure we write to
+        the entire length of the buffer to identify incorrect
+        buffer-sizes.  We only initialism the "unused" part of the
+        buffer here, a) for efficiency, and b) because dst==src is
+        allowed, so initializing the entire buffer would overwrite the
+        source-string. Also, we write a character rather than '\0' as
+        this makes spotting these problems in the results easier.
+
+        If we are using purify/valgrind, we only set one character at
+        end to be able to detect also wrong accesses after the end of
+        dst.
+      */
+      if (length)
+      {
+#ifdef HAVE_valgrind
+        dst[length-1]= 'Z';
+#else
+        bfill(dst, length-1, (int) 'Z');
+#endif /* HAVE_valgrind */
+      }
+#endif /* EXTRA_DEBUG */
       return dst-1;
+    }
+  }
   *dst=0;
   return dst;
 }
diff --git a/strings/strmov.c b/strings/strmov.c
index eedf22a4ef1..5112ad8d90e 100644
--- a/strings/strmov.c
+++ b/strings/strmov.c
@@ -24,10 +24,7 @@
 #include <my_global.h>
 #include "m_string.h"
 
-#ifdef strmov
-#undef strmov
-#define strmov strmov_overlapp
-#endif
+#ifndef strmov
 
 #if !defined(MC68000) && !defined(DS90)
 
@@ -51,3 +48,5 @@ char *strmov(dst, src)
 }
 
 #endif
+
+#endif /* strmov */
diff --git a/strings/strmov_overlapp.c b/strings/strmov_overlapp.c
new file mode 100644
index 00000000000..4cc3e294620
--- /dev/null
+++ b/strings/strmov_overlapp.c
@@ -0,0 +1,26 @@
+/* Copyright (C) 2000 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#include <my_global.h>
+#include "m_string.h"
+
+/* A trivial implementation */
+char *strmov_overlapp(char *dst, const char *src)
+{
+  size_t len= strlen(src);
+  memmove(dst, src, len+1);
+  return dst+len;
+}
+