diff options
Diffstat (limited to 'strings')
-rwxr-xr-x | strings/CMakeLists.txt | 5 | ||||
-rw-r--r-- | strings/Makefile.am | 8 | ||||
-rw-r--r-- | strings/bcmp.c | 4 | ||||
-rw-r--r-- | strings/bmove512.c | 2 | ||||
-rw-r--r-- | strings/conf_to_src.c | 5 | ||||
-rw-r--r-- | strings/ctype-extra.c | 70 | ||||
-rw-r--r-- | strings/ctype-mb.c | 11 | ||||
-rw-r--r-- | strings/ctype-sjis.c | 4 | ||||
-rw-r--r-- | strings/ctype-uca.c | 386 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 25 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 2 | ||||
-rw-r--r-- | strings/ctype.c | 20 | ||||
-rw-r--r-- | strings/decimal.c | 10 | ||||
-rw-r--r-- | strings/llstr.c | 1 | ||||
-rw-r--r-- | strings/strmake.c | 46 | ||||
-rw-r--r-- | strings/strmov.c | 7 | ||||
-rw-r--r-- | strings/strmov_overlapp.c | 26 |
17 files changed, 436 insertions, 196 deletions
diff --git a/strings/CMakeLists.txt b/strings/CMakeLists.txt index 3d9de566670..5ba1c0e5747 100755 --- a/strings/CMakeLists.txt +++ b/strings/CMakeLists.txt @@ -23,9 +23,10 @@ SET(STRINGS_SOURCES bchange.c bcmp.c bfill.c bmove512.c bmove_upp.c ctype-big5.c ctype-latin1.c ctype-mb.c ctype-simple.c ctype-sjis.c ctype-tis620.c ctype-uca.c ctype-ucs2.c ctype-ujis.c ctype-utf8.c ctype-win1250ch.c ctype.c decimal.c int2str.c is_prefix.c llstr.c longlong2str.c my_strtoll10.c my_vsnprintf.c r_strinstr.c - str2int.c str_alloc.c strcend.c strend.c strfill.c strmake.c strmov.c strnmov.c + str2int.c str_alloc.c strcend.c strend.c strfill.c strmake.c strmov.c strmov_overlapp.c strnmov.c strtod.c strtol.c strtoll.c strtoul.c strtoull.c strxmov.c strxnmov.c xml.c - my_strchr.c strcont.c strinstr.c strnlen.c) + my_strchr.c strcont.c strinstr.c strnlen.c + strappend.c) IF(NOT SOURCE_SUBLIBS) ADD_LIBRARY(strings ${STRINGS_SOURCES}) diff --git a/strings/Makefile.am b/strings/Makefile.am index db9016b7148..94e24045d03 100644 --- a/strings/Makefile.am +++ b/strings/Makefile.am @@ -21,19 +21,19 @@ pkglib_LIBRARIES = libmystrings.a # Exact one of ASSEMBLER_X if ASSEMBLER_x86 ASRCS = strings-x86.s longlong2str-x86.s my_strtoll10-x86.s -CSRCS = bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c strtod.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-ucs2.c ctype-uca.c ctype-win1250ch.c ctype-bin.c ctype-latin1.c my_vsnprintf.c xml.c decimal.c ctype-extra.c str_alloc.c longlong2str_asm.c my_strchr.c strmov.c +CSRCS = bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c strtod.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-cp932.c ctype-czech.c 
ctype-eucjpms.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-ucs2.c ctype-uca.c ctype-win1250ch.c ctype-bin.c ctype-latin1.c my_vsnprintf.c xml.c decimal.c ctype-extra.c str_alloc.c longlong2str_asm.c my_strchr.c strmov.c strmov_overlapp.c else if ASSEMBLER_sparc32 # These file MUST all be on the same line!! Otherwise automake # generats a very broken makefile ASRCS = bmove_upp-sparc.s strappend-sparc.s strend-sparc.s strinstr-sparc.s strmake-sparc.s strmov-sparc.s strnmov-sparc.s strstr-sparc.s -CSRCS = strcont.c strfill.c strcend.c is_prefix.c longlong2str.c bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c strtod.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c strxmov.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-ucs2.c ctype-uca.c ctype-win1250ch.c ctype-bin.c ctype-latin1.c my_vsnprintf.c xml.c decimal.c ctype-extra.c my_strtoll10.c str_alloc.c my_strchr.c strmov.c +CSRCS = strcont.c strfill.c strcend.c is_prefix.c longlong2str.c bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c strtod.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c strxmov.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-ucs2.c ctype-uca.c ctype-win1250ch.c ctype-bin.c ctype-latin1.c my_vsnprintf.c xml.c decimal.c ctype-extra.c my_strtoll10.c str_alloc.c my_strchr.c strmov.c strmov_overlapp.c else #no assembler ASRCS = # These file MUST all be on the same line!! 
Otherwise automake # generats a very broken makefile -CSRCS = strxmov.c bmove_upp.c strappend.c strcont.c strend.c strfill.c strcend.c is_prefix.c strstr.c strinstr.c strmake.c strnmov.c strmov.c longlong2str.c bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c strtod.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-ucs2.c ctype-uca.c ctype-win1250ch.c ctype-bin.c ctype-latin1.c my_vsnprintf.c xml.c decimal.c ctype-extra.c my_strtoll10.c str_alloc.c my_strchr.c +CSRCS = strxmov.c bmove_upp.c strappend.c strcont.c strend.c strfill.c strcend.c is_prefix.c strstr.c strinstr.c strmake.c strnmov.c strmov.c longlong2str.c bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c strtod.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-ucs2.c ctype-uca.c ctype-win1250ch.c ctype-bin.c ctype-latin1.c my_vsnprintf.c xml.c decimal.c ctype-extra.c my_strtoll10.c str_alloc.c my_strchr.c strmov_overlapp.c endif endif @@ -54,7 +54,7 @@ EXTRA_DIST = ctype-big5.c ctype-cp932.c ctype-czech.c ctype-eucjpms.c ctype-euc strinstr-sparc.s strmake-sparc.s strmov-sparc.s \ strnmov-sparc.s strstr-sparc.s strxmov-sparc.s \ t_ctype.h my_strchr.c CMakeLists.txt \ - CHARSET_INFO.txt + CHARSET_INFO.txt strmov_overlapp.c libmystrings_a_LIBADD= conf_to_src_SOURCES = conf_to_src.c xml.c ctype.c bcmp.c diff --git a/strings/bcmp.c b/strings/bcmp.c index 1b6ed22fc22..39b5f24e055 100644 --- a/strings/bcmp.c +++ b/strings/bcmp.c @@ -24,7 +24,7 @@ #include <my_global.h> #include "m_string.h" -#ifdef HAVE_purify +#ifdef HAVE_valgrind #undef 
bcmp #undef HAVE_BCMP #endif @@ -50,7 +50,7 @@ uint len; /* 0 <= len <= 65535 */ #else -#ifndef HAVE_purify +#ifndef HAVE_valgrind size_t bcmp(register const uchar *s1,register const uchar *s2, register size_t len) #else diff --git a/strings/bmove512.c b/strings/bmove512.c index c3f0446ead6..0ae23d1f42d 100644 --- a/strings/bmove512.c +++ b/strings/bmove512.c @@ -33,7 +33,7 @@ #ifdef HAVE_LONG_LONG #define LONG ulonglong #else -#define LONG ulonglong +#define LONG ulong #endif void bmove512(uchar *to, const uchar *from, register size_t length) diff --git a/strings/conf_to_src.c b/strings/conf_to_src.c index 7e742050aa8..aa40a269848 100644 --- a/strings/conf_to_src.c +++ b/strings/conf_to_src.c @@ -184,11 +184,12 @@ void dispcset(FILE *f,CHARSET_INFO *cs) { fprintf(f,"{\n"); fprintf(f," %d,%d,%d,\n",cs->number,0,0); - fprintf(f," MY_CS_COMPILED%s%s%s%s,\n", + fprintf(f," MY_CS_COMPILED%s%s%s%s%s,\n", cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "", cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "", is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "", - my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : ""); + my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "", + !my_charset_is_ascii_compatible(cs) ? 
"|MY_CS_NONASCII": ""); if (cs->name) { diff --git a/strings/ctype-extra.c b/strings/ctype-extra.c index 75244e40435..64cf30e3673 100644 --- a/strings/ctype-extra.c +++ b/strings/ctype-extra.c @@ -6804,7 +6804,7 @@ CHARSET_INFO compiled_charsets[] = { #ifdef HAVE_CHARSET_swe7 { 10,0,0, - MY_CS_COMPILED|MY_CS_PRIMARY, + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_NONASCII, "swe7", /* cset name */ "swe7_swedish_ci", /* coll name */ "", /* comment */ @@ -8454,7 +8454,7 @@ CHARSET_INFO compiled_charsets[] = { #ifdef HAVE_CHARSET_swe7 { 82,0,0, - MY_CS_COMPILED|MY_CS_BINSORT, + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_NONASCII, "swe7", /* cset name */ "swe7_bin", /* coll name */ "", /* comment */ @@ -8550,72 +8550,6 @@ CHARSET_INFO compiled_charsets[] = { } , #endif -#ifdef HAVE_CHARSET_geostd8 -{ - 92,0,0, - MY_CS_COMPILED|MY_CS_PRIMARY, - "geostd8", /* cset name */ - "geostd8_general_ci", /* coll name */ - "", /* comment */ - NULL, /* tailoring */ - ctype_geostd8_general_ci, /* ctype */ - to_lower_geostd8_general_ci, /* lower */ - to_upper_geostd8_general_ci, /* upper */ - sort_order_geostd8_general_ci, /* sort_order */ - NULL, /* contractions */ - NULL, /* sort_order_big*/ - to_uni_geostd8_general_ci, /* to_uni */ - NULL, /* from_uni */ - my_unicase_default, /* caseinfo */ - NULL, /* state map */ - NULL, /* ident map */ - 1, /* strxfrm_multiply*/ - 1, /* caseup_multiply*/ - 1, /* casedn_multiply*/ - 1, /* mbminlen */ - 1, /* mbmaxlen */ - 0, /* min_sort_char */ - 255, /* max_sort_char */ - ' ', /* pad_char */ - 0, /* escape_with_backslash_is_dangerous */ - &my_charset_8bit_handler, - &my_collation_8bit_simple_ci_handler, -} -, -#endif -#ifdef HAVE_CHARSET_geostd8 -{ - 93,0,0, - MY_CS_COMPILED|MY_CS_BINSORT, - "geostd8", /* cset name */ - "geostd8_bin", /* coll name */ - "", /* comment */ - NULL, /* tailoring */ - ctype_geostd8_bin, /* ctype */ - to_lower_geostd8_bin, /* lower */ - to_upper_geostd8_bin, /* upper */ - NULL, /* sort_order */ - NULL, /* contractions */ - NULL, /* 
sort_order_big*/ - to_uni_geostd8_bin, /* to_uni */ - NULL, /* from_uni */ - my_unicase_default, /* caseinfo */ - NULL, /* state map */ - NULL, /* ident map */ - 1, /* strxfrm_multiply*/ - 1, /* caseup_multiply*/ - 1, /* casedn_multiply*/ - 1, /* mbminlen */ - 1, /* mbmaxlen */ - 0, /* min_sort_char */ - 255, /* max_sort_char */ - ' ', /* pad_char */ - 0, /* escape_with_backslash_is_dangerous */ - &my_charset_8bit_handler, - &my_collation_8bit_bin_handler, -} -, -#endif #ifdef HAVE_CHARSET_latin1 { 94,0,0, diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 903811e2ab9..51f93954bd5 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -567,8 +567,7 @@ my_bool my_like_range_mb(CHARSET_INFO *cs, char *min_end= min_str + res_length; char *max_end= max_str + res_length; size_t maxcharlen= res_length / cs->mbmaxlen; - const char *contraction_flags= cs->contractions ? - ((const char*) cs->contractions) + 0x40*0x40 : NULL; + my_bool have_contractions= my_uca_have_contractions(cs); for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--) { @@ -636,8 +635,8 @@ fill_max_and_min: 'ab\min\min\min\min' and 'ab\max\max\max\max'. */ - if (contraction_flags && ptr + 1 < end && - contraction_flags[(uchar) *ptr]) + if (have_contractions && ptr + 1 < end && + my_uca_can_be_contraction_head(cs, (uchar) *ptr)) { /* Ptr[0] is a contraction head. */ @@ -659,8 +658,8 @@ fill_max_and_min: is not a contraction, then we put only ptr[0], and continue with ptr[1] on the next loop. 
*/ - if (contraction_flags[(uchar) ptr[1]] && - cs->contractions[(*ptr-0x40)*0x40 + ptr[1] - 0x40]) + if (my_uca_can_be_contraction_tail(cs, (uchar) ptr[1]) && + my_uca_contraction2_weight(cs, (uchar) ptr[0], (uchar) ptr[1])) { /* Contraction found */ if (maxcharlen == 1 || min_str + 1 >= min_end) diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index ac426e0d7b5..cc7581ffe6e 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -4672,7 +4672,7 @@ static MY_CHARSET_HANDLER my_charset_handler= CHARSET_INFO my_charset_sjis_japanese_ci= { 13,0,0, /* number */ - MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM, /* state */ + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_NONASCII, /* state */ "sjis", /* cs name */ "sjis_japanese_ci", /* name */ "", /* comment */ @@ -4704,7 +4704,7 @@ CHARSET_INFO my_charset_sjis_japanese_ci= CHARSET_INFO my_charset_sjis_bin= { 88,0,0, /* number */ - MY_CS_COMPILED|MY_CS_BINSORT, /* state */ + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_NONASCII, /* state */ "sjis", /* cs name */ "sjis_bin", /* name */ "", /* comment */ diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 6ae0cc3a293..b2a60265a0a 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -36,6 +36,12 @@ #include "m_string.h" #include "m_ctype.h" + +#define MY_UCA_CNT_FLAG_SIZE 4096 +#define MY_UCA_CNT_FLAG_MASK 4095 +#define MY_UCA_CNT_HEAD 1 +#define MY_UCA_CNT_TAIL 2 + #ifdef HAVE_UCA_COLLATIONS #define MY_UCA_NPAGES 256 @@ -6713,6 +6719,16 @@ static const char hungarian[]= "&U < \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170"; +static const char croatian[]= + +"&C < \\u010D <<< \\u010C < \\u0107 <<< \\u0106 " +"&D < d\\u017E <<< \\u01C6 <<< D\\u017E <<< \\u01C5 <<< D\\u017D <<< \\u01C4 " +" < \\u0111 <<< \\u0110 " +"&L < lj <<< \\u01C9 <<< Lj <<< \\u01C8 <<< LJ <<< \\u01C7 " +"&N < nj <<< \\u01CC <<< Nj <<< \\u01CB <<< NJ <<< \\u01CA " +"&S < \\u0161 <<< \\u0160 " +"&Z < \\u017E <<< \\u017D"; + /* Unicode Collation Algorithm: Collation element (weight) 
scanner, @@ -6726,7 +6742,7 @@ typedef struct my_uca_scanner_st const uchar *send; /* End of the input string */ uchar *uca_length; uint16 **uca_weight; - uint16 *contractions; + MY_CONTRACTIONS *contractions; uint16 implicit[2]; int page; int code; @@ -6746,6 +6762,75 @@ typedef struct my_uca_scanner_handler_st static uint16 nochar[]= {0,0}; +/********** Helper functions to handle contraction ************/ + + +/** + Mark a character as a contraction part + + @cs Pointer to CHARSET_INFO data + @wc Unicode code point + @flag flag: "is contraction head", "is contraction tail" +*/ + +static void +my_uca_add_contraction_flag(CHARSET_INFO *cs, my_wc_t wc, int flag) +{ + cs->contractions->flags[wc & MY_UCA_CNT_FLAG_MASK]|= flag; +} + + +/** + Add a new contraction into contraction list + + @cs Pointer to CHARSET_INFO data + @wc Unicode code points of the characters + @len Number of characters + + @return New contraction + @retval Pointer to a newly added contraction +*/ + +static MY_CONTRACTION * +my_uca_add_contraction(CHARSET_INFO *cs, + my_wc_t *wc, int len __attribute__((unused))) +{ + MY_CONTRACTIONS *list= cs->contractions; + MY_CONTRACTION *next= &list->item[list->nitems]; + DBUG_ASSERT(len == 2); /* We currently support only contraction2 */ + next->ch[0]= wc[0]; + next->ch[1]= wc[1]; + list->nitems++; + return next; +} + + +/** + Allocate and initialize memory for contraction list and flags + + @cs Pointer to CHARSET_INFO data + @alloc Memory allocation function (typically points to my_alloc_once) + @n Number of contractions + + @return Error code + @retval 0 - memory allocated successfully + @retval 1 - not enough memory +*/ + +static my_bool +my_uca_alloc_contractions(CHARSET_INFO *cs, void *(*alloc)(size_t), size_t n) +{ + uint size= n * sizeof(MY_CONTRACTION); + if (!(cs->contractions= (*alloc)(sizeof(MY_CONTRACTIONS)))) + return 1; + bzero(cs->contractions, sizeof(MY_CONTRACTIONS)); + if (!(cs->contractions->item= (*alloc)(size)) || + 
!(cs->contractions->flags= (char*) (*alloc)(MY_UCA_CNT_FLAG_SIZE))) + return 1; + bzero((void*) cs->contractions->item, size); + bzero((void*) cs->contractions->flags, MY_UCA_CNT_FLAG_SIZE); + return 0; +} #ifdef HAVE_CHARSET_ucs2 /* @@ -6766,7 +6851,7 @@ static uint16 nochar[]= {0,0}; */ static void my_uca_scanner_init_ucs2(my_uca_scanner *scanner, - CHARSET_INFO *cs __attribute__((unused)), + CHARSET_INFO *cs, const uchar *str, size_t length) { scanner->wbeg= nochar; @@ -6777,6 +6862,7 @@ static void my_uca_scanner_init_ucs2(my_uca_scanner *scanner, scanner->uca_length= cs->sort_order; scanner->uca_weight= cs->sort_order_big; scanner->contractions= cs->contractions; + scanner->cs= cs; return; } @@ -6865,18 +6951,23 @@ static int my_uca_scanner_next_ucs2(my_uca_scanner *scanner) if (scanner->contractions && (scanner->sbeg <= scanner->send)) { - int cweight; + my_wc_t wc1= ((scanner->page << 8) | scanner->code); - if (!scanner->page && !scanner->sbeg[0] && - (scanner->sbeg[1] > 0x40) && (scanner->sbeg[1] < 0x80) && - (scanner->code > 0x40) && (scanner->code < 0x80) && - (cweight= scanner->contractions[(scanner->code-0x40)*0x40+scanner->sbeg[1]-0x40])) + if (my_uca_can_be_contraction_head(scanner->cs, wc1)) + { + uint16 *cweight; + my_wc_t wc2= (((my_wc_t) scanner->sbeg[0]) << 8) | scanner->sbeg[1]; + if (my_uca_can_be_contraction_tail(scanner->cs, wc2) && + (cweight= my_uca_contraction2_weight(scanner->cs, + scanner->code, + scanner->sbeg[1]))) { scanner->implicit[0]= 0; scanner->wbeg= scanner->implicit; scanner->sbeg+=2; - return cweight; + return *cweight; } + } } if (!ucaw[scanner->page]) @@ -6959,23 +7050,22 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner) scanner->code= wc & 0xFF; scanner->sbeg+= mb_len; - if (scanner->contractions && !scanner->page && - (scanner->code > 0x40) && (scanner->code < 0x80)) + if (my_uca_have_contractions(scanner->cs) && + my_uca_can_be_contraction_head(scanner->cs, wc)) { - uint page1, code1, cweight; + my_wc_t wc2; 
+ uint16 *cweight; - if (((mb_len= scanner->cs->cset->mb_wc(scanner->cs, &wc, + if (((mb_len= scanner->cs->cset->mb_wc(scanner->cs, &wc2, scanner->sbeg, scanner->send)) >=0) && - (!(page1= (wc >> 8))) && - ((code1= (wc & 0xFF)) > 0x40) && - (code1 < 0x80) && - (cweight= scanner->contractions[(scanner->code-0x40)*0x40 + code1-0x40])) + my_uca_can_be_contraction_tail(scanner->cs, wc2) && + (cweight= my_uca_contraction2_weight(scanner->cs, wc, wc2))) { scanner->implicit[0]= 0; scanner->wbeg= scanner->implicit; scanner->sbeg+= mb_len; - return cweight; + return *cweight; } } @@ -7012,6 +7102,33 @@ static my_uca_scanner_handler my_any_uca_scanner_handler= my_uca_scanner_next_any }; + + +/** + Helper function: + Find address of weights of the given character. + + @weights UCA weight array + @lengths UCA length array + @ch character Unicode code point + + @return Weight array + @retval pointer to weight array for the given character, + or NULL if this page does not have implicit weights. +*/ + +static inline uint16 * +my_char_weight_addr(CHARSET_INFO *cs, uint wc) +{ + uint page= (wc >> 8); + uint ofst= wc & 0xFF; + return cs->sort_order_big[page] ? 
+ cs->sort_order_big[page] + ofst * cs->sort_order[page] : + NULL; +} + + + /* Compares two strings according to the collation @@ -7683,8 +7800,8 @@ ex: typedef struct my_coll_rule_item_st { - uint base; /* Base character */ - uint curr[2]; /* Current character */ + my_wc_t base; /* Base character */ + my_wc_t curr[2]; /* Current character */ int diff[3]; /* Primary, Secondary and Tertiary difference */ } MY_COLL_RULE; @@ -7834,6 +7951,7 @@ static int my_coll_rule_parse(MY_COLL_RULE *rule, size_t mitems, static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(size_t)) { MY_COLL_RULE rule[MY_MAX_COLL_RULE]; + MY_COLL_RULE *r, *rfirst, *rlast; char errstr[128]; uchar *newlengths; uint16 **newweights; @@ -7858,6 +7976,9 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(size_t)) return 1; } + rfirst= rule; + rlast= rule + rc; + if (!cs->caseinfo) cs->caseinfo= my_unicase_default; @@ -7941,44 +8062,21 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(size_t)) /* Now process contractions */ if (ncontractions) { - /* - 8K for weights for basic latin letter pairs, - plus 256 bytes for "is contraction part" flags. - */ - uint size= 0x40*0x40*sizeof(uint16) + 256; - char *contraction_flags; - if (!(cs->contractions= (uint16*) (*alloc)(size))) - return 1; - bzero((void*)cs->contractions, size); - contraction_flags= ((char*) cs->contractions) + 0x40*0x40; - for (i=0; i < rc; i++) + if (my_uca_alloc_contractions(cs, alloc, ncontractions)) + return 1; + for (r= rfirst; r < rlast; r++) { - if (rule[i].curr[1]) + uint16 *to; + if (r->curr[1]) /* Contraction */ { - uint pageb= (rule[i].base >> 8) & 0xFF; - uint chb= rule[i].base & 0xFF; - uint16 *offsb= defweights[pageb] + chb*deflengths[pageb]; - uint offsc; - - if (offsb[1] || - rule[i].curr[0] < 0x40 || rule[i].curr[0] > 0x7f || - rule[i].curr[1] < 0x40 || rule[i].curr[1] > 0x7f) - { - /* - TODO: add error reporting; - We support only basic latin letters contractions at this point. 
- Also, We don't support contractions with weight longer than one. - Otherwise, we'd need much more memory. - */ - return 1; - } - offsc= (rule[i].curr[0]-0x40)*0x40+(rule[i].curr[1]-0x40); - - /* Copy base weight applying primary difference */ - cs->contractions[offsc]= offsb[0] + rule[i].diff[0]; - /* Mark both letters as "is contraction part */ - contraction_flags[rule[i].curr[0]]= 1; - contraction_flags[rule[i].curr[1]]= 1; + /* Mark both letters as "is contraction part" */ + my_uca_add_contraction_flag(cs, r->curr[0], MY_UCA_CNT_HEAD); + my_uca_add_contraction_flag(cs, r->curr[1], MY_UCA_CNT_TAIL); + to= my_uca_add_contraction(cs, r->curr, 2)->weight; + /* Copy weight from the reset character */ + to[0]= my_char_weight_addr(cs, r->base)[0]; + /* Apply primary difference */ + to[0]+= r->diff[0]; } } } @@ -8090,7 +8188,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler = CHARSET_INFO my_charset_ucs2_unicode_ci= { 128,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_unicode_ci", /* name */ "", /* comment */ @@ -8122,7 +8220,7 @@ CHARSET_INFO my_charset_ucs2_unicode_ci= CHARSET_INFO my_charset_ucs2_icelandic_uca_ci= { 129,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_icelandic_ci",/* name */ "", /* comment */ @@ -8154,7 +8252,7 @@ CHARSET_INFO my_charset_ucs2_icelandic_uca_ci= CHARSET_INFO my_charset_ucs2_latvian_uca_ci= { 130,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_latvian_ci", /* name */ "", /* comment */ @@ -8186,7 +8284,7 @@ CHARSET_INFO my_charset_ucs2_latvian_uca_ci= CHARSET_INFO my_charset_ucs2_romanian_uca_ci= { 131,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + 
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_romanian_ci", /* name */ "", /* comment */ @@ -8218,7 +8316,7 @@ CHARSET_INFO my_charset_ucs2_romanian_uca_ci= CHARSET_INFO my_charset_ucs2_slovenian_uca_ci= { 132,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_slovenian_ci",/* name */ "", /* comment */ @@ -8250,7 +8348,7 @@ CHARSET_INFO my_charset_ucs2_slovenian_uca_ci= CHARSET_INFO my_charset_ucs2_polish_uca_ci= { 133,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_polish_ci", /* name */ "", /* comment */ @@ -8282,7 +8380,7 @@ CHARSET_INFO my_charset_ucs2_polish_uca_ci= CHARSET_INFO my_charset_ucs2_estonian_uca_ci= { 134,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_estonian_ci", /* name */ "", /* comment */ @@ -8314,7 +8412,7 @@ CHARSET_INFO my_charset_ucs2_estonian_uca_ci= CHARSET_INFO my_charset_ucs2_spanish_uca_ci= { 135,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_spanish_ci", /* name */ "", /* comment */ @@ -8346,7 +8444,7 @@ CHARSET_INFO my_charset_ucs2_spanish_uca_ci= CHARSET_INFO my_charset_ucs2_swedish_uca_ci= { 136,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_swedish_ci", /* name */ "", /* comment */ @@ -8378,7 +8476,7 @@ CHARSET_INFO my_charset_ucs2_swedish_uca_ci= CHARSET_INFO my_charset_ucs2_turkish_uca_ci= { 137,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_turkish_ci", 
/* name */ "", /* comment */ @@ -8410,7 +8508,7 @@ CHARSET_INFO my_charset_ucs2_turkish_uca_ci= CHARSET_INFO my_charset_ucs2_czech_uca_ci= { 138,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_czech_ci", /* name */ "", /* comment */ @@ -8443,7 +8541,7 @@ CHARSET_INFO my_charset_ucs2_czech_uca_ci= CHARSET_INFO my_charset_ucs2_danish_uca_ci= { 139,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_danish_ci", /* name */ "", /* comment */ @@ -8475,7 +8573,7 @@ CHARSET_INFO my_charset_ucs2_danish_uca_ci= CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci= { 140,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_lithuanian_ci",/* name */ "", /* comment */ @@ -8507,7 +8605,7 @@ CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci= CHARSET_INFO my_charset_ucs2_slovak_uca_ci= { 141,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_slovak_ci", /* name */ "", /* comment */ @@ -8539,7 +8637,7 @@ CHARSET_INFO my_charset_ucs2_slovak_uca_ci= CHARSET_INFO my_charset_ucs2_spanish2_uca_ci= { 142,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_spanish2_ci", /* name */ "", /* comment */ @@ -8572,7 +8670,7 @@ CHARSET_INFO my_charset_ucs2_spanish2_uca_ci= CHARSET_INFO my_charset_ucs2_roman_uca_ci= { 143,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_roman_ci", /* name */ "", /* comment */ @@ -8605,7 +8703,7 @@ CHARSET_INFO my_charset_ucs2_roman_uca_ci= CHARSET_INFO 
my_charset_ucs2_persian_uca_ci= { 144,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_persian_ci", /* name */ "", /* comment */ @@ -8638,7 +8736,7 @@ CHARSET_INFO my_charset_ucs2_persian_uca_ci= CHARSET_INFO my_charset_ucs2_esperanto_uca_ci= { 145,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_esperanto_ci",/* name */ "", /* comment */ @@ -8671,7 +8769,7 @@ CHARSET_INFO my_charset_ucs2_esperanto_uca_ci= CHARSET_INFO my_charset_ucs2_hungarian_uca_ci= { 146,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_hungarian_ci",/* name */ "", /* comment */ @@ -8701,6 +8799,39 @@ CHARSET_INFO my_charset_ucs2_hungarian_uca_ci= }; +CHARSET_INFO my_charset_ucs2_croatian_uca_ci= +{ + 149,0,0, /* number */ + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + "ucs2", /* cs name */ + "ucs2_croatian_ci", /* name */ + "", /* comment */ + croatian, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 2, /* mbminlen */ + 2, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_ucs2_handler, + &my_collation_ucs2_uca_handler +}; + + #endif @@ -9358,6 +9489,113 @@ CHARSET_INFO my_charset_utf8_hungarian_uca_ci= &my_collation_any_uca_handler }; +CHARSET_INFO my_charset_utf8_croatian_uca_ci= +{ + 213,0,0, /* number */ + 
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + "utf8", /* cs name */ + "utf8_croatian_ci", /* name */ + "", /* comment */ + croatian, /* tailoring */ + ctype_utf8, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + my_unicase_default, /* caseinfo */ + NULL, /* state_map */ + NULL, /* ident_map */ + 8, /* strxfrm_multiply */ + 1, /* caseup_multiply */ + 1, /* casedn_multiply */ + 1, /* mbminlen */ + 3, /* mbmaxlen */ + 9, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + &my_charset_utf8_handler, + &my_collation_any_uca_handler +}; + #endif /* HAVE_CHARSET_utf8 */ #endif /* HAVE_UCA_COLLATIONS */ + +/** + Check if UCA data has contractions (public version) + + @cs Pointer to CHARSET_INFO data + @retval 0 - no contraction, 1 - have contractions. +*/ + +my_bool +my_uca_have_contractions(CHARSET_INFO *cs) +{ + return cs->contractions != NULL; +} + +/** + Check if a character can be contraction head + + @cs Pointer to CHARSET_INFO data + @wc Code point + + @retval 0 - cannot be contraction head + @retval 1 - can be contraction head +*/ + +my_bool +my_uca_can_be_contraction_head(CHARSET_INFO *cs, my_wc_t wc) +{ + return cs->contractions->flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_CNT_HEAD; +} + + +/** + Check if a character can be contraction tail + + @cs Pointer to CHARSET_INFO data + @wc Code point + + @retval 0 - cannot be contraction tail + @retval 1 - can be contraction tail +*/ + +my_bool +my_uca_can_be_contraction_tail(CHARSET_INFO *cs, my_wc_t wc) +{ + return cs->contractions->flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_CNT_TAIL; +} + + +/** + Find a contraction and return its weight array + + @cs Pointer to CHARSET data + @wc1 First character + @wc2 Second character + + @return Weight array + @retval NULL - no contraction found + @retval ptr - contraction 
weight array +*/ + +uint16 * +my_uca_contraction2_weight(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) +{ + MY_CONTRACTIONS *list= cs->contractions; + MY_CONTRACTION *c, *last; + for (c= list->item, last= &list->item[list->nitems]; c < last; c++) + { + if (c->ch[0] == wc1 && c->ch[1] == wc2) + { + return c->weight; + } + } + return NULL; +} + diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index cead55f8a0a..bf9c6f9626a 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1498,6 +1498,14 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), } } + +static inline my_wc_t +ucs2_to_wc(const uchar *ptr) +{ + return (((uint) ptr[0]) << 8) + ptr[1]; +} + + /* ** Calculate min_str and max_str that ranges a LIKE string. ** Arguments: @@ -1526,12 +1534,12 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, char *min_org=min_str; char *min_end=min_str+res_length; size_t charlen= res_length / cs->mbmaxlen; - const char *contraction_flags= cs->contractions ? - ((const char*) cs->contractions) + 0x40*0x40 : NULL; + my_bool have_contractions= my_uca_have_contractions(cs); for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0 ; ptr+=2, charlen--) { + my_wc_t wc; if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end) { ptr+=2; /* Skip escape */ @@ -1567,9 +1575,10 @@ fill_max_and_min: return 0; } - if (contraction_flags && ptr + 3 < end && - ptr[0] == '\0' && contraction_flags[(uchar) ptr[1]]) + if (have_contractions && ptr + 3 < end && + my_uca_can_be_contraction_head(cs, (wc= ucs2_to_wc((uchar*) ptr)))) { + my_wc_t wc2; /* Contraction head found */ if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many)) { @@ -1581,8 +1590,8 @@ fill_max_and_min: Check if the second letter can be contraction part, and if two letters really produce a contraction. 
*/ - if (ptr[2] == '\0' && contraction_flags[(uchar) ptr[3]] && - cs->contractions[(ptr[1]-0x40)*0x40 + ptr[3] - 0x40]) + if (my_uca_can_be_contraction_tail(cs, (wc2= ucs2_to_wc((uchar*) ptr + 2))) && + my_uca_contraction2_weight(cs, wc , wc2)) { /* Contraction found */ if (charlen == 1 || min_str + 2 >= min_end) @@ -1702,7 +1711,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler= CHARSET_INFO my_charset_ucs2_general_ci= { 35,0,0, /* number */ - MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_general_ci", /* name */ "", /* comment */ @@ -1734,7 +1743,7 @@ CHARSET_INFO my_charset_ucs2_general_ci= CHARSET_INFO my_charset_ucs2_bin= { 90,0,0, /* number */ - MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_bin", /* name */ "", /* comment */ diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index ae942b59caa..91f633e45ce 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -4200,7 +4200,7 @@ static MY_CHARSET_HANDLER my_charset_filename_handler= CHARSET_INFO my_charset_filename= { 17,0,0, /* number */ - MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_HIDDEN, + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_HIDDEN|MY_CS_NONASCII, "filename", /* cs name */ "filename", /* name */ "", /* comment */ diff --git a/strings/ctype.c b/strings/ctype.c index 17ad1256e74..75d76aceea3 100644 --- a/strings/ctype.c +++ b/strings/ctype.c @@ -405,3 +405,23 @@ my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs) } return 1; } + + +/* + Shared function between conf_to_src and mysys. + Check if a 8bit character set is compatible with + ascii on the range 0x00..0x7F. 
+*/ +my_bool +my_charset_is_ascii_compatible(CHARSET_INFO *cs) +{ + uint i; + if (!cs->tab_to_uni) + return 1; + for (i= 0; i < 128; i++) + { + if (cs->tab_to_uni[i] != i) + return 0; + } + return 1; +} diff --git a/strings/decimal.c b/strings/decimal.c index 282e7cae8ab..184f78a20b7 100644 --- a/strings/decimal.c +++ b/strings/decimal.c @@ -30,7 +30,7 @@ integer that determines the number of significant digits in a particular radix R, where R is either 2 or 10. S is a non-negative integer. Every value of an exact numeric type of scale S is of the - form n*10^{-S}, where n is an integer such that -R^P <= n <= R^P. + form n*10^{-S}, where n is an integer such that �-R^P <= n <= R^P. [...] @@ -144,7 +144,7 @@ static double scaler1[]= { 1.0, 10.0, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9 }; -#ifdef HAVE_purify +#ifdef HAVE_valgrind #define sanity(d) DBUG_ASSERT((d)->len > 0) #else #define sanity(d) DBUG_ASSERT((d)->len >0 && ((d)->buf[0] | \ @@ -306,7 +306,7 @@ int decimal_actual_fraction(decimal_t *from) { for (i= DIG_PER_DEC1 - ((frac - 1) % DIG_PER_DEC1); *buf0 % powers10[i++] == 0; - frac--) ; + frac--) {} } return frac; } @@ -500,7 +500,7 @@ static void digits_bounds(decimal_t *from, int *start_result, int *end_result) stop= (int) ((buf_end - from->buf + 1) * DIG_PER_DEC1); i= 1; } - for (; *buf_end % powers10[i++] == 0; stop--) ; + for (; *buf_end % powers10[i++] == 0; stop--) {} *end_result= stop; /* index of position after last decimal digit (from 0) */ } @@ -1011,7 +1011,7 @@ static int ull2dec(ulonglong from, decimal_t *to) sanity(to); - for (intg1=1; from >= DIG_BASE; intg1++, from/=DIG_BASE) ; + for (intg1=1; from >= DIG_BASE; intg1++, from/=DIG_BASE) {} if (unlikely(intg1 > to->len)) { intg1=to->len; diff --git a/strings/llstr.c b/strings/llstr.c index 643cf36a311..678f8b05f39 100644 --- a/strings/llstr.c +++ b/strings/llstr.c @@ -38,3 +38,4 @@ char *ullstr(longlong value,char *buff) longlong10_to_str(value,buff,10); return buff; } + diff --git 
a/strings/strmake.c b/strings/strmake.c index 2d5fa5e36aa..56bd3a8f084 100644 --- a/strings/strmake.c +++ b/strings/strmake.c @@ -29,26 +29,38 @@ char *strmake(register char *dst, register const char *src, size_t length) { -#ifdef EXTRA_DEBUG - /* - 'length' is the maximum length of the string; the buffer needs - to be one character larger to accomodate the terminating '\0'. - This is easy to get wrong, so we make sure we write to the - entire length of the buffer to identify incorrect buffer-sizes. - We only initialise the "unused" part of the buffer here, a) for - efficiency, and b) because dst==src is allowed, so initialising - the entire buffer would overwrite the source-string. Also, we - write a character rather than '\0' as this makes spotting these - problems in the results easier. - */ - uint n= 0; - while (n < length && src[n++]); - memset(dst + n, (int) 'Z', length - n + 1); -#endif - while (length--) + { if (! (*dst++ = *src++)) + { +#ifdef EXTRA_DEBUG + /* + 'length' is the maximum length of the string; the buffer needs + to be one character larger to accommodate the terminating + '\0'. This is easy to get wrong, so we make sure we write to + the entire length of the buffer to identify incorrect + buffer-sizes. We only initialize the "unused" part of the + buffer here, a) for efficiency, and b) because dst==src is + allowed, so initializing the entire buffer would overwrite the + source-string. Also, we write a character rather than '\0' as + this makes spotting these problems in the results easier. + + If we are using purify/valgrind, we only set one character at + end to be able to detect also wrong accesses after the end of + dst. 
+ */ + if (length) + { +#ifdef HAVE_valgrind + dst[length-1]= 'Z'; +#else + bfill(dst, length-1, (int) 'Z'); +#endif /* HAVE_valgrind */ + } +#endif /* EXTRA_DEBUG */ return dst-1; + } + } *dst=0; return dst; } diff --git a/strings/strmov.c b/strings/strmov.c index eedf22a4ef1..5112ad8d90e 100644 --- a/strings/strmov.c +++ b/strings/strmov.c @@ -24,10 +24,7 @@ #include <my_global.h> #include "m_string.h" -#ifdef strmov -#undef strmov -#define strmov strmov_overlapp -#endif +#ifndef strmov #if !defined(MC68000) && !defined(DS90) @@ -51,3 +48,5 @@ char *strmov(dst, src) } #endif + +#endif /* strmov */ diff --git a/strings/strmov_overlapp.c b/strings/strmov_overlapp.c new file mode 100644 index 00000000000..4cc3e294620 --- /dev/null +++ b/strings/strmov_overlapp.c @@ -0,0 +1,26 @@ +/* Copyright (C) 2000 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <my_global.h> +#include "m_string.h" + +/* A trivial implementation */ +char *strmov_overlapp(char *dst, const char *src) +{ + size_t len= strlen(src); + memmove(dst, src, len+1); + return dst+len; +} + |