summaryrefslogtreecommitdiff
path: root/strings/ctype-simple.c
diff options
context:
space:
mode:
Diffstat (limited to 'strings/ctype-simple.c')
-rw-r--r--strings/ctype-simple.c29
1 files changed, 28 insertions, 1 deletions
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index d7a1b3f33b4..288f5fdd49d 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -1303,7 +1303,28 @@ create_fromuni(struct charset_info_st *cs,
if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
{
int ofs= wc - idx[i].uidx.from;
- tab[ofs]= ch;
+ if (!tab[ofs] || tab[ofs] > 0x7F) /* Prefer ASCII*/
+ {
+ /*
+ Some character sets can have double encoding. For example,
+ in ARMSCII8, the following characters are encoded twice:
+
+ Encoding#1 Encoding#2 Unicode Character Name
+ ---------- ---------- ------- --------------
+ 0x27 0xFF U+0027 APOSTROPHE
+ 0x28 0xA5 U+0028 LEFT PARENTHESIS
+ 0x29 0xA4 U+0029 RIGHT PARENTHESIS
+ 0x2C 0xAB U+002C COMMA
+ 0x2D 0xAC U+002D HYPHEN-MINUS
+ 0x2E 0xA9 U+002E FULL STOP
+
+ That is, both 0x27 and 0xFF convert to Unicode U+0027.
+ When converting back from Unicode to ARMSCII,
+ we prefer the ASCII range, that is we want U+0027
+ to convert to 0x27 rather than to 0xFF.
+ */
+ tab[ofs]= ch;
+ }
}
}
}
@@ -1598,7 +1619,10 @@ exp: /* [ E [ <sign> ] <unsigned integer> ] */
if ((negative_exp= (*str == '-')) || *str=='+')
{
if (++str == end)
+ {
+ str-= 2; /* 'e-' or 'e+' not followed by digits */
goto ret_sign;
+ }
}
for (exponent= 0 ;
str < end && (ch= (uchar) (*str - '0')) < 10;
@@ -1608,6 +1632,8 @@ exp: /* [ E [ <sign> ] <unsigned integer> ] */
}
shift+= negative_exp ? -exponent : exponent;
}
+ else
+ str--; /* 'e' not followed by digits */
}
if (shift == 0) /* No shift, check addon digit */
@@ -1929,6 +1955,7 @@ MY_CHARSET_HANDLER my_charset_8bit_handler=
my_charlen_8bit,
my_well_formed_char_length_8bit,
my_copy_8bit,
+ my_wc_mb_bin, /* native_to_mb */
};
MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =