summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author unknown <bar@mysql.com> 2005-03-25 16:08:54 +0400
committer unknown <bar@mysql.com> 2005-03-25 16:08:54 +0400
commit 90aa6e00a730483e9aa324990d28780571601b58 (patch)
tree 0b1831ee34ec7e5f948d0f95d9bceb93087c90db
parent cdf8e293d9e71ea00d346a4e29aeafe241ec6fc5 (diff)
download mariadb-git-90aa6e00a730483e9aa324990d28780571601b58.tar.gz
Allow inserting of extra HKSCS and cp950 characters into a Big5 column.
-rw-r--r-- mysql-test/r/ctype_big5.result 7
-rw-r--r-- mysql-test/t/ctype_big5.test 9
-rw-r--r-- strings/ctype-big5.c 39
3 files changed, 54 insertions, 1 deletions
diff --git a/mysql-test/r/ctype_big5.result b/mysql-test/r/ctype_big5.result
index 8f4ee3d0558..c63704f6d9d 100644
--- a/mysql-test/r/ctype_big5.result
+++ b/mysql-test/r/ctype_big5.result
@@ -77,3 +77,10 @@ big5_bin 6109
big5_bin 61
big5_bin 6120
drop table t1;
+SET NAMES big5;
+CREATE TABLE t1 (a text) character set big5;
+INSERT INTO t1 VALUES ('ùØ');
+SELECT * FROM t1;
+a
+ùØ
+DROP TABLE t1;
diff --git a/mysql-test/t/ctype_big5.test b/mysql-test/t/ctype_big5.test
index 8b75123ca32..b5cf610d941 100644
--- a/mysql-test/t/ctype_big5.test
+++ b/mysql-test/t/ctype_big5.test
@@ -16,3 +16,12 @@ SET collation_connection='big5_chinese_ci';
-- source include/ctype_filesort.inc
SET collation_connection='big5_bin';
-- source include/ctype_filesort.inc
+
+#
+# Bug#9357: TEXT columns break string with special word in BIG5 charset.
+#
+SET NAMES big5;
+CREATE TABLE t1 (a text) character set big5;
+INSERT INTO t1 VALUES ('ùØ');
+SELECT * FROM t1;
+DROP TABLE t1;
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index 270b02212af..58847a96591 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -6271,6 +6271,43 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
return 2;
}
+
+/*
+  Returns the byte length of the well formed BIG5 prefix of [b, e),
+  accepting at most "pos" characters and stopping at the first bad byte.
+  CP950 and HKSCS additional characters are also accepted.
+*/
+static
+uint my_well_formed_len_big5(CHARSET_INFO *cs __attribute__((unused)),
+                             const char *b, const char *e, uint pos)
+{
+  const char *b0= b;
+  /* Count down "pos" so that no more than "pos" characters are accepted */
+  for ( ; pos && b < e; pos--)
+  {
+    /*
+      Compare as uchar: plain "char" may be signed or unsigned.
+    */
+    if ((uchar) b[0] < 0x80)
+    {
+      /* Single byte ascii character */
+      b++;
+    }
+    else if ((e - b) > 1 && isbig5code((uchar) b[0], (uchar) b[1]))
+    {
+      /* Double byte character; both bytes are inside the buffer */
+      b+= 2;
+    }
+    else
+    {
+      /* Wrong or truncated byte sequence */
+      break;
+    }
+  }
+  return (uint) (b - b0);
+}
+
+
static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
{
NULL, /* init */
@@ -6291,7 +6328,7 @@ static MY_CHARSET_HANDLER my_charset_big5_handler=
mbcharlen_big5,
my_numchars_mb,
my_charpos_mb,
- my_well_formed_len_mb,
+ my_well_formed_len_big5,
my_lengthsp_8bit,
my_numcells_8bit,
my_mb_wc_big5, /* mb_wc */