summaryrefslogtreecommitdiff
path: root/strings
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mariadb.org>2015-03-02 18:24:22 +0400
committerAlexander Barkov <bar@mariadb.org>2015-03-02 18:24:22 +0400
commitb1b6101af2f69871ff3f3049c6e42e95ced20544 (patch)
treeb395810839d006b6101352b2c8f65984ffd28d92 /strings
parent7047bef1ef8c2cf04932b4b1a51a479a746d9a40 (diff)
downloadmariadb-git-b1b6101af2f69871ff3f3049c6e42e95ced20544.tar.gz
A preparatory patch for MDEV-6566.
Adding a new virtual function MY_CHARSET_HANDLER::copy_abort(). Moving character set specific code into the corresponding implementations (for simple, multi-byte and mbmaxlen>1 character sets).
Diffstat (limited to 'strings')
-rw-r--r--strings/ctype-big5.c3
-rw-r--r--strings/ctype-bin.c3
-rw-r--r--strings/ctype-cp932.c3
-rw-r--r--strings/ctype-euc_kr.c3
-rw-r--r--strings/ctype-eucjpms.c3
-rw-r--r--strings/ctype-gb2312.c3
-rw-r--r--strings/ctype-gbk.c3
-rw-r--r--strings/ctype-latin1.c3
-rw-r--r--strings/ctype-mb.c23
-rw-r--r--strings/ctype-simple.c22
-rw-r--r--strings/ctype-sjis.c3
-rw-r--r--strings/ctype-tis620.c3
-rw-r--r--strings/ctype-ucs2.c71
-rw-r--r--strings/ctype-ujis.c3
-rw-r--r--strings/ctype-utf8.c9
15 files changed, 139 insertions, 19 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index 38bdf86c64a..a9eb2b1b318 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -6922,7 +6922,8 @@ static MY_CHARSET_HANDLER my_charset_big5_handler=
my_strntod_8bit,
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
- my_scan_8bit
+ my_scan_8bit,
+ my_copy_abort_mb,
};
struct charset_info_st my_charset_big5_chinese_ci=
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index 2e699db0bd3..6b53b34159a 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -548,7 +548,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntod_8bit,
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
- my_scan_8bit
+ my_scan_8bit,
+ my_copy_8bit,
};
diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c
index 86f450718d7..66b352721db 100644
--- a/strings/ctype-cp932.c
+++ b/strings/ctype-cp932.c
@@ -34800,7 +34800,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntod_8bit,
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
- my_scan_8bit
+ my_scan_8bit,
+ my_copy_abort_mb,
};
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index b7065369258..36d99eec375 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -10007,7 +10007,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntod_8bit,
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
- my_scan_8bit
+ my_scan_8bit,
+ my_copy_abort_mb,
};
diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c
index 0ce179b3a2d..8c47b666cf4 100644
--- a/strings/ctype-eucjpms.c
+++ b/strings/ctype-eucjpms.c
@@ -67549,7 +67549,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntod_8bit,
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
- my_scan_8bit
+ my_scan_8bit,
+ my_copy_abort_mb,
};
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index 0399660d311..b5aeed2088f 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -6410,7 +6410,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntod_8bit,
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
- my_scan_8bit
+ my_scan_8bit,
+ my_copy_abort_mb,
};
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index f1b46ca4e6c..d282d96145d 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -10806,7 +10806,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntod_8bit,
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
- my_scan_8bit
+ my_scan_8bit,
+ my_copy_abort_mb,
};
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index babf74599ea..099f03460ce 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -421,7 +421,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntod_8bit,
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
- my_scan_8bit
+ my_scan_8bit,
+ my_copy_8bit,
};
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index cc0513dbc90..fc41563324a 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -423,6 +423,29 @@ size_t my_well_formed_len_mb(CHARSET_INFO *cs, const char *b, const char *e,
}
+/*
+ Copy a multi-byte string. Abort if a bad byte sequence was found.
+ Not more than "nchars" characters are copied.
+
+ The number of bytes to copy is the value returned by
+ cs->cset->well_formed_len() for the source range
+ [src, src + src_length) and the "nchars" limit, where src_length
+ has first been passed through set_if_smaller() against dst_length.
+
+ On return:
+ - status->m_source_end_pos points just past the last copied
+ source byte (src + returned length).
+ - status->m_well_formed_error_pos is set to that same position when
+ well_formed_len() reported an error, and to NULL otherwise.
+
+ Returns the number of bytes copied into "dst".
+*/
+size_t
+my_copy_abort_mb(CHARSET_INFO *cs,
+ char *dst, size_t dst_length,
+ const char *src, size_t src_length,
+ size_t nchars, MY_STRCOPY_STATUS *status)
+{
+ int well_formed_error;
+ size_t res;
+
+ set_if_smaller(src_length, dst_length); /* presumably caps src_length at dst_length; macro is defined elsewhere */
+ res= cs->cset->well_formed_len(cs, src, src + src_length,
+ nchars, &well_formed_error);
+ memmove(dst, src, res); /* NOTE(review): called even when res is 0; my_copy_8bit() guards this case */
+ status->m_source_end_pos= src + res;
+ status->m_well_formed_error_pos= well_formed_error ? src + res : NULL;
+ return res;
+}
+
+
uint my_instr_mb(CHARSET_INFO *cs,
const char *b, size_t b_length,
const char *s, size_t s_length,
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index 7f13cef4474..b010c528979 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -1108,6 +1108,25 @@ size_t my_well_formed_len_8bit(CHARSET_INFO *cs __attribute__((unused)),
}
+/*
+ Copy an 8-bit string. Not more than "nchars" characters are copied.
+
+ src_length is passed through set_if_smaller() against both
+ dst_length and nchars, and the resulting number of bytes is copied.
+ No well-formedness check is done here:
+ status->m_well_formed_error_pos is always set to NULL.
+ status->m_source_end_pos is set just past the last copied source byte.
+
+ Returns the number of bytes copied into "dst".
+*/
+size_t
+my_copy_8bit(CHARSET_INFO *cs __attribute__((unused)),
+ char *dst, size_t dst_length,
+ const char *src, size_t src_length,
+ size_t nchars, MY_STRCOPY_STATUS *status)
+{
+ set_if_smaller(src_length, dst_length); /* presumably caps src_length at dst_length; macro is defined elsewhere */
+ set_if_smaller(src_length, nchars); /* nchars is used directly as a byte limit here */
+ if (src_length) /* skip memmove() entirely for a zero-length copy */
+ memmove(dst, src, src_length);
+ status->m_source_end_pos= src + src_length;
+ status->m_well_formed_error_pos= NULL;
+ return src_length;
+}
+
+
size_t my_lengthsp_8bit(CHARSET_INFO *cs __attribute__((unused)),
const char *ptr, size_t length)
{
@@ -1886,7 +1905,8 @@ MY_CHARSET_HANDLER my_charset_8bit_handler=
my_strntod_8bit,
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
- my_scan_8bit
+ my_scan_8bit,
+ my_copy_8bit,
};
MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index c6e55879102..2038632c9d3 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -34172,7 +34172,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntod_8bit,
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
- my_scan_8bit
+ my_scan_8bit,
+ my_copy_abort_mb,
};
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index 61477f177c1..343fb812e20 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -885,7 +885,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntod_8bit,
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
- my_scan_8bit
+ my_scan_8bit,
+ my_copy_8bit,
};
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index a560eb08bae..8f234e9e3a8 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -92,6 +92,65 @@ my_strcasecmp_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)),
}
+/*
+ Copy a UCS2/UTF16/UTF32 string.
+ Not more than "nchars" characters are copied.
+
+ UCS2/UTF16/UTF32 may need to prepend some zero bytes,
+ e.g. when copying from a BINARY source:
+ INSERT INTO t1 (ucs2_column) VALUES (0x01);
+ 0x01 -> 0x0001
+
+ If src_length is not a multiple of cs->mbminlen, the leading
+ (src_length % cs->mbminlen) source bytes are left-padded with zero
+ bytes to form one character of cs->mbminlen bytes in "dst".
+ The rest of the source is then copied by my_copy_abort_mb().
+ If src_length is a multiple of cs->mbminlen, the whole copy is
+ delegated to my_copy_abort_mb().
+
+ Returns 0, with both status->m_source_end_pos and
+ status->m_well_formed_error_pos set to "src", when padding is needed but:
+ - "dst" is shorter than cs->mbminlen, or "nchars" is 0, or
+ - the padded character is rejected by cs->cset->well_formed_len().
+
+ Otherwise returns the number of bytes written to "dst"
+ (including the padded character, if any); the status fields are
+ then the ones set by my_copy_abort_mb().
+*/
+static size_t
+my_copy_abort_mb2_or_mb4(CHARSET_INFO *cs,
+ char *dst, size_t dst_length,
+ const char *src, size_t src_length,
+ size_t nchars, MY_STRCOPY_STATUS *status)
+{
+ size_t src_offset;
+
+ if ((src_offset= (src_length % cs->mbminlen))) /* non-zero: source starts with an incomplete character */
+ {
+ int well_formed_error;
+ size_t pad_length;
+ if (dst_length < cs->mbminlen || !nchars) /* no room or no budget for the padded character */
+ {
+ status->m_source_end_pos= status->m_well_formed_error_pos= src;
+ return 0;
+ }
+
+ pad_length= cs->mbminlen - src_offset;
+ bzero(dst, pad_length); /* leading zero bytes */
+ memmove(dst + pad_length, src, src_offset); /* followed by the leading source bytes */
+ /*
+ In some cases left zero-padding can create an incorrect character.
+ For example:
+ INSERT INTO t1 (utf32_column) VALUES (0x110000);
+ We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
+ The valid characters range is limited to 0x00000000..0x0010FFFF.
+
+ Make sure we didn't pad to an incorrect character.
+ */
+ if (cs->cset->well_formed_len(cs,
+ dst, dst + cs->mbminlen, 1,
+ &well_formed_error) != cs->mbminlen)
+ {
+ status->m_source_end_pos= status->m_well_formed_error_pos= src;
+ return 0;
+ }
+ nchars--; /* the padded character counts against the limit */
+ src+= src_offset;
+ src_length-= src_offset;
+ dst+= cs->mbminlen;
+ dst_length-= cs->mbminlen;
+ return
+ cs->mbminlen /* The left-padded character */ +
+ my_copy_abort_mb(cs, dst, dst_length, src, src_length, nchars, status);
+ }
+ return my_copy_abort_mb(cs, dst, dst_length, src, src_length, nchars, status);
+}
+
+
static long
my_strntol_mb2_or_mb4(CHARSET_INFO *cs,
const char *nptr, size_t l, int base,
@@ -1682,7 +1741,8 @@ MY_CHARSET_HANDLER my_charset_utf16_handler=
my_strntod_mb2_or_mb4,
my_strtoll10_mb2,
my_strntoull10rnd_mb2_or_mb4,
- my_scan_mb2
+ my_scan_mb2,
+ my_copy_abort_mb2_or_mb4,
};
@@ -1851,7 +1911,8 @@ static MY_CHARSET_HANDLER my_charset_utf16le_handler=
my_strntod_mb2_or_mb4,
my_strtoll10_mb2,
my_strntoull10rnd_mb2_or_mb4,
- my_scan_mb2
+ my_scan_mb2,
+ my_copy_abort_mb2_or_mb4,
};
@@ -2765,7 +2826,8 @@ MY_CHARSET_HANDLER my_charset_utf32_handler=
my_strntod_mb2_or_mb4,
my_strtoll10_utf32,
my_strntoull10rnd_mb2_or_mb4,
- my_scan_utf32
+ my_scan_utf32,
+ my_copy_abort_mb2_or_mb4,
};
@@ -3383,7 +3445,8 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler=
my_strntod_mb2_or_mb4,
my_strtoll10_mb2,
my_strntoull10rnd_mb2_or_mb4,
- my_scan_mb2
+ my_scan_mb2,
+ my_copy_abort_mb2_or_mb4,
};
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index e7dbefe6c1d..f208d15f364 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -67295,7 +67295,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strntod_8bit,
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
- my_scan_8bit
+ my_scan_8bit,
+ my_copy_abort_mb,
};
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index d0a64d11c84..1116228f706 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -5614,7 +5614,8 @@ MY_CHARSET_HANDLER my_charset_utf8_handler=
my_strntod_8bit,
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
- my_scan_8bit
+ my_scan_8bit,
+ my_copy_abort_mb,
};
@@ -7167,7 +7168,8 @@ static MY_CHARSET_HANDLER my_charset_filename_handler=
my_strntod_8bit,
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
- my_scan_8bit
+ my_scan_8bit,
+ my_copy_abort_mb,
};
@@ -8110,7 +8112,8 @@ MY_CHARSET_HANDLER my_charset_utf8mb4_handler=
my_strntod_8bit,
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
- my_scan_8bit
+ my_scan_8bit,
+ my_copy_abort_mb,
};